bpf: Move getsockopt retval to struct bpf_cg_run_ctx
The retval value is moved to struct bpf_cg_run_ctx for ease of access in different prog types with different context structs layouts. The helper implementation (to be added in a later patch in the series) can simply perform a container_of from current->bpf_ctx to retrieve bpf_cg_run_ctx. Unfortunately, there is no easy way to access the current task_struct via the verifier BPF bytecode rewrite, aside from possibly calling a helper, so a pointer to current task is added to struct bpf_sockopt_kern so that the rewritten BPF bytecode can access struct bpf_cg_run_ctx with an indirection. For backward compatibility, if a getsockopt program rejects a syscall by returning 0, an -EPERM will be generated, by having the BPF_PROG_RUN_ARRAY_CG family macros automatically set the retval to -EPERM. Unlike prior to this patch, this -EPERM will be visible to ctx->retval for any other hooks down the line in the prog array. Additionally, the restriction that getsockopt filters can only set the retval to 0 is removed, considering that certain getsockopt implementations may return optlen. Filters are now able to set the value arbitrarily. Signed-off-by: YiFei Zhu <zhuyifei@google.com> Reviewed-by: Stanislav Fomichev <sdf@google.com> Link: https://lore.kernel.org/r/73b0325f5c29912ccea7ea57ec1ed4d388fc1d37.1639619851.git.zhuyifei@google.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
committed by
Alexei Starovoitov
parent
f10d059661
commit
c4dcfdd406
@ -1245,6 +1245,7 @@ struct bpf_run_ctx {};
|
||||
struct bpf_cg_run_ctx {
|
||||
struct bpf_run_ctx run_ctx;
|
||||
const struct bpf_prog_array_item *prog_item;
|
||||
int retval;
|
||||
};
|
||||
|
||||
struct bpf_trace_run_ctx {
|
||||
@ -1280,16 +1281,16 @@ typedef u32 (*bpf_prog_run_fn)(const struct bpf_prog *prog, const void *ctx);
|
||||
static __always_inline int
|
||||
BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
|
||||
const void *ctx, bpf_prog_run_fn run_prog,
|
||||
u32 *ret_flags)
|
||||
int retval, u32 *ret_flags)
|
||||
{
|
||||
const struct bpf_prog_array_item *item;
|
||||
const struct bpf_prog *prog;
|
||||
const struct bpf_prog_array *array;
|
||||
struct bpf_run_ctx *old_run_ctx;
|
||||
struct bpf_cg_run_ctx run_ctx;
|
||||
int ret = 0;
|
||||
u32 func_ret;
|
||||
|
||||
run_ctx.retval = retval;
|
||||
migrate_disable();
|
||||
rcu_read_lock();
|
||||
array = rcu_dereference(array_rcu);
|
||||
@ -1299,27 +1300,28 @@ BPF_PROG_RUN_ARRAY_CG_FLAGS(const struct bpf_prog_array __rcu *array_rcu,
|
||||
run_ctx.prog_item = item;
|
||||
func_ret = run_prog(prog, ctx);
|
||||
if (!(func_ret & 1))
|
||||
ret = -EPERM;
|
||||
run_ctx.retval = -EPERM;
|
||||
*(ret_flags) |= (func_ret >> 1);
|
||||
item++;
|
||||
}
|
||||
bpf_reset_run_ctx(old_run_ctx);
|
||||
rcu_read_unlock();
|
||||
migrate_enable();
|
||||
return ret;
|
||||
return run_ctx.retval;
|
||||
}
|
||||
|
||||
static __always_inline int
|
||||
BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
|
||||
const void *ctx, bpf_prog_run_fn run_prog)
|
||||
const void *ctx, bpf_prog_run_fn run_prog,
|
||||
int retval)
|
||||
{
|
||||
const struct bpf_prog_array_item *item;
|
||||
const struct bpf_prog *prog;
|
||||
const struct bpf_prog_array *array;
|
||||
struct bpf_run_ctx *old_run_ctx;
|
||||
struct bpf_cg_run_ctx run_ctx;
|
||||
int ret = 0;
|
||||
|
||||
run_ctx.retval = retval;
|
||||
migrate_disable();
|
||||
rcu_read_lock();
|
||||
array = rcu_dereference(array_rcu);
|
||||
@ -1328,13 +1330,13 @@ BPF_PROG_RUN_ARRAY_CG(const struct bpf_prog_array __rcu *array_rcu,
|
||||
while ((prog = READ_ONCE(item->prog))) {
|
||||
run_ctx.prog_item = item;
|
||||
if (!run_prog(prog, ctx))
|
||||
ret = -EPERM;
|
||||
run_ctx.retval = -EPERM;
|
||||
item++;
|
||||
}
|
||||
bpf_reset_run_ctx(old_run_ctx);
|
||||
rcu_read_unlock();
|
||||
migrate_enable();
|
||||
return ret;
|
||||
return run_ctx.retval;
|
||||
}
|
||||
|
||||
static __always_inline u32
|
||||
@ -1394,7 +1396,7 @@ out:
|
||||
u32 _flags = 0; \
|
||||
bool _cn; \
|
||||
u32 _ret; \
|
||||
_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, &_flags); \
|
||||
_ret = BPF_PROG_RUN_ARRAY_CG_FLAGS(array, ctx, func, 0, &_flags); \
|
||||
_cn = _flags & BPF_RET_SET_CN; \
|
||||
if (!_ret) \
|
||||
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
|
||||
|
@ -1356,7 +1356,10 @@ struct bpf_sockopt_kern {
|
||||
s32 level;
|
||||
s32 optname;
|
||||
s32 optlen;
|
||||
s32 retval;
|
||||
/* for retval in struct bpf_cg_run_ctx */
|
||||
struct task_struct *current_task;
|
||||
/* Temporary "register" for indirect stores to ppos. */
|
||||
u64 tmp_reg;
|
||||
};
|
||||
|
||||
int copy_bpf_fprog_from_user(struct sock_fprog *dst, sockptr_t src, int len);
|
||||
|
@ -1079,7 +1079,7 @@ int __cgroup_bpf_run_filter_skb(struct sock *sk,
|
||||
cgrp->bpf.effective[atype], skb, __bpf_prog_run_save_cb);
|
||||
} else {
|
||||
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], skb,
|
||||
__bpf_prog_run_save_cb);
|
||||
__bpf_prog_run_save_cb, 0);
|
||||
}
|
||||
bpf_restore_data_end(skb, saved_data_end);
|
||||
__skb_pull(skb, offset);
|
||||
@ -1108,7 +1108,7 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
|
||||
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
|
||||
|
||||
return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sk,
|
||||
bpf_prog_run);
|
||||
bpf_prog_run, 0);
|
||||
}
|
||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk);
|
||||
|
||||
@ -1154,7 +1154,7 @@ int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
|
||||
|
||||
cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
|
||||
return BPF_PROG_RUN_ARRAY_CG_FLAGS(cgrp->bpf.effective[atype], &ctx,
|
||||
bpf_prog_run, flags);
|
||||
bpf_prog_run, 0, flags);
|
||||
}
|
||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr);
|
||||
|
||||
@ -1181,7 +1181,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
|
||||
struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
|
||||
|
||||
return BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], sock_ops,
|
||||
bpf_prog_run);
|
||||
bpf_prog_run, 0);
|
||||
}
|
||||
EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops);
|
||||
|
||||
@ -1199,7 +1199,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
|
||||
rcu_read_lock();
|
||||
cgrp = task_dfl_cgroup(current);
|
||||
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
|
||||
bpf_prog_run);
|
||||
bpf_prog_run, 0);
|
||||
rcu_read_unlock();
|
||||
|
||||
return ret;
|
||||
@ -1330,7 +1330,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
|
||||
|
||||
rcu_read_lock();
|
||||
cgrp = task_dfl_cgroup(current);
|
||||
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx, bpf_prog_run);
|
||||
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[atype], &ctx,
|
||||
bpf_prog_run, 0);
|
||||
rcu_read_unlock();
|
||||
|
||||
kfree(ctx.cur_val);
|
||||
@ -1445,7 +1446,7 @@ int __cgroup_bpf_run_filter_setsockopt(struct sock *sk, int *level,
|
||||
|
||||
lock_sock(sk);
|
||||
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_SETSOCKOPT],
|
||||
&ctx, bpf_prog_run);
|
||||
&ctx, bpf_prog_run, 0);
|
||||
release_sock(sk);
|
||||
|
||||
if (ret)
|
||||
@ -1509,7 +1510,7 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
|
||||
.sk = sk,
|
||||
.level = level,
|
||||
.optname = optname,
|
||||
.retval = retval,
|
||||
.current_task = current,
|
||||
};
|
||||
int ret;
|
||||
|
||||
@ -1553,10 +1554,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
|
||||
|
||||
lock_sock(sk);
|
||||
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
|
||||
&ctx, bpf_prog_run);
|
||||
&ctx, bpf_prog_run, retval);
|
||||
release_sock(sk);
|
||||
|
||||
if (ret)
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (ctx.optlen > max_optlen || ctx.optlen < 0) {
|
||||
@ -1564,14 +1565,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* BPF programs only allowed to set retval to 0, not some
|
||||
* arbitrary value.
|
||||
*/
|
||||
if (ctx.retval != 0 && ctx.retval != retval) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ctx.optlen != 0) {
|
||||
if (copy_to_user(optval, ctx.optval, ctx.optlen) ||
|
||||
put_user(ctx.optlen, optlen)) {
|
||||
@ -1580,8 +1573,6 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
|
||||
}
|
||||
}
|
||||
|
||||
ret = ctx.retval;
|
||||
|
||||
out:
|
||||
sockopt_free_buf(&ctx, &buf);
|
||||
return ret;
|
||||
@ -1596,10 +1587,10 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
|
||||
.sk = sk,
|
||||
.level = level,
|
||||
.optname = optname,
|
||||
.retval = retval,
|
||||
.optlen = *optlen,
|
||||
.optval = optval,
|
||||
.optval_end = optval + *optlen,
|
||||
.current_task = current,
|
||||
};
|
||||
int ret;
|
||||
|
||||
@ -1612,25 +1603,19 @@ int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
|
||||
*/
|
||||
|
||||
ret = BPF_PROG_RUN_ARRAY_CG(cgrp->bpf.effective[CGROUP_GETSOCKOPT],
|
||||
&ctx, bpf_prog_run);
|
||||
if (ret)
|
||||
&ctx, bpf_prog_run, retval);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (ctx.optlen > *optlen)
|
||||
return -EFAULT;
|
||||
|
||||
/* BPF programs only allowed to set retval to 0, not some
|
||||
* arbitrary value.
|
||||
*/
|
||||
if (ctx.retval != 0 && ctx.retval != retval)
|
||||
return -EFAULT;
|
||||
|
||||
/* BPF programs can shrink the buffer, export the modifications.
|
||||
*/
|
||||
if (ctx.optlen != 0)
|
||||
*optlen = ctx.optlen;
|
||||
|
||||
return ctx.retval;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -2046,10 +2031,39 @@ static u32 cg_sockopt_convert_ctx_access(enum bpf_access_type type,
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optlen);
|
||||
break;
|
||||
case offsetof(struct bpf_sockopt, retval):
|
||||
if (type == BPF_WRITE)
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_STX_MEM, retval);
|
||||
else
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, retval);
|
||||
BUILD_BUG_ON(offsetof(struct bpf_cg_run_ctx, run_ctx) != 0);
|
||||
|
||||
if (type == BPF_WRITE) {
|
||||
int treg = BPF_REG_9;
|
||||
|
||||
if (si->src_reg == treg || si->dst_reg == treg)
|
||||
--treg;
|
||||
if (si->src_reg == treg || si->dst_reg == treg)
|
||||
--treg;
|
||||
*insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, treg,
|
||||
offsetof(struct bpf_sockopt_kern, tmp_reg));
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
|
||||
treg, si->dst_reg,
|
||||
offsetof(struct bpf_sockopt_kern, current_task));
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
|
||||
treg, treg,
|
||||
offsetof(struct task_struct, bpf_ctx));
|
||||
*insn++ = BPF_STX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
|
||||
treg, si->src_reg,
|
||||
offsetof(struct bpf_cg_run_ctx, retval));
|
||||
*insn++ = BPF_LDX_MEM(BPF_DW, treg, si->dst_reg,
|
||||
offsetof(struct bpf_sockopt_kern, tmp_reg));
|
||||
} else {
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sockopt_kern, current_task),
|
||||
si->dst_reg, si->src_reg,
|
||||
offsetof(struct bpf_sockopt_kern, current_task));
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct task_struct, bpf_ctx),
|
||||
si->dst_reg, si->dst_reg,
|
||||
offsetof(struct task_struct, bpf_ctx));
|
||||
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_cg_run_ctx, retval),
|
||||
si->dst_reg, si->dst_reg,
|
||||
offsetof(struct bpf_cg_run_ctx, retval));
|
||||
}
|
||||
break;
|
||||
case offsetof(struct bpf_sockopt, optval):
|
||||
*insn++ = CG_SOCKOPT_ACCESS_FIELD(BPF_LDX_MEM, optval);
|
||||
|
Reference in New Issue
Block a user