UPSTREAM: io_uring: import 5.15-stable io_uring
No upstream commit exists.
This imports the io_uring codebase from 5.15.85, wholesale. Changes
from that code base:
- Drop IOCB_ALLOC_CACHE, we don't have that in 5.10.
- Drop MKDIRAT/SYMLINKAT/LINKAT. Would require further VFS backports,
and we don't support these in 5.10 to begin with.
- sock_from_file() old style calling convention.
- Use compat_get_bitmap() only for CONFIG_COMPAT=y
Change-Id: I7ce5226d6b39763ffc246fd6357cece9aafd4b59
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 788d082426)
Bug: 268174392
Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
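Two of the backport notes above are easiest to see in code. The following is a minimal, illustrative sketch of both adaptations, not the literal backport hunks; the local names ('file', 'mask', 'arg', 'len', 'ret') are invented for illustration:

	/*
	 * 5.15 callers do:  sock = sock_from_file(file);
	 * The 5.10 calling convention takes an extra int *err
	 * out-parameter, so the backport passes a dummy:
	 */
	int unused;
	struct socket *sock = sock_from_file(file, &unused);
	if (unlikely(!sock))
		return -ENOTSOCK;

	/*
	 * compat_get_bitmap() is only usable here on CONFIG_COMPAT=y
	 * builds, so the compat path is compiled out otherwise:
	 */
#ifdef CONFIG_COMPAT
	if (in_compat_syscall())
		ret = compat_get_bitmap(cpumask_bits(mask),
					(const compat_ulong_t __user *)arg,
					len * 8);
	else
#endif
		ret = copy_from_user(cpumask_bits(mask), arg, len);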
--- a/Makefile
+++ b/Makefile
@@ -1224,7 +1224,7 @@ endif
 	$(Q)$(MAKE) $(hdr-inst)=$(hdr-prefix)arch/$(SRCARCH)/include/uapi
 
 ifeq ($(KBUILD_EXTMOD),)
-core-y		+= kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/
+core-y		+= kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ io_uring/
 
 vmlinux-dirs	:= $(patsubst %/,%,$(filter %/, \
 		     $(core-y) $(core-m) $(drivers-y) $(drivers-m) \
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -34,8 +34,6 @@ obj-$(CONFIG_TIMERFD)		+= timerfd.o
 obj-$(CONFIG_EVENTFD)		+= eventfd.o
 obj-$(CONFIG_USERFAULTFD)	+= userfaultfd.o
 obj-$(CONFIG_AIO)		+= aio.o
-obj-$(CONFIG_IO_URING)		+= io_uring.o
-obj-$(CONFIG_IO_WQ)		+= io-wq.o
 obj-$(CONFIG_FS_DAX)		+= dax.o
 obj-$(CONFIG_FS_ENCRYPTION)	+= crypto/
 obj-$(CONFIG_FS_VERITY)		+= verity/
fs/io-wq.c: 1242 lines changed (file diff suppressed because it is too large)
--- a/include/linux/io_uring.h
+++ b/include/linux/io_uring.h
@@ -5,50 +5,20 @@
 #include <linux/sched.h>
 #include <linux/xarray.h>
 
-struct io_identity {
-	struct files_struct		*files;
-	struct mm_struct		*mm;
-#ifdef CONFIG_BLK_CGROUP
-	struct cgroup_subsys_state	*blkcg_css;
-#endif
-	const struct cred		*creds;
-	struct nsproxy			*nsproxy;
-	struct fs_struct		*fs;
-	unsigned long			fsize;
-#ifdef CONFIG_AUDIT
-	kuid_t				loginuid;
-	unsigned int			sessionid;
-#endif
-	refcount_t			count;
-};
-
-struct io_uring_task {
-	/* submission side */
-	struct xarray		xa;
-	struct wait_queue_head	wait;
-	struct file		*last;
-	struct percpu_counter	inflight;
-	struct io_identity	__identity;
-	struct io_identity	*identity;
-	atomic_t		in_idle;
-	bool			sqpoll;
-};
-
 #if defined(CONFIG_IO_URING)
 struct sock *io_uring_get_socket(struct file *file);
-void __io_uring_task_cancel(void);
-void __io_uring_files_cancel(struct files_struct *files);
+void __io_uring_cancel(bool cancel_all);
 void __io_uring_free(struct task_struct *tsk);
 
+static inline void io_uring_files_cancel(void)
+{
+	if (current->io_uring)
+		__io_uring_cancel(false);
+}
 static inline void io_uring_task_cancel(void)
 {
-	if (current->io_uring && !xa_empty(&current->io_uring->xa))
-		__io_uring_task_cancel();
-}
-
-static inline void io_uring_files_cancel(struct files_struct *files)
-{
-	if (current->io_uring && !xa_empty(&current->io_uring->xa))
-		__io_uring_files_cancel(files);
+	if (current->io_uring)
+		__io_uring_cancel(true);
 }
 static inline void io_uring_free(struct task_struct *tsk)
 {
@@ -63,7 +33,7 @@ static inline struct sock *io_uring_get_socket(struct file *file)
 static inline void io_uring_task_cancel(void)
 {
 }
-static inline void io_uring_files_cancel(struct files_struct *files)
+static inline void io_uring_files_cancel(void)
 {
 }
 static inline void io_uring_free(struct task_struct *tsk)
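The header change above collapses the old pair of cancel entry points (__io_uring_task_cancel() / __io_uring_files_cancel()) into a single __io_uring_cancel(bool cancel_all). A sketch of how the two inline wrappers divide the work; the do_exit() call site appears in the kernel/exit.c hunk further down, while the exec-path use is an assumption based on the upstream 5.15 code:

	/* normal task exit: cancel requests tied to the exiting task */
	io_uring_files_cancel();	/* -> __io_uring_cancel(false) */

	/* exec / final teardown: cancel everything on the task's rings */
	io_uring_task_cancel();		/* -> __io_uring_cancel(true) */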
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -901,6 +901,9 @@ struct task_struct {
 	/* CLONE_CHILD_CLEARTID: */
 	int __user			*clear_child_tid;
 
+	/* PF_IO_WORKER */
+	void				*pf_io_worker;
+
 	u64				utime;
 	u64				stime;
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -341,7 +341,7 @@ asmlinkage long sys_io_uring_setup(u32 entries,
 				struct io_uring_params __user *p);
 asmlinkage long sys_io_uring_enter(unsigned int fd, u32 to_submit,
 				u32 min_complete, u32 flags,
-				const sigset_t __user *sig, size_t sigsz);
+				const void __user *argp, size_t argsz);
 asmlinkage long sys_io_uring_register(unsigned int fd, unsigned int op,
 				void __user *arg, unsigned int nr_args);
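With this prototype change, the last two io_uring_enter(2) arguments are no longer fixed as a sigmask and its size: their meaning now depends on the enter flags. A hedged sketch of the dual interpretation (simplified; error codes approximate):

	if (flags & IORING_ENTER_EXT_ARG) {
		/* argp points at a struct io_uring_getevents_arg
		 * (defined in the uapi hunk below), argsz = sizeof() */
		struct io_uring_getevents_arg arg;

		if (argsz != sizeof(arg))
			return -EINVAL;
		if (copy_from_user(&arg, argp, sizeof(arg)))
			return -EFAULT;
	} else {
		/* legacy behaviour: argp is the sigmask, argsz its size */
		const sigset_t __user *sig = argp;
	}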
--- a/include/trace/events/io_uring.h
+++ b/include/trace/events/io_uring.h
@@ -12,11 +12,11 @@ struct io_wq_work;
 /**
  * io_uring_create - called after a new io_uring context was prepared
  *
  * @fd:			corresponding file descriptor
  * @ctx:		pointer to a ring context structure
  * @sq_entries:		actual SQ size
  * @cq_entries:		actual CQ size
  * @flags:		SQ ring flags, provided to io_uring_setup(2)
  *
  * Allows to trace io_uring creation and provide pointer to a context, that can
  * be used later to find correlated events.
@@ -49,15 +49,15 @@ TRACE_EVENT(io_uring_create,
 );
 
 /**
- * io_uring_register - called after a buffer/file/eventfd was succesfully
+ * io_uring_register - called after a buffer/file/eventfd was successfully
  * 			registered for a ring
  *
  * @ctx:		pointer to a ring context structure
  * @opcode:		describes which operation to perform
  * @nr_user_files:	number of registered files
  * @nr_user_bufs:	number of registered buffers
  * @cq_ev_fd:		whether eventfs registered or not
  * @ret:		return code
  *
  * Allows to trace fixed files/buffers/eventfds, that could be registered to
  * avoid an overhead of getting references to them for every operation. This
@@ -142,16 +142,16 @@ TRACE_EVENT(io_uring_queue_async_work,
 	TP_ARGS(ctx, rw, req, work, flags),
 
 	TP_STRUCT__entry (
 		__field(  void *,		ctx	)
 		__field(  int,			rw	)
 		__field(  void *,		req	)
 		__field(  struct io_wq_work *,	work	)
 		__field(  unsigned int,		flags	)
 	),
 
 	TP_fast_assign(
 		__entry->ctx	= ctx;
 		__entry->rw	= rw;
 		__entry->req	= req;
 		__entry->work	= work;
 		__entry->flags	= flags;
@@ -196,10 +196,10 @@ TRACE_EVENT(io_uring_defer,
 
 /**
  * io_uring_link - called before the io_uring request added into link_list of
  * 		   another request
  *
  * @ctx:		pointer to a ring context structure
  * @req:		pointer to a linked request
  * @target_req:		pointer to a previous request, that would contain @req
  *
  * Allows to track linked requests, to understand dependencies between requests
@@ -212,8 +212,8 @@ TRACE_EVENT(io_uring_link,
 	TP_ARGS(ctx, req, target_req),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
 		__field(  void *,	req		)
 		__field(  void *,	target_req	)
 	),
 
@@ -244,7 +244,7 @@ TRACE_EVENT(io_uring_cqring_wait,
 	TP_ARGS(ctx, min_events),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
 		__field(  int,		min_events	)
 	),
 
@@ -272,7 +272,7 @@ TRACE_EVENT(io_uring_fail_link,
 	TP_ARGS(req, link),
 
 	TP_STRUCT__entry (
 		__field(  void *,	req	)
 		__field(  void *,	link	)
 	),
 
@@ -290,38 +290,42 @@ TRACE_EVENT(io_uring_fail_link,
  * @ctx:		pointer to a ring context structure
  * @user_data:		user data associated with the request
  * @res:		result of the request
+ * @cflags:		completion flags
  *
  */
 TRACE_EVENT(io_uring_complete,
 
-	TP_PROTO(void *ctx, u64 user_data, long res),
+	TP_PROTO(void *ctx, u64 user_data, int res, unsigned cflags),
 
-	TP_ARGS(ctx, user_data, res),
+	TP_ARGS(ctx, user_data, res, cflags),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
 		__field(  u64,		user_data	)
-		__field(  long,		res		)
+		__field(  int,		res		)
+		__field(  unsigned,	cflags		)
 	),
 
 	TP_fast_assign(
 		__entry->ctx		= ctx;
 		__entry->user_data	= user_data;
 		__entry->res		= res;
+		__entry->cflags		= cflags;
 	),
 
-	TP_printk("ring %p, user_data 0x%llx, result %ld",
+	TP_printk("ring %p, user_data 0x%llx, result %d, cflags %x",
 		  __entry->ctx, (unsigned long long)__entry->user_data,
-		  __entry->res)
+		  __entry->res, __entry->cflags)
 );
 
 
 /**
  * io_uring_submit_sqe - called before submitting one SQE
  *
  * @ctx:		pointer to a ring context structure
+ * @req:		pointer to a submitted request
  * @opcode:		opcode of request
  * @user_data:		user data associated with the request
+ * @flags		request flags
  * @force_nonblock:	whether a context blocking or not
  * @sq_thread:		true if sq_thread has submitted this SQE
  *
@@ -330,41 +334,60 @@ TRACE_EVENT(io_uring_complete,
  */
 TRACE_EVENT(io_uring_submit_sqe,
 
-	TP_PROTO(void *ctx, u8 opcode, u64 user_data, bool force_nonblock,
-		 bool sq_thread),
+	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data, u32 flags,
+		 bool force_nonblock, bool sq_thread),
 
-	TP_ARGS(ctx, opcode, user_data, force_nonblock, sq_thread),
+	TP_ARGS(ctx, req, opcode, user_data, flags, force_nonblock, sq_thread),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
+		__field(  void *,	req		)
 		__field(  u8,		opcode		)
 		__field(  u64,		user_data	)
+		__field(  u32,		flags		)
 		__field(  bool,		force_nonblock	)
 		__field(  bool,		sq_thread	)
 	),
 
 	TP_fast_assign(
 		__entry->ctx		= ctx;
+		__entry->req		= req;
 		__entry->opcode		= opcode;
 		__entry->user_data	= user_data;
+		__entry->flags		= flags;
 		__entry->force_nonblock	= force_nonblock;
 		__entry->sq_thread	= sq_thread;
 	),
 
-	TP_printk("ring %p, op %d, data 0x%llx, non block %d, sq_thread %d",
-		  __entry->ctx, __entry->opcode,
-		  (unsigned long long) __entry->user_data,
-		  __entry->force_nonblock, __entry->sq_thread)
+	TP_printk("ring %p, req %p, op %d, data 0x%llx, flags %u, "
+		  "non block %d, sq_thread %d", __entry->ctx, __entry->req,
+		  __entry->opcode, (unsigned long long)__entry->user_data,
+		  __entry->flags, __entry->force_nonblock, __entry->sq_thread)
 );
 
+/*
+ * io_uring_poll_arm - called after arming a poll wait if successful
+ *
+ * @ctx:		pointer to a ring context structure
+ * @req:		pointer to the armed request
+ * @opcode:		opcode of request
+ * @user_data:		user data associated with the request
+ * @mask:		request poll events mask
+ * @events:		registered events of interest
+ *
+ * Allows to track which fds are waiting for and what are the events of
+ * interest.
+ */
 TRACE_EVENT(io_uring_poll_arm,
 
-	TP_PROTO(void *ctx, u8 opcode, u64 user_data, int mask, int events),
+	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data,
+		 int mask, int events),
 
-	TP_ARGS(ctx, opcode, user_data, mask, events),
+	TP_ARGS(ctx, req, opcode, user_data, mask, events),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
+		__field(  void *,	req		)
 		__field(  u8,		opcode		)
 		__field(  u64,		user_data	)
 		__field(  int,		mask		)
@@ -373,16 +396,17 @@ TRACE_EVENT(io_uring_poll_arm,
 
 	TP_fast_assign(
 		__entry->ctx		= ctx;
+		__entry->req		= req;
 		__entry->opcode		= opcode;
 		__entry->user_data	= user_data;
 		__entry->mask		= mask;
 		__entry->events		= events;
 	),
 
-	TP_printk("ring %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
-		  __entry->ctx, __entry->opcode,
+	TP_printk("ring %p, req %p, op %d, data 0x%llx, mask 0x%x, events 0x%x",
+		  __entry->ctx, __entry->req, __entry->opcode,
 		  (unsigned long long) __entry->user_data,
 		  __entry->mask, __entry->events)
 );
 
 TRACE_EVENT(io_uring_poll_wake,
@@ -437,27 +461,40 @@ TRACE_EVENT(io_uring_task_add,
 		  __entry->mask)
 );
 
+/*
+ * io_uring_task_run - called when task_work_run() executes the poll events
+ *                     notification callbacks
+ *
+ * @ctx:		pointer to a ring context structure
+ * @req:		pointer to the armed request
+ * @opcode:		opcode of request
+ * @user_data:		user data associated with the request
+ *
+ * Allows to track when notified poll events are processed
+ */
 TRACE_EVENT(io_uring_task_run,
 
-	TP_PROTO(void *ctx, u8 opcode, u64 user_data),
+	TP_PROTO(void *ctx, void *req, u8 opcode, u64 user_data),
 
-	TP_ARGS(ctx, opcode, user_data),
+	TP_ARGS(ctx, req, opcode, user_data),
 
 	TP_STRUCT__entry (
 		__field(  void *,	ctx		)
+		__field(  void *,	req		)
 		__field(  u8,		opcode		)
 		__field(  u64,		user_data	)
 	),
 
 	TP_fast_assign(
 		__entry->ctx		= ctx;
+		__entry->req		= req;
 		__entry->opcode		= opcode;
 		__entry->user_data	= user_data;
 	),
 
-	TP_printk("ring %p, op %d, data 0x%llx",
-		  __entry->ctx, __entry->opcode,
+	TP_printk("ring %p, req %p, op %d, data 0x%llx",
+		  __entry->ctx, __entry->req, __entry->opcode,
 		  (unsigned long long) __entry->user_data)
 );
 
 #endif /* _TRACE_IO_URING_H */
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -42,23 +42,25 @@ struct io_uring_sqe {
 		__u32		statx_flags;
 		__u32		fadvise_advice;
 		__u32		splice_flags;
+		__u32		rename_flags;
+		__u32		unlink_flags;
+		__u32		hardlink_flags;
 	};
 	__u64	user_data;	/* data to be passed back at completion time */
+	/* pack this to avoid bogus arm OABI complaints */
 	union {
-		struct {
-			/* pack this to avoid bogus arm OABI complaints */
-			union {
-				/* index into fixed buffers, if used */
-				__u16	buf_index;
-				/* for grouped buffer selection */
-				__u16	buf_group;
-			} __attribute__((packed));
-			/* personality to use, if used */
-			__u16	personality;
-			__s32	splice_fd_in;
-		};
-		__u64	__pad2[3];
+		/* index into fixed buffers, if used */
+		__u16	buf_index;
+		/* for grouped buffer selection */
+		__u16	buf_group;
+	} __attribute__((packed));
+	/* personality to use, if used */
+	__u16	personality;
+	union {
+		__s32	splice_fd_in;
+		__u32	file_index;
 	};
+	__u64	__pad2[2];
 };
 
 enum {
@@ -132,6 +134,9 @@ enum {
 	IORING_OP_PROVIDE_BUFFERS,
 	IORING_OP_REMOVE_BUFFERS,
 	IORING_OP_TEE,
+	IORING_OP_SHUTDOWN,
+	IORING_OP_RENAMEAT,
+	IORING_OP_UNLINKAT,
 
 	/* this goes last, obviously */
 	IORING_OP_LAST,
@@ -145,14 +150,34 @@ enum {
 /*
  * sqe->timeout_flags
  */
 #define IORING_TIMEOUT_ABS		(1U << 0)
+#define IORING_TIMEOUT_UPDATE		(1U << 1)
+#define IORING_TIMEOUT_BOOTTIME		(1U << 2)
+#define IORING_TIMEOUT_REALTIME		(1U << 3)
+#define IORING_LINK_TIMEOUT_UPDATE	(1U << 4)
+#define IORING_TIMEOUT_CLOCK_MASK	(IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
+#define IORING_TIMEOUT_UPDATE_MASK	(IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
 /*
  * sqe->splice_flags
  * extends splice(2) flags
  */
 #define SPLICE_F_FD_IN_FIXED	(1U << 31) /* the last bit of __u32 */
 
+/*
+ * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
+ * command flags for POLL_ADD are stored in sqe->len.
+ *
+ * IORING_POLL_ADD_MULTI	Multishot poll. Sets IORING_CQE_F_MORE if
+ *				the poll handler will continue to report
+ *				CQEs on behalf of the same SQE.
+ *
+ * IORING_POLL_UPDATE		Update existing poll request, matching
+ *				sqe->addr as the old user_data field.
+ */
+#define IORING_POLL_ADD_MULTI	(1U << 0)
+#define IORING_POLL_UPDATE_EVENTS	(1U << 1)
+#define IORING_POLL_UPDATE_USER_DATA	(1U << 2)
+
 /*
  * IO completion data structure (Completion Queue Entry)
  */
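A userspace-side sketch of the new multishot poll mode, using only fields defined in this header; get_sqe()/wait_cqe() stand in for the application's own ring accessors and are not part of the ABI:

	/* arm a multishot poll on sockfd */
	struct io_uring_sqe *sqe = get_sqe(ring);

	memset(sqe, 0, sizeof(*sqe));
	sqe->opcode	   = IORING_OP_POLL_ADD;
	sqe->fd		   = sockfd;
	sqe->poll32_events = POLLIN;		    /* events of interest */
	sqe->len	   = IORING_POLL_ADD_MULTI; /* flags live in sqe->len */
	sqe->user_data	   = 0xcafe;

	/* completion side: while IORING_CQE_F_MORE is set, this SQE keeps
	 * producing CQEs and must not be re-armed */
	struct io_uring_cqe *cqe = wait_cqe(ring);
	if (!(cqe->flags & IORING_CQE_F_MORE)) {
		/* poll terminated; re-arm if still interested */
	}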
@@ -166,8 +191,10 @@ struct io_uring_cqe {
  * cqe->flags
  *
  * IORING_CQE_F_BUFFER	If set, the upper 16 bits are the buffer ID
+ * IORING_CQE_F_MORE	If set, parent SQE will generate more CQE entries
  */
 #define IORING_CQE_F_BUFFER		(1U << 0)
+#define IORING_CQE_F_MORE		(1U << 1)
 
 enum {
 	IORING_CQE_BUFFER_SHIFT		= 16,
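The matching consumer-side decode of those two flag bits, as a sketch (IORING_CQE_BUFFER_SHIFT comes from the enum at the end of the hunk above):

	if (cqe->flags & IORING_CQE_F_BUFFER) {
		/* provided-buffer ID rides in the upper 16 bits of flags */
		unsigned int buf_id = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
		/* look the buffer up in the group registered for this req */
	}
	if (cqe->flags & IORING_CQE_F_MORE) {
		/* parent SQE (e.g. a multishot poll) will post more CQEs */
	}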
@@ -226,6 +253,7 @@ struct io_cqring_offsets {
 #define IORING_ENTER_GETEVENTS	(1U << 0)
 #define IORING_ENTER_SQ_WAKEUP	(1U << 1)
 #define IORING_ENTER_SQ_WAIT	(1U << 2)
+#define IORING_ENTER_EXT_ARG	(1U << 3)
 
 /*
  * Passed in for io_uring_setup(2). Copied back with updated info on success
@@ -253,6 +281,10 @@ struct io_uring_params {
 #define IORING_FEAT_CUR_PERSONALITY	(1U << 4)
 #define IORING_FEAT_FAST_POLL		(1U << 5)
 #define IORING_FEAT_POLL_32BITS 	(1U << 6)
+#define IORING_FEAT_SQPOLL_NONFIXED	(1U << 7)
+#define IORING_FEAT_EXT_ARG		(1U << 8)
+#define IORING_FEAT_NATIVE_WORKERS	(1U << 9)
+#define IORING_FEAT_RSRC_TAGS		(1U << 10)
 
 /*
  * io_uring_register(2) opcodes and arguments
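Userspace discovers the four new feature bits the usual way, through the features field that io_uring_setup(2) copies back into struct io_uring_params. A self-contained sketch:

	#include <string.h>
	#include <sys/syscall.h>
	#include <unistd.h>
	#include <linux/io_uring.h>

	int setup_ring(void)
	{
		struct io_uring_params p;

		memset(&p, 0, sizeof(p));
		int ring_fd = (int)syscall(__NR_io_uring_setup, 64, &p);

		if (ring_fd >= 0 && !(p.features & IORING_FEAT_EXT_ARG)) {
			/* older kernel: fall back to sigmask-only waits */
		}
		return ring_fd;
	}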
@@ -272,16 +304,62 @@ enum {
 	IORING_REGISTER_RESTRICTIONS		= 11,
 	IORING_REGISTER_ENABLE_RINGS		= 12,
 
+	/* extended with tagging */
+	IORING_REGISTER_FILES2			= 13,
+	IORING_REGISTER_FILES_UPDATE2		= 14,
+	IORING_REGISTER_BUFFERS2		= 15,
+	IORING_REGISTER_BUFFERS_UPDATE		= 16,
+
+	/* set/clear io-wq thread affinities */
+	IORING_REGISTER_IOWQ_AFF		= 17,
+	IORING_UNREGISTER_IOWQ_AFF		= 18,
+
+	/* set/get max number of io-wq workers */
+	IORING_REGISTER_IOWQ_MAX_WORKERS	= 19,
+
 	/* this goes last */
 	IORING_REGISTER_LAST
 };
 
+/* io-wq worker categories */
+enum {
+	IO_WQ_BOUND,
+	IO_WQ_UNBOUND,
+};
+
+/* deprecated, see struct io_uring_rsrc_update */
 struct io_uring_files_update {
 	__u32 offset;
 	__u32 resv;
 	__aligned_u64 /* __s32 * */ fds;
 };
 
+struct io_uring_rsrc_register {
+	__u32 nr;
+	__u32 resv;
+	__u64 resv2;
+	__aligned_u64 data;
+	__aligned_u64 tags;
+};
+
+struct io_uring_rsrc_update {
+	__u32 offset;
+	__u32 resv;
+	__aligned_u64 data;
+};
+
+struct io_uring_rsrc_update2 {
+	__u32 offset;
+	__u32 resv;
+	__aligned_u64 data;
+	__aligned_u64 tags;
+	__u32 nr;
+	__u32 resv2;
+};
+
+/* Skip updating fd indexes set to this value in the fd table */
+#define IORING_REGISTER_FILES_SKIP	(-2)
+
 #define IO_URING_OP_SUPPORTED	(1U << 0)
 
 struct io_uring_probe_op {
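Of the new register opcodes, IORING_REGISTER_IOWQ_MAX_WORKERS has the least obvious calling convention: it takes a two-element array indexed by the IO_WQ_BOUND/IO_WQ_UNBOUND categories above, writes the previous limits back into that same array, and treats a value of 0 as "query only". A raw-syscall sketch, reusing the includes and ring_fd from the earlier setup sketch:

	static int cap_iowq_workers(int ring_fd)
	{
		__u32 counts[2];

		counts[IO_WQ_BOUND]   = 8;	/* e.g. regular file I/O */
		counts[IO_WQ_UNBOUND] = 64;	/* e.g. sockets */

		int ret = (int)syscall(__NR_io_uring_register, ring_fd,
				       IORING_REGISTER_IOWQ_MAX_WORKERS,
				       counts, 2);
		/* on success, counts[] holds the previous per-category limits */
		return ret;
	}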
@@ -329,4 +407,11 @@ enum {
 	IORING_RESTRICTION_LAST
 };
 
+struct io_uring_getevents_arg {
+	__u64	sigmask;
+	__u32	sigmask_sz;
+	__u32	pad;
+	__u64	ts;
+};
+
 #endif
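Tying the uapi additions together: a hedged sketch of a timeout-capable wait that uses IORING_ENTER_EXT_ARG with the struct io_uring_getevents_arg defined above (pointers travel as __u64 fields; assumes <stdint.h> plus the includes from the setup sketch):

	#include <linux/time_types.h>	/* struct __kernel_timespec */

	/* wait for one CQE with a 1s timeout */
	static int wait_cqe_timeout(int ring_fd)
	{
		struct __kernel_timespec ts = { .tv_sec = 1 };
		struct io_uring_getevents_arg arg = {
			.sigmask    = 0,	/* no signal mask */
			.sigmask_sz = 0,
			.ts	    = (__u64)(uintptr_t)&ts,
		};

		/* argp/argsz now describe 'arg', not a raw sigmask */
		return (int)syscall(__NR_io_uring_enter, ring_fd, 0, 1,
				    IORING_ENTER_GETEVENTS | IORING_ENTER_EXT_ARG,
				    &arg, sizeof(arg));
	}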
--- /dev/null
+++ b/io_uring/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for io_uring
+
+obj-$(CONFIG_IO_URING)		+= io_uring.o
+obj-$(CONFIG_IO_WQ)		+= io-wq.o
io_uring/io-wq.c: new file, 1398 lines (file diff suppressed because it is too large)
--- a/fs/io-wq.h
+++ b/io_uring/io-wq.h
@@ -1,7 +1,7 @@
 #ifndef INTERNAL_IO_WQ_H
 #define INTERNAL_IO_WQ_H
 
-#include <linux/io_uring.h>
+#include <linux/refcount.h>
 
 struct io_wq;
 
@@ -9,16 +9,8 @@ enum {
 	IO_WQ_WORK_CANCEL	= 1,
 	IO_WQ_WORK_HASHED	= 2,
 	IO_WQ_WORK_UNBOUND	= 4,
-	IO_WQ_WORK_NO_CANCEL	= 8,
 	IO_WQ_WORK_CONCURRENT	= 16,
 
-	IO_WQ_WORK_FILES	= 32,
-	IO_WQ_WORK_FS		= 64,
-	IO_WQ_WORK_MM		= 128,
-	IO_WQ_WORK_CREDS	= 256,
-	IO_WQ_WORK_BLKCG	= 512,
-	IO_WQ_WORK_FSIZE	= 1024,
-
 	IO_WQ_HASH_SHIFT	= 24,	/* upper 8 bits are used for hash key */
 };
 
@@ -52,6 +44,7 @@ static inline void wq_list_add_after(struct io_wq_work_node *node,
 static inline void wq_list_add_tail(struct io_wq_work_node *node,
 				    struct io_wq_work_list *list)
 {
+	node->next = NULL;
 	if (!list->first) {
 		list->last = node;
 		WRITE_ONCE(list->first, node);
@@ -59,7 +52,6 @@ static inline void wq_list_add_tail(struct io_wq_work_node *node,
 		list->last->next = node;
 		list->last = node;
 	}
-	node->next = NULL;
 }
 
 static inline void wq_list_cut(struct io_wq_work_list *list,
@@ -95,7 +87,6 @@ static inline void wq_list_del(struct io_wq_work_list *list,
 
 struct io_wq_work {
 	struct io_wq_work_node list;
-	struct io_identity *identity;
 	unsigned flags;
 };
 
@@ -107,37 +98,48 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
 	return container_of(work->list.next, struct io_wq_work, list);
 }
 
-typedef void (free_work_fn)(struct io_wq_work *);
-typedef struct io_wq_work *(io_wq_work_fn)(struct io_wq_work *);
+typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
+typedef void (io_wq_work_fn)(struct io_wq_work *);
+
+struct io_wq_hash {
+	refcount_t refs;
+	unsigned long map;
+	struct wait_queue_head wait;
+};
+
+static inline void io_wq_put_hash(struct io_wq_hash *hash)
+{
+	if (refcount_dec_and_test(&hash->refs))
+		kfree(hash);
+}
 
 struct io_wq_data {
-	struct user_struct *user;
+	struct io_wq_hash *hash;
+	struct task_struct *task;
 	io_wq_work_fn *do_work;
 	free_work_fn *free_work;
 };
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
-bool io_wq_get(struct io_wq *wq, struct io_wq_data *data);
-void io_wq_destroy(struct io_wq *wq);
+void io_wq_exit_start(struct io_wq *wq);
+void io_wq_put_and_exit(struct io_wq *wq);
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
 
+int io_wq_cpu_affinity(struct io_wq *wq, cpumask_var_t mask);
+int io_wq_max_workers(struct io_wq *wq, int *new_count);
+
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
 	return work->flags & IO_WQ_WORK_HASHED;
 }
 
-void io_wq_cancel_all(struct io_wq *wq);
-
 typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
 
 enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
 					void *data, bool cancel_all);
 
-struct task_struct *io_wq_get_task(struct io_wq *wq);
-
 #if defined(CONFIG_IO_WQ)
 extern void io_wq_worker_sleeping(struct task_struct *);
 extern void io_wq_worker_running(struct task_struct *);
@@ -152,6 +154,7 @@ static inline void io_wq_worker_running(struct task_struct *tsk)
 
 static inline bool io_wq_current_is_worker(void)
 {
-	return in_task() && (current->flags & PF_IO_WORKER);
+	return in_task() && (current->flags & PF_IO_WORKER) &&
+		current->pf_io_worker;
 }
 #endif
fs/io_uring.c → io_uring/io_uring.c: file diff suppressed because it is too large
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -765,7 +765,7 @@ void __noreturn do_exit(long code)
 		schedule();
 	}
 
-	io_uring_files_cancel(tsk->files);
+	io_uring_files_cancel();
 	exit_signals(tsk);		/* sets PF_EXITING */
 
 	/* sync mm's RSS info before statistics gathering */
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -961,6 +961,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 	tsk->splice_pipe = NULL;
 	tsk->task_frag.page = NULL;
 	tsk->wake_q.next = NULL;
+	tsk->pf_io_worker = NULL;
 
 	account_kernel_stack(tsk, 1);
 
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -21,7 +21,7 @@
 #include <asm/tlb.h>
 
 #include "../workqueue_internal.h"
-#include "../../fs/io-wq.h"
+#include "../../io_uring/io-wq.h"
 #include "../smpboot.h"
 
 #include "pelt.h"