summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMartin Jansa <martin.jansa@gmail.com>2023-10-19 13:11:11 +0200
committerMartin Jansa <martin.jansa@gmail.com>2023-10-19 13:45:53 +0200
commit38c813cffdf4931d16044bfe662880d63cbdcfa3 (patch)
tree906173f8b4f91129cd02572e556482e156ac090a
parent4f84537670020a8d902248479efa9f062089c0d3 (diff)
downloadopenembedded-core-contrib-jansa/io_uring.tar.gz
io-uring-writev: add simple test for writing a file with io_uring (branch: jansa/io_uring)
* pseudo doesn't support io_uring yet, as shown after nodejs was upgraded: nodejs-native >= 20.3.0 comes with libuv >= 1.45.0, which has:
  https://github.com/libuv/libuv/pull/3952
* files created in do_install with nodejs-native aren't tracked by pseudo and will result in a host-user-contamination QA issue or "KeyError: 'getpwuid(): uid not found" as documented in:
  https://github.com/shr-project/com.webos.app.minimal/commit/bd238047c8ce3cd085041d276613396b863213cf
* this is a much simpler test for io_uring, without the need to build the whole nodejs-native; it's based on:
  https://unixism.net/2020/04/io-uring-by-example-part-1-introduction/
  just using writev instead of readv
* if it works fine, the file "test" will be tracked in the pseudo database from its creation in ${D}, like:
  core2-64-oe-linux/io-uring-writev/1.0 $ sqlite3 pseudo/files.db "select * from files"
  1|/OE/build/oe-core/tmp-glibc/work/core2-64-oe-linux/io-uring-writev/1.0/image|66305|48357743|0|0|16877|0|0
  2|/OE/build/oe-core/tmp-glibc/work/core2-64-oe-linux/io-uring-writev/1.0/image/test|66305|48316709|0|0|33188|0|0
  and it does in this case, because I haven't figured out how to call writev() without first opening the fd of the output file, and that openat() call gets intercepted by pseudo:
  io-uring-writev/1.0 $ strace -v ./io-uring-writev test2 2>&1 | grep openat
  openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
  openat(AT_FDCWD, "/usr/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
  openat(AT_FDCWD, "test2", O_WRONLY|O_CREAT, 0666) = 4
  while with libuv there was no openat() for the output files in strace

Signed-off-by: Martin Jansa <martin.jansa@gmail.com>
-rw-r--r--meta-selftest/recipes-test/io-uring/io-uring-writev.bb18
-rw-r--r--meta-selftest/recipes-test/io-uring/io-uring-writev/io-uring-writev.c389
2 files changed, 407 insertions, 0 deletions
diff --git a/meta-selftest/recipes-test/io-uring/io-uring-writev.bb b/meta-selftest/recipes-test/io-uring/io-uring-writev.bb
new file mode 100644
index 0000000000..8b2ca6005b
--- /dev/null
+++ b/meta-selftest/recipes-test/io-uring/io-uring-writev.bb
@@ -0,0 +1,18 @@
+DESCRIPTION = "Simple io_uring test"
+SECTION = "examples"
+LICENSE = "MIT"
+LIC_FILES_CHKSUM = "file://${COMMON_LICENSE_DIR}/MIT;md5=0835ade698e0bcf8506ecda2f7b4f302"
+# The only source is the single C file shipped next to this recipe.
+SRC_URI = "file://io-uring-writev.c"
+# Build directly in the work directory.
+S = "${WORKDIR}"
+# Compiled with BUILD_CC (not CC) because do_install below executes the binary during the build.
+do_compile() {
+ ${BUILD_CC} io-uring-writev.c -o io-uring-writev
+}
+# Run the program so that it creates ${D}/test via io_uring; the commit message describes checking pseudo's files.db for it.
+do_install() {
+ ${S}/io-uring-writev ${D}/test
+}
+# Package the file created by do_install.
+FILES:${PN} = "test"
diff --git a/meta-selftest/recipes-test/io-uring/io-uring-writev/io-uring-writev.c b/meta-selftest/recipes-test/io-uring/io-uring-writev/io-uring-writev.c
new file mode 100644
index 0000000000..a5e4253b7a
--- /dev/null
+++ b/meta-selftest/recipes-test/io-uring/io-uring-writev/io-uring-writev.c
@@ -0,0 +1,389 @@
+/* Taken from
+ * https://unixism.net/2020/04/io-uring-by-example-part-1-introduction/
+ * with small modification to write into files instead of reading them
+ * to test io_uring support in pseudo (https://git.yoctoproject.org/pseudo/)
+ * once implemented there
+ * */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/uio.h>
+#include <linux/fs.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+/* If your compilation fails because the header file below is missing,
+ * your kernel is probably too old to support io_uring.
+ * */
+#include <linux/io_uring.h>
+
+#define QUEUE_DEPTH 1
+#define BLOCK_SZ 1024
+
+/* This is x86 specific */
+#define read_barrier() __asm__ __volatile__("":::"memory")
+#define write_barrier() __asm__ __volatile__("":::"memory")
+
+struct app_io_sq_ring { /* Pointers into the mmap()ed submission queue ring, filled in by app_setup_uring() */
+ unsigned *head; /* ring base + sq_off.head */
+ unsigned *tail; /* ring base + sq_off.tail; advanced by submit_to_sq() */
+ unsigned *ring_mask; /* ring base + sq_off.ring_mask; ANDed with tail to get a slot index */
+ unsigned *ring_entries; /* ring base + sq_off.ring_entries */
+ unsigned *flags; /* ring base + sq_off.flags */
+ unsigned *array; /* ring base + sq_off.array; indirection array holding SQE indices */
+};
+
+struct app_io_cq_ring { /* Pointers into the mmap()ed completion queue ring, filled in by app_setup_uring() */
+ unsigned *head; /* ring base + cq_off.head; advanced by read_from_cq() */
+ unsigned *tail; /* ring base + cq_off.tail; head == tail means the queue is empty */
+ unsigned *ring_mask; /* ring base + cq_off.ring_mask; ANDed with head to get a slot index */
+ unsigned *ring_entries; /* ring base + cq_off.ring_entries */
+ struct io_uring_cqe *cqes; /* ring base + cq_off.cqes; the completion entries themselves */
+};
+
+struct submitter { /* Everything needed to talk to one io_uring instance */
+ int ring_fd; /* descriptor returned by io_uring_setup() */
+ struct app_io_sq_ring sq_ring; /* submission ring pointers */
+ struct io_uring_sqe *sqes; /* submission entries, mmap()ed at IORING_OFF_SQES */
+ struct app_io_cq_ring cq_ring; /* completion ring pointers */
+};
+
+struct file_info { /* Per-request bookkeeping; its address travels through sqe->user_data */
+ off_t file_sz; /* total payload size in bytes */
+ struct iovec iovecs[]; /* Flexible array passed to the kernel as the readv/writev vector */
+};
+
+/*
+ * This code is written in the days when io_uring-related system calls are not
+ * part of standard C libraries. So, we roll our own system call wrapper
+ * functions.
+ * */
+
+/* Raw io_uring_setup system call; the syscall result is returned as an int. */
+int io_uring_setup(unsigned entries, struct io_uring_params *p)
+{
+    long ring_fd = syscall(__NR_io_uring_setup, entries, p);
+
+    return (int) ring_fd;
+}
+
+/*
+ * Raw io_uring_enter system call, always passing a NULL signal mask of size 0;
+ * the syscall result is returned as an int.
+ */
+int io_uring_enter(int ring_fd, unsigned int to_submit,
+                   unsigned int min_complete, unsigned int flags)
+{
+    long rc = syscall(__NR_io_uring_enter, ring_fd, to_submit, min_complete,
+                      flags, NULL, 0);
+
+    return (int) rc;
+}
+
+/*
+ * Report the size in bytes of the file behind an open descriptor.
+ * Regular files use st_size from fstat(); block devices are asked through the
+ * BLKGETSIZE64 ioctl. Returns -1 when fstat() or the ioctl fails (after
+ * perror) and for every other file type.
+ */
+off_t get_file_size(int fd)
+{
+    struct stat info;
+    unsigned long long dev_bytes;
+
+    if (fstat(fd, &info) < 0) {
+        perror("fstat");
+        return -1;
+    }
+
+    if (S_ISREG(info.st_mode))
+        return info.st_size;
+
+    if (!S_ISBLK(info.st_mode))
+        return -1;
+
+    if (ioctl(fd, BLKGETSIZE64, &dev_bytes) != 0) {
+        perror("ioctl");
+        return -1;
+    }
+    return dev_bytes;
+}
+
+/*
+ * Create an io_uring instance with QUEUE_DEPTH entries and map its submission
+ * ring, completion ring and submission entry array into this process,
+ * recording the resulting pointers in *s.
+ *
+ * io_uring requires a lot of setup which looks pretty hairy, but isn't all
+ * that difficult to understand; liburing exists to hide this boilerplate.
+ *
+ * Returns 0 on success. Returns 1 on failure after printing the reason; in
+ * that case every mapping made so far is unmapped and the ring descriptor is
+ * closed again (s->ring_fd is set to -1).
+ * */
+
+int app_setup_uring(struct submitter *s) {
+    struct app_io_sq_ring *sring = &s->sq_ring;
+    struct app_io_cq_ring *cring = &s->cq_ring;
+    struct io_uring_params p;
+    /* char pointers so the byte offsets below are standard pointer arithmetic */
+    char *sq_ptr, *cq_ptr;
+    void *map;
+    size_t sring_sz, cring_sz, sqes_sz;
+    int single_mmap;
+
+    /*
+     * io_uring_setup() wants the params structure zeroed out. We could set
+     * flags if we needed to, but for this example we don't.
+     * */
+    memset(&p, 0, sizeof(p));
+    s->ring_fd = io_uring_setup(QUEUE_DEPTH, &p);
+    if (s->ring_fd < 0) {
+        perror("io_uring_setup");
+        return 1;
+    }
+
+    /*
+     * io_uring communication happens via 2 shared kernel-user space ring
+     * buffers. While the completion queue is directly manipulated, the
+     * submission queue has an indirection array in between, which is part of
+     * the submission ring mapping. Sizes are kept in size_t, the type mmap()
+     * takes, instead of being narrowed to int.
+     * */
+    sring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned);
+    cring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
+    sqes_sz = p.sq_entries * sizeof(struct io_uring_sqe);
+
+    /*
+     * Rather than checking kernel versions, look at the features bit mask: if
+     * IORING_FEAT_SINGLE_MMAP is set, one mapping covers both rings and it has
+     * to be as large as the bigger of the two.
+     * */
+    single_mmap = (p.features & IORING_FEAT_SINGLE_MMAP) != 0;
+    if (single_mmap) {
+        if (cring_sz > sring_sz)
+            sring_sz = cring_sz;
+        cring_sz = sring_sz;
+    }
+
+    /* Map the submission ring (and, with SINGLE_MMAP, the completion ring). */
+    map = mmap(NULL, sring_sz, PROT_READ | PROT_WRITE,
+               MAP_SHARED | MAP_POPULATE,
+               s->ring_fd, IORING_OFF_SQ_RING);
+    if (map == MAP_FAILED) {
+        perror("mmap");
+        goto err_close;
+    }
+    sq_ptr = map;
+
+    if (single_mmap) {
+        cq_ptr = sq_ptr;
+    } else {
+        /* Without SINGLE_MMAP the completion ring needs its own mapping. */
+        map = mmap(NULL, cring_sz, PROT_READ | PROT_WRITE,
+                   MAP_SHARED | MAP_POPULATE,
+                   s->ring_fd, IORING_OFF_CQ_RING);
+        if (map == MAP_FAILED) {
+            perror("mmap");
+            goto err_unmap_sq;
+        }
+        cq_ptr = map;
+    }
+
+    /* Map in the submission queue entries array */
+    map = mmap(NULL, sqes_sz, PROT_READ | PROT_WRITE,
+               MAP_SHARED | MAP_POPULATE,
+               s->ring_fd, IORING_OFF_SQES);
+    if (map == MAP_FAILED) {
+        perror("mmap");
+        goto err_unmap_cq;
+    }
+    s->sqes = map;
+
+    /* Save the submission ring fields for later easy reference */
+    sring->head = (unsigned *) (sq_ptr + p.sq_off.head);
+    sring->tail = (unsigned *) (sq_ptr + p.sq_off.tail);
+    sring->ring_mask = (unsigned *) (sq_ptr + p.sq_off.ring_mask);
+    sring->ring_entries = (unsigned *) (sq_ptr + p.sq_off.ring_entries);
+    sring->flags = (unsigned *) (sq_ptr + p.sq_off.flags);
+    sring->array = (unsigned *) (sq_ptr + p.sq_off.array);
+
+    /* Save the completion ring fields for later easy reference */
+    cring->head = (unsigned *) (cq_ptr + p.cq_off.head);
+    cring->tail = (unsigned *) (cq_ptr + p.cq_off.tail);
+    cring->ring_mask = (unsigned *) (cq_ptr + p.cq_off.ring_mask);
+    cring->ring_entries = (unsigned *) (cq_ptr + p.cq_off.ring_entries);
+    cring->cqes = (struct io_uring_cqe *) (cq_ptr + p.cq_off.cqes);
+
+    return 0;
+
+err_unmap_cq:
+    /* With SINGLE_MMAP cq_ptr aliases sq_ptr and is released just below. */
+    if (!single_mmap)
+        munmap(cq_ptr, cring_sz);
+err_unmap_sq:
+    munmap(sq_ptr, sring_sz);
+err_close:
+    close(s->ring_fd);
+    s->ring_fd = -1;
+    return 1;
+}
+
+/*
+ * Write len characters starting at buf to stdout, one fputc() per character.
+ * stdout's own buffering keeps the character-by-character output efficient.
+ * */
+void output_to_console(char *buf, int len) {
+    char *cursor = buf;
+
+    for (; len-- != 0; cursor++)
+        fputc(*cursor, stdout);
+}
+
+/*
+ * Read from completion queue.
+ * Walks every completion event currently in the completion queue. Each event
+ * carries, in user_data, the file_info of the request it belongs to. For a
+ * failed request the error is printed to stderr; for a successful one the
+ * buffers referenced by its iovecs are printed to the console. The file_info
+ * is freed once its event has been handled, since nothing else references it
+ * afterwards.
+ * */
+
+void read_from_cq(struct submitter *s) {
+    struct app_io_cq_ring *cring = &s->cq_ring;
+    unsigned head = *cring->head;
+
+    for (;;) {
+        /*
+         * Remember, this is a ring buffer. If head == tail, it means that the
+         * buffer is empty. The tail is loaded before the barrier so that the
+         * entry contents are never read ahead of the tail that publishes them.
+         * */
+        unsigned tail = *cring->tail;
+        read_barrier();
+        if (head == tail)
+            break;
+
+        /* Get the entry */
+        struct io_uring_cqe *cqe = &cring->cqes[head & *cring->ring_mask];
+        struct file_info *fi = (struct file_info *) (unsigned long) cqe->user_data;
+
+        if (cqe->res < 0) {
+            /* Failed request: report it and do not echo its buffers. */
+            fprintf(stderr, "Error: %s\n", strerror(abs(cqe->res)));
+        } else {
+            /* Divide in off_t first; casting file_sz to int before dividing
+             * would truncate large sizes. */
+            off_t blocks = fi->file_sz / BLOCK_SZ;
+            if (fi->file_sz % BLOCK_SZ)
+                blocks++;
+
+            for (off_t i = 0; i < blocks; i++)
+                output_to_console(fi->iovecs[i].iov_base,
+                                  (int) fi->iovecs[i].iov_len);
+        }
+
+        free(fi);
+        head++;
+    }
+
+    /* Finish reading the entries before handing their slots back. */
+    write_barrier();
+    *cring->head = head;
+}
+/*
+ * Consume every entry currently in the completion queue and look for the one
+ * tagged with fi (the tag is the file_info address stored in user_data).
+ * Returns 0 when that request wrote all fi->file_sz bytes, 1 when it failed
+ * or wrote fewer bytes (the reason is printed), and -1 when no completion
+ * tagged with fi was present.
+ * */
+static int reap_write_completion(struct submitter *s, const struct file_info *fi)
+{
+    struct app_io_cq_ring *cring = &s->cq_ring;
+    unsigned head = *cring->head;
+    /* Acquire: entry contents must not be read ahead of the tail covering them. */
+    unsigned tail = __atomic_load_n(cring->tail, __ATOMIC_ACQUIRE);
+    int found = 0;
+    int res = 0;
+
+    for (; head != tail; head++) {
+        struct io_uring_cqe *cqe = &cring->cqes[head & *cring->ring_mask];
+
+        if (cqe->user_data == (unsigned long) fi) {
+            found = 1;
+            res = cqe->res;
+        }
+    }
+    /* Release: hand the slots back only after we are done reading them. */
+    __atomic_store_n(cring->head, head, __ATOMIC_RELEASE);
+
+    if (!found)
+        return -1;
+    if (res < 0) {
+        fprintf(stderr, "Error: %s\n", strerror(-res));
+        return 1;
+    }
+    if ((off_t) res != fi->file_sz) {
+        fprintf(stderr, "Error: short write, %d of %lld bytes\n",
+                res, (long long) fi->file_sz);
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Submit to submission queue.
+ * Opens (creating if necessary) file_path, queues one IORING_OP_WRITEV
+ * request that writes the greeting at offset 0, waits for it with
+ * io_uring_enter() and checks its completion, so a failed or short write is
+ * reported instead of silently ignored.
+ *
+ * Returns 0 when the whole greeting was written, 1 on any error. The file
+ * descriptor is always closed before returning.
+ * */
+int submit_to_sq(char *file_path, struct submitter *s) {
+    const char *bark = "Hello IO!";
+    struct app_io_sq_ring *sring = &s->sq_ring;
+    struct io_uring_sqe *sqe;
+    struct file_info *fi;
+    size_t bark_len = strlen(bark);
+    unsigned tail, index;
+    int status = 1;
+    int ret;
+
+    int file_fd = open(file_path, O_WRONLY|O_CREAT, 0666);
+    if (file_fd < 0 ) {
+        perror("open");
+        return 1;
+    }
+
+    /*
+     * The payload lives in one buffer, so exactly one iovec is needed, however
+     * long the greeting is. If you don't understand this, look up how the
+     * readv() and writev() system calls work.
+     * */
+    fi = malloc(sizeof(*fi) + sizeof(struct iovec));
+    if (!fi) {
+        fprintf(stderr, "Unable to allocate memory\n");
+        goto out_close;
+    }
+    fi->file_sz = (off_t) bark_len;
+    /* The kernel only reads from the buffer for a write, so dropping const is safe. */
+    fi->iovecs[0].iov_base = (void *) bark;
+    fi->iovecs[0].iov_len = bark_len;
+
+    /* Add our submission queue entry to the tail of the SQE ring buffer.
+     * Only this process ever writes the SQ tail, so a plain load is enough. */
+    tail = *sring->tail;
+    index = tail & *sring->ring_mask;
+    sqe = &s->sqes[index];
+    /* Slots are reused across calls; clear whatever the last request left. */
+    memset(sqe, 0, sizeof(*sqe));
+    sqe->opcode = IORING_OP_WRITEV;
+    sqe->fd = file_fd;
+    sqe->addr = (unsigned long) fi->iovecs;
+    sqe->len = 1;
+    sqe->off = 0;
+    sqe->user_data = (unsigned long) fi;
+    sring->array[index] = index;
+
+    /* Release: publish the new tail only after the entry is fully written. */
+    __atomic_store_n(sring->tail, tail + 1, __ATOMIC_RELEASE);
+
+    /*
+     * Tell the kernel we have submitted events with the io_uring_enter() system
+     * call. We also pass in the IORING_ENTER_GETEVENTS flag which causes the
+     * io_uring_enter() call to wait until min_complete events (the 3rd param)
+     * complete.
+     * */
+    ret = io_uring_enter(s->ring_fd, 1, 1, IORING_ENTER_GETEVENTS);
+    if (ret < 0) {
+        perror("io_uring_enter");
+        /* The request's state is unknown here, so fi is deliberately not
+         * freed; the caller treats this as fatal. */
+        goto out_close;
+    }
+
+    ret = reap_write_completion(s, fi);
+    if (ret < 0) {
+        fprintf(stderr, "Error: no completion for submitted write\n");
+        /* Same reasoning as above: the request may still be in flight. */
+        goto out_close;
+    }
+    status = ret;
+    free(fi);
+
+out_close:
+    if (close(file_fd) < 0) {
+        perror("close");
+        status = 1;
+    }
+    return status;
+}
+
+/*
+ * Entry point: set up one ring, then hand every file name given on the
+ * command line to submit_to_sq(). Returns 1 on a usage error, allocation
+ * failure, ring setup failure or the first failing file, and 0 otherwise.
+ */
+int main(int argc, char *argv[]) {
+    struct submitter *ring;
+    int arg;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <filename>, barks to <filename>\n", argv[0]);
+        return 1;
+    }
+
+    /* Zero-initialised ring state. */
+    ring = calloc(1, sizeof(*ring));
+    if (ring == NULL) {
+        perror("malloc");
+        return 1;
+    }
+
+    if (app_setup_uring(ring) != 0) {
+        fprintf(stderr, "Unable to setup uring!\n");
+        return 1;
+    }
+
+    arg = 1;
+    while (arg < argc) {
+        if (submit_to_sq(argv[arg], ring) != 0) {
+            fprintf(stderr, "Error writting file\n");
+            return 1;
+        }
+        /* Echoing completions through read_from_cq(ring) stays disabled. */
+        arg++;
+    }
+
+    return 0;
+}