From 6e556e9857f0194bda54ce1e34b07c3c67ece4ff Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sun, 22 Aug 2021 13:16:27 +1200 Subject: [PATCH 1/2] Teach posixaio about iodepth_batch_complete_max. The posixaio engine previously ignored iodepth_batch_complete_max and polled the whole set of in flight IOs. This might be a little confusing when comparing with other engines. Therefore, provide a new option --posixaio_respect_iodepth_batch_complete_max. Not enabled by default, so as not to change any results unexpectedly. Signed-off-by: Thomas Munro --- HOWTO | 9 +++++++++ engines/posixaio.c | 28 ++++++++++++++++++++++++++++ fio.1 | 7 +++++++ optgroup.h | 2 ++ 4 files changed, 46 insertions(+) diff --git a/HOWTO b/HOWTO index 8c9e41356b..4748743826 100644 --- a/HOWTO +++ b/HOWTO @@ -1944,6 +1944,7 @@ I/O engine **posixaio** POSIX asynchronous I/O using :manpage:`aio_read(3)` and :manpage:`aio_write(3)`. + This engine defines engine specific options. **solarisaio** Solaris native asynchronous I/O. @@ -2642,6 +2643,14 @@ with the caveat that when used on the command line, they must come after the If set, stdout and stderr streams are redirected to files named from the job name. Default is true. +.. option:: posixaio_respect_iodepth_batch_complete_max=bool : [posixaio] + + If set, limit batch completions according to + :option:`iodepth_batch_complete_max`, as other engines do. Default is + false, effectively behaving as though + :option:`iodepth_batch_complete_max` has the same value as + :option:`iodepth`. 
+ I/O depth ~~~~~~~~~ diff --git a/engines/posixaio.c b/engines/posixaio.c index 135d088c7a..ec5d3c689b 100644 --- a/engines/posixaio.c +++ b/engines/posixaio.c @@ -11,12 +11,34 @@ #include #include "../fio.h" +#include "../optgroup.h" struct posixaio_data { struct io_u **aio_events; unsigned int queued; }; +struct posixaio_options { + void *pad; + unsigned int respect_iodepth_batch_complete_max; +}; + +static struct fio_option options[] = { + { + .name = "posixaio_respect_iodepth_batch_complete_max", + .lname = "Respect iodepth_batch_complete_max", + .type = FIO_OPT_BOOL, + .off1 = offsetof(struct posixaio_options, respect_iodepth_batch_complete_max), + .help = "Whether to cap batch completion", + .def = "0", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_POSIXAIO, + }, + { + .name = NULL, + }, +}; + static unsigned long long ts_utime_since_now(const struct timespec *start) { struct timespec now; @@ -61,6 +83,7 @@ static int fio_posixaio_getevents(struct thread_data *td, unsigned int min, unsigned int max, const struct timespec *t) { struct posixaio_data *pd = td->io_ops_data; + struct posixaio_options *o = td->eo; os_aiocb_t *suspend_list[SUSPEND_ENTRIES]; struct timespec start; int have_timeout = 0; @@ -105,6 +128,9 @@ static int fio_posixaio_getevents(struct thread_data *td, unsigned int min, io_u->resid = io_u->xfer_buflen - retval; } else io_u->error = err; + + if (o->respect_iodepth_batch_complete_max && r >= max) + break; } if (r >= min) @@ -221,6 +247,8 @@ static struct ioengine_ops ioengine = { .open_file = generic_open_file, .close_file = generic_close_file, .get_file_size = generic_get_file_size, + .options = options, + .option_struct_size = sizeof(struct posixaio_options), }; static void fio_init fio_posixaio_register(void) diff --git a/fio.1 b/fio.1 index a3ebb67d36..e182449a1f 100644 --- a/fio.1 +++ b/fio.1 @@ -2397,6 +2397,13 @@ Defines the time between the SIGTERM and SIGKILL signals. Default is 1 second. 
.TP .BI (exec)std_redirect\fR=\fbool If set, stdout and stderr streams are redirected to files named from the job name. Default is true. +.TP +.BI (posixaio)posixaio_respect_iodepth_batch_complete_max\fR=\fPbool +If set, limit batch completions according to +\fBiodepth_batch_complete_max\fR, as other engines do. Default is +false, effectively setting +\fBiodepth_batch_complete_max\fR to the same value as +\fBiodepth\fR. .SS "I/O depth" .TP .BI iodepth \fR=\fPint diff --git a/optgroup.h b/optgroup.h index 1fb84a296b..af6bf81e1c 100644 --- a/optgroup.h +++ b/optgroup.h @@ -71,6 +71,7 @@ enum opt_category_group { __FIO_OPT_G_LIBCUFILE, __FIO_OPT_G_DFS, __FIO_OPT_G_NFS, + __FIO_OPT_G_POSIXAIO, FIO_OPT_G_RATE = (1ULL << __FIO_OPT_G_RATE), FIO_OPT_G_ZONE = (1ULL << __FIO_OPT_G_ZONE), @@ -116,6 +117,7 @@ enum opt_category_group { FIO_OPT_G_FILESTAT = (1ULL << __FIO_OPT_G_FILESTAT), FIO_OPT_G_LIBCUFILE = (1ULL << __FIO_OPT_G_LIBCUFILE), FIO_OPT_G_DFS = (1ULL << __FIO_OPT_G_DFS), + FIO_OPT_G_POSIXAIO = (1ULL << __FIO_OPT_G_POSIXAIO), }; extern const struct opt_group *opt_group_from_mask(uint64_t *mask); From 99eea0200dc92726ab0c403a0d9a6b93140bbcc0 Mon Sep 17 00:00:00 2001 From: Thomas Munro Date: Sun, 22 Aug 2021 13:16:27 +1200 Subject: [PATCH 2/2] Add posixaio_wait=aio_waitcomplete. Provide an option to use FreeBSD's aio_waitcomplete() function to wait for completions, instead of aio_suspend(). Not enabled by default. Signed-off-by: Thomas Munro --- HOWTO | 8 +++ configure | 23 +++++++++ engines/posixaio.c | 118 +++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 145 insertions(+), 4 deletions(-) diff --git a/HOWTO b/HOWTO index 4748743826..4d0abd401a 100644 --- a/HOWTO +++ b/HOWTO @@ -2650,6 +2650,14 @@ with the caveat that when used on the command line, they must come after the false, effectively behaving as though :option:`iodepth_batch_complete_max` has the same value as :option:`iodepth`. 
+	Only applies to posixaio_wait=aio_suspend, as the other wait
+	mechanisms already respect :option:`iodepth_batch_complete_max`.
+
+.. option:: posixaio_wait=str : [posixaio]
+
+	Selects the mechanism used for waiting for I/Os to complete.
+	Default is aio_suspend. On FreeBSD, aio_waitcomplete may be used.
+
 
 I/O depth
 ~~~~~~~~~
diff --git a/configure b/configure
index 84ccce040e..7fc0b8b596 100755
--- a/configure
+++ b/configure
@@ -737,6 +737,26 @@ EOF
 fi
 print_config "POSIX AIO fsync" "$posix_aio_fsync"
 
+##########################################
+# aio_waitcomplete probe
+if test "$have_aio_waitcomplete" != "yes" ; then
+  have_aio_waitcomplete="no"
+fi
+cat > $TMPC << EOF
+#include <stddef.h>
+#include <aio.h>
+int main(void)
+{
+  struct aiocb *cb;
+  aio_waitcomplete(&cb, NULL);
+  return 0;
+}
+EOF
+if compile_prog "" "" "aio_waitcomplete" ; then
+  have_aio_waitcomplete="yes"
+fi
+print_config "aio_waitcomplete()" "$have_aio_waitcomplete"
+
 ##########################################
 # POSIX pshared attribute probe
 if test "$posix_pshared" != "yes" ; then
@@ -2858,6 +2878,9 @@ fi
 if test "$posix_aio_fsync" = "yes" ; then
   output_sym "CONFIG_POSIXAIO_FSYNC"
 fi
+if test "$have_aio_waitcomplete" = "yes" ; then
+  output_sym "CONFIG_HAVE_AIO_WAITCOMPLETE"
+fi
 if test "$posix_pshared" = "yes" ; then
   output_sym "CONFIG_PSHARED"
 fi
diff --git a/engines/posixaio.c b/engines/posixaio.c
index ec5d3c689b..1e06bac1b1 100644
--- a/engines/posixaio.c
+++ b/engines/posixaio.c
@@ -13,27 +13,57 @@
 #include "../fio.h"
 #include "../optgroup.h"
 
+/* Wait mechanisms selectable through the posixaio_wait option. */
+enum {
+	FIO_POSIXAIO_SUSPEND,
+	FIO_POSIXAIO_WAITCOMPLETE,
+};
+
 struct posixaio_data {
 	struct io_u **aio_events;
 	unsigned int queued;
+	int (*getevents)(struct thread_data *, unsigned int, unsigned int, const struct timespec *);
 };
 
 struct posixaio_options {
 	void *pad;
 	unsigned int respect_iodepth_batch_complete_max;
+	unsigned int wait;
 };
 
 static struct fio_option options[] = {
 	{
 		.name	= "posixaio_respect_iodepth_batch_complete_max",
-		.lname	= "Respect iodepth_batch_complete_max",
+		.lname	= "Respect iodepth_batch_complete_max for posixaio_wait=aio_suspend",
 		.type	= FIO_OPT_BOOL,
 		.off1	= offsetof(struct posixaio_options, respect_iodepth_batch_complete_max),
-		.help	= "Whether to cap batch completion",
+		.help	= "Whether to cap batch completion for posixaio_wait=aio_suspend",
 		.def	= "0",
 		.category = FIO_OPT_C_ENGINE,
 		.group	= FIO_OPT_G_POSIXAIO,
 	},
+	{
+		.name	= "posixaio_wait",
+		.lname	= "POSIX AIO wait mechanism",
+		.type	= FIO_OPT_STR,
+		.off1	= offsetof(struct posixaio_options, wait),
+		.help	= "Select mechanism for waiting for I/O completion",
+		.def	= "aio_suspend",
+		.posval = {
+			  { .ival = "aio_suspend",
+			    .oval = FIO_POSIXAIO_SUSPEND,
+			    .help = "Use aio_suspend()",
+			  },
+#ifdef CONFIG_HAVE_AIO_WAITCOMPLETE
+			  { .ival = "aio_waitcomplete",
+			    .oval = FIO_POSIXAIO_WAITCOMPLETE,
+			    .help = "Use aio_waitcomplete()",
+			  },
+#endif
+		},
+		.category = FIO_OPT_C_ENGINE,
+		.group	= FIO_OPT_G_POSIXAIO,
+	},
 	{
 		.name	= NULL,
 	},
@@ -77,10 +107,73 @@ static int fio_posixaio_prep(struct thread_data fio_unused *td,
 	return 0;
 }
 
+#ifdef CONFIG_HAVE_AIO_WAITCOMPLETE
+
+/*
+ * Reap completions with aio_waitcomplete().
+ *
+ * Waits (bounded by 't', or indefinitely when 't' is NULL) until at least
+ * 'min' events have been collected, then keeps polling with a zero timeout
+ * until 'max' events are collected or nothing more is ready. Reaped io_us
+ * are stored in pd->aio_events[]; the number stored is returned.
+ */
+static int fio_posixaio_getevents_waitcomplete(struct thread_data *td,
+					       unsigned int min,
+					       unsigned int max,
+					       const struct timespec *t)
+{
+	struct posixaio_data *pd = td->io_ops_data;
+	struct aiocb *aiocb;
+	struct io_u *io_u;
+	ssize_t retval;
+	unsigned int events = 0;
+	struct timespec zero_timeout = {0};
+	struct timespec *timeout;
+
+	do {
+		if (events < min) {
+			/* Wait until the minimum is satisfied. */
+			timeout = (struct timespec *)t;
+		} else {
+			/* Consume as many more as we can without waiting. */
+			timeout = &zero_timeout;
+		}
+
+		/*
+		 * A return of -1 is ambiguous: it is either a failure to
+		 * wait or the status of a failed I/O. Only the former
+		 * leaves aiocb NULL, so key off the pointer, not retval.
+		 */
+		aiocb = NULL;
+		retval = aio_waitcomplete(&aiocb, timeout);
+		if (!aiocb) {
+			if (errno == EINTR)
+				continue;
+			/* Timeout expired or nothing was ready. */
+			if (errno != EAGAIN && errno != EINPROGRESS)
+				td_verror(td, errno, "aio_waitcomplete");
+			break;
+		}
+
+		io_u = container_of(aiocb, struct io_u, aiocb);
+		pd->queued--;
+		pd->aio_events[events++] = io_u;
+
+		if (retval >= 0)
+			io_u->resid = io_u->xfer_buflen - retval;
+		else if (errno == ECANCELED)
+			io_u->resid = io_u->xfer_buflen;
+		else
+			io_u->error = errno;
+	} while (events < max && pd->queued > 0);
+
+	return events;
+}
+
+#endif
+
 #define SUSPEND_ENTRIES	8
 
-static int fio_posixaio_getevents(struct thread_data *td, unsigned int min,
-				  unsigned int max, const struct timespec *t)
+static int fio_posixaio_getevents_suspend(struct thread_data *td,
+					  unsigned int min,
+					  unsigned int max,
+					  const struct timespec *t)
 {
 	struct posixaio_data *pd = td->io_ops_data;
 	struct posixaio_options *o = td->eo;
@@ -152,6 +245,17 @@ static int fio_posixaio_getevents(struct thread_data *td, unsigned int min,
 	goto restart;
 }
 
+/* Forward to the getevents implementation stored in pd by fio_posixaio_init(). */
+static int fio_posixaio_getevents(struct thread_data *td,
+				  unsigned int min,
+				  unsigned int max,
+				  const struct timespec *t)
+{
+	struct posixaio_data *pd = td->io_ops_data;
+
+	return pd->getevents(td, min, max, t);
+}
+
 static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
 {
 	struct posixaio_data *pd = td->io_ops_data;
@@ -223,13 +327,31 @@ static void fio_posixaio_cleanup(struct thread_data *td)
 
 static int fio_posixaio_init(struct thread_data *td)
 {
+	struct posixaio_options *o = td->eo;
 	struct posixaio_data *pd = malloc(sizeof(*pd));
 
 	memset(pd, 0, sizeof(*pd));
 	pd->aio_events = malloc(td->o.iodepth * sizeof(struct io_u *));
 	memset(pd->aio_events, 0, td->o.iodepth * sizeof(struct io_u *));
 
+	switch (o->wait) {
+	case FIO_POSIXAIO_SUSPEND:
+		pd->getevents = fio_posixaio_getevents_suspend;
+		break;
+#ifdef CONFIG_HAVE_AIO_WAITCOMPLETE
+	case FIO_POSIXAIO_WAITCOMPLETE:
+		pd->getevents = fio_posixaio_getevents_waitcomplete;
+		break;
+#endif
+	default:
+		/* Unknown wait mechanism: release everything allocated above. */
+		free(pd->aio_events);
+		free(pd);
+		return -1;
+	}
+
 	td->io_ops_data = pd;
+
 	return 0;
 }
 