Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add posixaio_waitcomplete engine. #1266

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions HOWTO
Original file line number Diff line number Diff line change
Expand Up @@ -1944,6 +1944,7 @@ I/O engine
**posixaio**
POSIX asynchronous I/O using :manpage:`aio_read(3)` and
:manpage:`aio_write(3)`.
This engine defines engine specific options.

**solarisaio**
Solaris native asynchronous I/O.
Expand Down Expand Up @@ -2642,6 +2643,22 @@ with the caveat that when used on the command line, they must come after the

If set, stdout and stderr streams are redirected to files named from the job name. Default is true.

.. options:: posixaio_respect_iodepth_batch_complete_max=bool : [posixaio]

If set, limit batch completions according to
:option:`iodepth_batch_complete_max`, as other engines do. Default is
false, effectively behaving as though
:option:`iodepth_batch_complete_max` has the same value as
:option:`iodepth`.
Only applies when :option:`posixaio_wait` is aio_suspend, as the other
wait mechanisms already respect :option:`iodepth_batch_complete_max`.

.. options:: posixaio_wait=str : [posixaio]

Selects the mechanism used for waiting for I/Os to complete.
Default is aio_suspend. On FreeBSD, aio_waitcomplete may be used.


I/O depth
~~~~~~~~~

Expand Down
23 changes: 23 additions & 0 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,26 @@ EOF
fi
print_config "POSIX AIO fsync" "$posix_aio_fsync"

##########################################
# aio_waitcomplete probe
# A pre-set value of "yes" is honoured; anything else (including unset)
# starts out as "no" and is switched to "yes" only if the test program
# below compiles and links.
if test "$have_aio_waitcomplete" != "yes" ; then
  have_aio_waitcomplete="no"
fi
cat > $TMPC <<EOF
#include <aio.h>
#include <stdlib.h>
int main(void)
{
  struct aiocb *cb;
  aio_waitcomplete(&cb, NULL);
  return 0;
}
EOF
if compile_prog "" "" "aio_waitcomplete" ; then
  have_aio_waitcomplete="yes"
fi
print_config "aio_waitcomplete()" "$have_aio_waitcomplete"

##########################################
# POSIX pshared attribute probe
if test "$posix_pshared" != "yes" ; then
Expand Down Expand Up @@ -2858,6 +2878,9 @@ fi
if test "$posix_aio_fsync" = "yes" ; then
output_sym "CONFIG_POSIXAIO_FSYNC"
fi
if test "$have_aio_waitcomplete" = "yes" ; then
output_sym "CONFIG_HAVE_AIO_WAITCOMPLETE"
fi
if test "$posix_pshared" = "yes" ; then
output_sym "CONFIG_PSHARED"
fi
Expand Down
142 changes: 140 additions & 2 deletions engines/posixaio.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,61 @@
#include <fcntl.h>

#include "../fio.h"
#include "../optgroup.h"

/*
 * Values stored in posixaio_options.wait by the "posixaio_wait" option;
 * they select which getevents implementation the engine uses.
 */
enum {
/* posixaio_wait=aio_suspend */
FIO_POSIXAIO_SUSPEND,
/* posixaio_wait=aio_waitcomplete */
FIO_POSIXAIO_WAITCOMPLETE,
};

/*
 * Per-thread engine state, reachable through td->io_ops_data.
 */
struct posixaio_data {
/* Completed io_us collected by getevents; holds td->o.iodepth slots. */
struct io_u **aio_events;
/* Decremented for every reaped I/O; presumably the in-flight count - confirm against the queue path. */
unsigned int queued;
/* Wait implementation selected in fio_posixaio_init() from the posixaio_wait option. */
int (*getevents)(struct thread_data *, unsigned int, unsigned int, const struct timespec *);
};

/*
 * Engine-specific option storage (td->eo); fields are addressed by the
 * offsetof() values in the options[] table.
 */
struct posixaio_options {
/* NOTE(review): unused here; presumably keeps real options away from offset 0 - confirm against the option parser. */
void *pad;
/* Boolean: stop reaping in the aio_suspend path once 'max' events were collected. */
unsigned int respect_iodepth_batch_complete_max;
/* One of FIO_POSIXAIO_SUSPEND / FIO_POSIXAIO_WAITCOMPLETE. */
unsigned int wait;
};

/*
 * Engine-specific options for posixaio. Each entry writes into
 * struct posixaio_options at the offset given by .off1; the table is
 * terminated by an entry whose .name is NULL.
 */
static struct fio_option options[] = {
{
/* Boolean, default off: cap batch completions in the aio_suspend path. */
.name = "posixaio_respect_iodepth_batch_complete_max",
.lname = "Respect iodepth_batch_complete_max for wait=aio_suspend",
.type = FIO_OPT_BOOL,
.off1 = offsetof(struct posixaio_options, respect_iodepth_batch_complete_max),
.help = "Whether to cap batch completion for wait=aio_suspend",
.def = "0",
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_POSIXAIO,
},
{
/* String option mapped onto the FIO_POSIXAIO_* values via .posval. */
.name = "posixaio_wait",
.lname = "POSIX AIO wait mechanism",
.type = FIO_OPT_STR,
.off1 = offsetof(struct posixaio_options, wait),
.help = "Select mechanism for waiting for I/O completion",
.def = "aio_suspend",
.posval = {
{ .ival = "aio_suspend",
.oval = FIO_POSIXAIO_SUSPEND,
.help = "Use aio_suspend()",
},
/* Only offered when configure detected aio_waitcomplete(). */
#ifdef CONFIG_HAVE_AIO_WAITCOMPLETE
{ .ival = "aio_waitcomplete",
.oval = FIO_POSIXAIO_WAITCOMPLETE,
.help = "Use aio_waitcomplete()",
},
#endif
},
.category = FIO_OPT_C_ENGINE,
.group = FIO_OPT_G_POSIXAIO,
},
{
/* Table terminator. */
.name = NULL,
},
};

static unsigned long long ts_utime_since_now(const struct timespec *start)
Expand Down Expand Up @@ -55,12 +106,68 @@ static int fio_posixaio_prep(struct thread_data fio_unused *td,
return 0;
}

#ifdef CONFIG_HAVE_AIO_WAITCOMPLETE

static int fio_posixaio_getevents_waitcomplete(struct thread_data *td,
unsigned int min,
unsigned int max,
const struct timespec *t)
{
struct posixaio_data *pd = td->io_ops_data;
struct aiocb *aiocb;
struct io_u *io_u;
ssize_t retval;
unsigned int events = 0;
struct timespec zero_timeout = {0};
struct timespec *timeout;

do
{
if (events < min) {
/* Wait until the minimum is satisfied. */
timeout = (struct timespec *)t;
} else {
/* Consume as many more as we can without waiting. */
timeout = &zero_timeout;
}

retval = aio_waitcomplete(&aiocb, timeout);
if (retval < 0) {
if (errno == EINTR)
continue;
if (errno == EAGAIN)
break;
td_verror(td, errno, "aio_waitcomplete");
break;
}

io_u = container_of(aiocb, struct io_u, aiocb);
pd->queued--;
pd->aio_events[events++] = io_u;

if (retval >= 0)
io_u->resid = io_u->xfer_buflen - retval;
else if (errno == ECANCELED)
io_u->resid = io_u->xfer_buflen;
else
io_u->error = errno;

} while (events < max && pd->queued > 0);

return events;
}

#endif

#define SUSPEND_ENTRIES 8

static int fio_posixaio_getevents(struct thread_data *td, unsigned int min,
unsigned int max, const struct timespec *t)
static int fio_posixaio_getevents_suspend(struct thread_data *td,
unsigned int min,
unsigned int max,
const struct timespec *t)
{
struct posixaio_data *pd = td->io_ops_data;
struct posixaio_options *o = td->eo;
os_aiocb_t *suspend_list[SUSPEND_ENTRIES];
struct timespec start;
int have_timeout = 0;
Expand Down Expand Up @@ -105,6 +212,9 @@ static int fio_posixaio_getevents(struct thread_data *td, unsigned int min,
io_u->resid = io_u->xfer_buflen - retval;
} else
io_u->error = err;

if (o->respect_iodepth_batch_complete_max && r >= max)
break;
}

if (r >= min)
Expand All @@ -126,6 +236,16 @@ static int fio_posixaio_getevents(struct thread_data *td, unsigned int min,
goto restart;
}

/*
 * Engine getevents entry point: hands the request to the wait
 * implementation stored in the per-thread engine data and returns
 * whatever that implementation returns.
 */
static int fio_posixaio_getevents(struct thread_data *td,
				  unsigned int min,
				  unsigned int max,
				  const struct timespec *t)
{
	struct posixaio_data *engine_data = td->io_ops_data;
	int nr_events;

	nr_events = engine_data->getevents(td, min, max, t);
	return nr_events;
}

static struct io_u *fio_posixaio_event(struct thread_data *td, int event)
{
struct posixaio_data *pd = td->io_ops_data;
Expand Down Expand Up @@ -197,13 +317,29 @@ static void fio_posixaio_cleanup(struct thread_data *td)

/*
 * Allocate and set up the per-thread engine state.
 *
 * Picks the getevents implementation that matches the posixaio_wait
 * option and allocates a zeroed event array with td->o.iodepth slots.
 * On success the state is published through td->io_ops_data.
 *
 * Returns 0 on success, -1 if the wait mechanism is not supported by
 * this build or if memory could not be allocated. Nothing is left
 * allocated on failure.
 */
static int fio_posixaio_init(struct thread_data *td)
{
	struct posixaio_options *o = td->eo;
	struct posixaio_data *pd;

	pd = calloc(1, sizeof(*pd));
	if (!pd)
		return -1;

	/* Validate the wait mechanism before allocating anything else. */
	switch (o->wait) {
	case FIO_POSIXAIO_SUSPEND:
		pd->getevents = fio_posixaio_getevents_suspend;
		break;
#ifdef CONFIG_HAVE_AIO_WAITCOMPLETE
	case FIO_POSIXAIO_WAITCOMPLETE:
		pd->getevents = fio_posixaio_getevents_waitcomplete;
		break;
#endif
	default:
		free(pd);
		return -1;
	}

	pd->aio_events = calloc(td->o.iodepth, sizeof(*pd->aio_events));
	/* calloc() may legitimately return NULL for a zero-sized request. */
	if (!pd->aio_events && td->o.iodepth) {
		free(pd);
		return -1;
	}

	td->io_ops_data = pd;

	return 0;
}

Expand All @@ -221,6 +357,8 @@ static struct ioengine_ops ioengine = {
.open_file = generic_open_file,
.close_file = generic_close_file,
.get_file_size = generic_get_file_size,
.options = options,
.option_struct_size = sizeof(struct posixaio_options),
};

static void fio_init fio_posixaio_register(void)
Expand Down
7 changes: 7 additions & 0 deletions fio.1
Original file line number Diff line number Diff line change
Expand Up @@ -2397,6 +2397,13 @@ Defines the time between the SIGTERM and SIGKILL signals. Default is 1 second.
.TP
.BI (exec)std_redirect\fR=\fPbool
If set, stdout and stderr streams are redirected to files named from the job name. Default is true.
.TP
.BI (posixaio)posixaio_respect_iodepth_batch_complete_max\fR=\fPbool
If set, limit batch completions according to
\fBiodepth_batch_complete_max\fR, as other engines do. Default is
false, effectively setting
\fBiodepth_batch_complete_max\fR to the same value as
\fBiodepth\fR.
.SS "I/O depth"
.TP
.BI iodepth \fR=\fPint
Expand Down
2 changes: 2 additions & 0 deletions optgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ enum opt_category_group {
__FIO_OPT_G_LIBCUFILE,
__FIO_OPT_G_DFS,
__FIO_OPT_G_NFS,
__FIO_OPT_G_POSIXAIO,

FIO_OPT_G_RATE = (1ULL << __FIO_OPT_G_RATE),
FIO_OPT_G_ZONE = (1ULL << __FIO_OPT_G_ZONE),
Expand Down Expand Up @@ -116,6 +117,7 @@ enum opt_category_group {
FIO_OPT_G_FILESTAT = (1ULL << __FIO_OPT_G_FILESTAT),
FIO_OPT_G_LIBCUFILE = (1ULL << __FIO_OPT_G_LIBCUFILE),
FIO_OPT_G_DFS = (1ULL << __FIO_OPT_G_DFS),
FIO_OPT_G_POSIXAIO = (1ULL << __FIO_OPT_G_POSIXAIO),
};

extern const struct opt_group *opt_group_from_mask(uint64_t *mask);
Expand Down