Skip to content

Commit

Permalink
Merge aio page read requests
Browse files Browse the repository at this point in the history
Summary:
Tries to submit multiple aio page read requests together to improve read
performance.

This code adds an array to buffer aio requests in os_aio_array_t. So far
only os_aio_read_array uses it. A new parameter (should_buffer) is added
to indicate whether an aio request should be buffered or submitted. If
should_submit is true, it will submit all buffered aio requests on the
os_aio_array.

Only buf_read_ahead_linear is modified to utilize this functionality so
far. All other call sites are setting should_submit to true. Other
os_aio_array_t arrays will also ignore this.

If one thread calling buf_read_ahead_linear is buffering io requests but
another thread issues a normal os_aio_request, that other request will
submit all the buffered requests from buf_read_ahead_linear. This is
still better than nothing I suppose.

Test Plan:
Perf tests were run manually and approved by Yoshinori.

Reviewers: steaphan, jtolmer, yoshinori, mcallaghan

Reviewed By: steaphan, nizamordulu
  • Loading branch information
Rongrong Zhong authored and steaphangreene committed Jun 4, 2013
1 parent 2783df6 commit f9d1a53
Show file tree
Hide file tree
Showing 13 changed files with 255 additions and 33 deletions.
6 changes: 6 additions & 0 deletions mysql-test/suite/innodb/r/innodb_aio_stats.result
Expand Up @@ -23,10 +23,16 @@ IO_WRITE_BYTES > 40000000 IO_WRITE_REQUESTS > 800 IO_WRITE_WAIT_USECS > IO_WRITE
select INNODB_PAGES_WRITTEN > 800, INNODB_PAGES_WRITTEN_INDEX > 800, INNODB_PAGES_WRITTEN_BLOB from information_schema.table_statistics where table_name = 't1';
INNODB_PAGES_WRITTEN > 800 INNODB_PAGES_WRITTEN_INDEX > 800 INNODB_PAGES_WRITTEN_BLOB
1 1 0
show global status like "innodb_buffered_aio_submitted";
Variable_name Value
Innodb_buffered_aio_submitted 0
select * from t1;
select count(*) from t1;
count(*)
65536
show global status like "innodb_buffered_aio_submitted";
Variable_name Value
Innodb_buffered_aio_submitted 1151
select IO_READ_BYTES, IO_READ_REQUESTS, IO_READ_SVC_USECS < IO_READ_WAIT_USECS, IO_READ_SVC_USECS_MAX < IO_READ_WAIT_USECS_MAX, IO_READ_SLOW_IOS from information_schema.table_statistics where table_name = 't1';
IO_READ_BYTES IO_READ_REQUESTS IO_READ_SVC_USECS < IO_READ_WAIT_USECS IO_READ_SVC_USECS_MAX < IO_READ_WAIT_USECS_MAX IO_READ_SLOW_IOS
20512768 1252 1 1 0
Expand Down
30 changes: 30 additions & 0 deletions mysql-test/suite/innodb/r/innodb_merge_read.result
@@ -0,0 +1,30 @@
DROP TABLE if exists t1;
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256)) ENGINE=INNODB;
INSERT INTO t1 VALUES (0, REPEAT('a',256));
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
show global status like "innodb_buffered_aio_submitted";
Variable_name Value
Innodb_buffered_aio_submitted 0
select * from t1;
select count(*) from t1;
count(*)
65536
show global status like "innodb_buffered_aio_submitted";
Variable_name Value
Innodb_buffered_aio_submitted 1151
DROP TABLE t1;
3 changes: 3 additions & 0 deletions mysql-test/suite/innodb/t/innodb_aio_stats.test
Expand Up @@ -33,12 +33,15 @@ select INNODB_PAGES_WRITTEN > 800, INNODB_PAGES_WRITTEN_INDEX > 800, INNODB_PAGE

--source include/restart_mysqld.inc

show global status like "innodb_buffered_aio_submitted";

--disable_result_log
select * from t1;
--enable_result_log

select count(*) from t1;

show global status like "innodb_buffered_aio_submitted";
select IO_READ_BYTES, IO_READ_REQUESTS, IO_READ_SVC_USECS < IO_READ_WAIT_USECS, IO_READ_SVC_USECS_MAX < IO_READ_WAIT_USECS_MAX, IO_READ_SLOW_IOS from information_schema.table_statistics where table_name = 't1';

select INNODB_PAGES_READ, INNODB_PAGES_READ_INDEX, INNODB_PAGES_READ_BLOB from information_schema.table_statistics where table_name = 't1';
Expand Down
42 changes: 42 additions & 0 deletions mysql-test/suite/innodb/t/innodb_merge_read.test
@@ -0,0 +1,42 @@
--source include/have_innodb.inc
--source include/have_native_aio.inc

--disable_warnings
DROP TABLE if exists t1;
--enable_warnings

# Create table.
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b VARCHAR(256)) ENGINE=INNODB;

# Populate table.
INSERT INTO t1 VALUES (0, REPEAT('a',256));
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;
INSERT INTO t1 SELECT 0, b FROM t1;

--source include/restart_mysqld.inc

show global status like "innodb_buffered_aio_submitted";

--disable_result_log
select * from t1;
--enable_result_log

select count(*) from t1;

show global status like "innodb_buffered_aio_submitted";

DROP TABLE t1;
32 changes: 21 additions & 11 deletions storage/innobase/buf/buf0rea.cc
Expand Up @@ -120,7 +120,12 @@ buf_read_page_low(
use to stop dangling page reads from a tablespace
which we have DISCARDed + IMPORTed back */
ulint offset, /*!< in: page number */
trx_t* trx)
trx_t* trx,
ibool should_buffer) /*!< in: whether to buffer an aio request.
AIO read ahead uses this. If you plan to
use this parameter, make sure you remember
to call os_aio_linux_dispatch_read_array_submit
when you are ready to commit all your requests.*/

This comment has been minimized.

Copy link
@zeha

zeha Oct 11, 2013

likely typo in comment: read <-> ready

This comment has been minimized.

Copy link
@steaphan-fb-com

steaphan-fb-com Oct 11, 2013

Yep, that's a typo. Thanks. We'll fix it.

This comment has been minimized.

Copy link
@steaphan-fb-com

steaphan-fb-com Oct 11, 2013

Fixed in: b1b4c79

{
buf_page_t* bpage;
ulint wake_later;
Expand Down Expand Up @@ -186,15 +191,17 @@ buf_read_page_low(
| ignore_nonexistent_pages,
sync, space, zip_size, offset, 0, zip_size,
bpage->zip.data, bpage,
trx ? &trx->table_io_perf : NULL);
trx ? &trx->table_io_perf : NULL,
should_buffer);
} else {
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);

*err = _fil_io(OS_FILE_READ | wake_later
| ignore_nonexistent_pages,
sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
((buf_block_t*) bpage)->frame, bpage,
trx ? &trx->table_io_perf : NULL);
trx ? &trx->table_io_perf : NULL,
should_buffer);
}

if (sync) {
Expand Down Expand Up @@ -349,7 +356,7 @@ buf_read_ahead_random(
&err, false,
ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER,
space, zip_size, FALSE,
tablespace_version, i, trx);
tablespace_version, i, trx, FALSE);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
Expand Down Expand Up @@ -413,7 +420,7 @@ buf_read_page(

count = buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space,
zip_size, FALSE,
tablespace_version, offset, trx);
tablespace_version, offset, trx, FALSE);
srv_stats.buf_pool_reads.add(count);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
Expand Down Expand Up @@ -461,7 +468,7 @@ buf_read_page_async(
| OS_AIO_SIMULATED_WAKE_LATER
| BUF_READ_IGNORE_NONEXISTENT_PAGES,
space, zip_size, FALSE,
tablespace_version, offset, NULL);
tablespace_version, offset, NULL, FALSE);
srv_stats.buf_pool_reads.add(count);

/* We do not increment number of I/O operations used for LRU policy
Expand Down Expand Up @@ -716,13 +723,12 @@ buf_read_ahead_linear(
for (i = low; i < high; i++) {
/* It is only sensible to do read-ahead in the non-sync
aio mode: hence FALSE as the first parameter */

if (!ibuf_bitmap_page(zip_size, i)) {
count += buf_read_page_low(
&err, false,
ibuf_mode,
space, zip_size, FALSE, tablespace_version, i,
trx);
trx, TRUE);
if (err == DB_TABLESPACE_DELETED) {
ut_print_timestamp(stderr);
fprintf(stderr,
Expand All @@ -735,6 +741,10 @@ buf_read_ahead_linear(
}
}
}
#if defined(LINUX_NATIVE_AIO)
/* Tell aio to submit all buffered requests. */
ut_a(os_aio_linux_dispatch_read_array_submit());
#endif

/* In simulated aio we wake the aio handler threads only after
queuing all aio requests, in native aio the following call does
Expand Down Expand Up @@ -812,7 +822,7 @@ buf_read_ibuf_merge_pages(
buf_read_page_low(&err, sync && (i + 1 == n_stored),
BUF_READ_ANY_PAGE, space_ids[i],
zip_size, TRUE, space_versions[i],
page_nos[i], NULL);
page_nos[i], NULL, FALSE);

if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) {
tablespace_deleted:
Expand Down Expand Up @@ -907,13 +917,13 @@ buf_read_recv_pages(
if ((i + 1 == n_stored) && sync) {
buf_read_page_low(&err, true, BUF_READ_ANY_PAGE, space,
zip_size, TRUE, tablespace_version,
page_nos[i], NULL);
page_nos[i], NULL, FALSE);
} else {
buf_read_page_low(&err, false, BUF_READ_ANY_PAGE
| OS_AIO_SIMULATED_WAKE_LATER,
space, zip_size, TRUE,
tablespace_version, page_nos[i],
NULL);
NULL, FALSE);
}
}

Expand Down
11 changes: 8 additions & 3 deletions storage/innobase/fil/fil0fil.cc
Expand Up @@ -5355,7 +5355,7 @@ fil_extend_space_to_desired_size(
index IO stats for system table space */
(TRX_SYS_SPACE == space->id)
? NULL : &space->primary_index_id,
&space->io_perf2, NULL);
&space->io_perf2, NULL, TRUE);
#endif /* UNIV_HOTBACKUP */
if (success) {
os_has_said_disk_full = FALSE;
Expand Down Expand Up @@ -5742,9 +5742,14 @@ _fil_io(
appropriately aligned */
void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
os_io_table_perf_t* table_io_perf)/* in/out: tracks table IO stats
os_io_table_perf_t* table_io_perf,/* in/out: tracks table IO stats
to be counted in IS.user_statistics only
for sync reads and writes */
ibool should_buffer) /*!< in: whether to buffer an aio request.
AIO read ahead uses this. If you plan to
use this parameter, make sure you remember
to call os_aio_linux_dispatch_read_array_submit
when you are ready to commit all your requests.*/
{
ulint mode;
fil_space_t* space;
Expand Down Expand Up @@ -5955,7 +5960,7 @@ _fil_io(
? NULL : &space->primary_index_id,
/*(io_flags & OS_AIO_DOUBLE_WRITE)
? &io_perf_doublewrite : */&space->io_perf2,
table_io_perf);
table_io_perf, should_buffer);
#endif /* UNIV_HOTBACKUP */
ut_a(ret);

Expand Down
2 changes: 2 additions & 0 deletions storage/innobase/handler/ha_innodb.cc
Expand Up @@ -899,6 +899,8 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_trx_n_rollback_partial, SHOW_LONG},
{"transaction_rollback_total",
(char*) &export_vars.innodb_trx_n_rollback_total, SHOW_LONG},
{"buffered_aio_submitted",
(char*) &export_vars.innodb_buffered_aio_submitted, SHOW_LONG},
{"zip_1024_compressed",
(char*) &export_vars.zip1024_compressed, SHOW_LONG},
{"zip_1024_compressed_ok",
Expand Down
6 changes: 4 additions & 2 deletions storage/innobase/include/fil0fil.h
Expand Up @@ -993,7 +993,7 @@ fil_space_get_n_reserved_extents(
#define fil_io(type, sync, space_id, zip_size, block_offset, \
byte_offset, len, buf, message) \
_fil_io(type, sync, space_id, zip_size, block_offset, \
byte_offset, len, buf, message, NULL)
byte_offset, len, buf, message, NULL, TRUE)

/****************************************************************//**
Update stats with per-table data from InnoDB tables. */
Expand Down Expand Up @@ -1041,9 +1041,11 @@ _fil_io(
appropriately aligned */
void* message, /*!< in: message for aio handler if non-sync
aio used, else ignored */
os_io_table_perf_t* table_io_perf) /*!< in/out: tracks table IO stats
os_io_table_perf_t* table_io_perf, /*!< in/out: tracks table IO stats
to be used in IS.user_statistics only for
sync reads and writes */
ibool should_buffer) /*!< in: whether to buffer an aio request.
Only used by aio read ahead*/
__attribute__((nonnull(8)));
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
Expand Down
32 changes: 26 additions & 6 deletions storage/innobase/include/os0file.h
Expand Up @@ -325,10 +325,11 @@ The wrapper functions have the prefix of "innodb_". */
pfs_os_file_close_func(file, __FILE__, __LINE__)

# define os_aio(type, mode, name, file, buf, offset, \
n, message1, message2, primary_index_id, io_perf2, tab) \
n, message1, message2, primary_index_id, io_perf2, tab, \
should_buffer) \
pfs_os_aio_func(type, mode, name, file, buf, offset, \
n, message1, message2, __FILE__, __LINE__, \
primary_index_id, io_perf2, tab)
primary_index_id, io_perf2, tab, should_buffer)

# define os_file_read(file, buf, offset, n) \
pfs_os_file_read_func(file, buf, offset, n, __FILE__, __LINE__)
Expand Down Expand Up @@ -370,9 +371,11 @@ to original un-instrumented file I/O APIs */
# define os_file_close(file) os_file_close_func(file)

# define os_aio(type, mode, name, file, buf, offset, n, \
message1, message2, primary_index_id, io_perf2, tab) \
message1, message2, primary_index_id, io_perf2, tab, \
should_buffer) \
os_aio_func(type, mode, name, file, buf, offset, n, \
message1, message2, primary_index_id, io_perf2, tab)
message1, message2, primary_index_id, io_perf2, tab,\
should_buffer)

# define os_file_read(file, buf, offset, n) \
os_file_read_func(file, buf, offset, n)
Expand Down Expand Up @@ -826,10 +829,16 @@ pfs_os_aio_func(
ib_uint64_t* primary_index_id,/*!< in: index_id of primary index */
os_io_perf2_t* io_perf2,/*!< in: per fil_space_t performance
counters */
os_io_table_perf_t* table_io_perf);
os_io_table_perf_t* table_io_perf,
/*!< in/out: table IO stats counted for
IS.user_statistics only for sync read
and writes */
ibool should_buffer);
/*!< in: Whether to buffer an aio request.
AIO read ahead uses this. If you plan to
use this parameter, make sure you remember
to call os_aio_linux_dispatch_read_array_submit
when you are ready to commit all your requests.*/
/*******************************************************************//**
NOTE! Please use the corresponding macro os_file_write(), not directly
this function!
Expand Down Expand Up @@ -1195,10 +1204,15 @@ os_aio_func(
ib_uint64_t* primary_index_id,/*!< in: index_id of primary index */
os_io_perf2_t* io_perf2,/*!< in: per fil_space_t performance
counters */
os_io_table_perf_t* table_io_perf);
os_io_table_perf_t* table_io_perf,
/*!< in/out: table IO stats counted for
IS.user_statistics only for sync read
and writes */
ibool should_buffer); /*!< in: Whether to buffer an aio request.
AIO read ahead uses this. If you plan to
use this parameter, make sure you remember
to call os_aio_linux_dispatch_read_array_submit
when you are ready to commit all your requests.*/
/************************************************************************//**
Wakes up all async i/o threads so that they know to exit themselves in
shutdown. */
Expand Down Expand Up @@ -1362,6 +1376,12 @@ os_aio_linux_handle(
parameters are valid and can be used to
restart the operation. */
ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
/*******************************************************************//**
Submit all buffered AIO read requests to the kernel.
@return TRUE on success. */
UNIV_INTERN
ibool
os_aio_linux_dispatch_read_array_submit();
#endif /* LINUX_NATIVE_AIO */

#ifndef UNIV_NONINL
Expand Down
7 changes: 5 additions & 2 deletions storage/innobase/include/os0file.ic
Expand Up @@ -214,8 +214,11 @@ pfs_os_aio_func(
ulint src_line,/*!< in: line where the func invoked */
ib_uint64_t* primary_index_id,/*!< in: index_id of primary index */
os_io_perf2_t* io_perf2,/*!< in: per fil_space_t performance counters */
os_io_table_perf_t* table_io_perf)/*!< in/out: used for per-table
os_io_table_perf_t* table_io_perf,/*!< in/out: used for per-table
file stats */
ibool should_buffer)
/*!< in: whether to buffer an aio request.
Only used by aio read ahead*/
{
ibool result;
struct PSI_file_locker* locker = NULL;
Expand All @@ -230,7 +233,7 @@ pfs_os_aio_func(

result = os_aio_func(type, mode, name, file, buf, offset,
n, message1, message2, primary_index_id,
io_perf2, table_io_perf);
io_perf2, table_io_perf, should_buffer);

register_pfs_file_io_end(locker, n);

Expand Down

0 comments on commit f9d1a53

Please sign in to comment.