Drizzled Public API Documentation

srv0srv.cc
1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (C) 2008, 2009 Google Inc.
5 Copyright (C) 2009, Percona Inc.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19 
20 This program is free software; you can redistribute it and/or modify it under
21 the terms of the GNU General Public License as published by the Free Software
22 Foundation; version 2 of the License.
23 
24 This program is distributed in the hope that it will be useful, but WITHOUT
25 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
26 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
27 
28 You should have received a copy of the GNU General Public License along with
29 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
30 St, Fifth Floor, Boston, MA 02110-1301 USA
31 
32 *****************************************************************************/
33 
34 /**************************************************/
60 /* Dummy comment */
61 #include "srv0srv.h"
62 
63 #include <drizzled/error.h>
64 #include <drizzled/errmsg_print.h>
65 
66 #include "ut0mem.h"
67 #include "ut0ut.h"
68 #include "os0proc.h"
69 #include "mem0mem.h"
70 #include "mem0pool.h"
71 #include "sync0sync.h"
72 #include "que0que.h"
73 #include "log0recv.h"
74 #include "pars0pars.h"
75 #include "usr0sess.h"
76 #include "lock0lock.h"
77 #include "trx0purge.h"
78 #include "ibuf0ibuf.h"
79 #include "buf0flu.h"
80 #include "buf0lru.h"
81 #include "btr0sea.h"
82 #include "dict0load.h"
83 #include "dict0boot.h"
84 #include "srv0start.h"
85 #include "row0mysql.h"
86 #include "ha_prototypes.h"
87 #include "trx0i_s.h"
88 #include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
89 
90 /* This is set to the MySQL server value for this variable. It is only
91 needed for FOREIGN KEY definition parsing since FOREIGN KEY names are not
92 stored in the server metadata. The server stores and enforces it for
93 regular database and table names.*/
94 UNIV_INTERN uint srv_lower_case_table_names = 0;
95 
96 /* The following counter is incremented whenever there is some user activity
97 in the server */
98 UNIV_INTERN ulint srv_activity_count = 0;
99 
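100 /* The following is the maximum allowed duration of a semaphore wait (the variable name refers to semaphores, not row locks). NOTE(review): presumably in seconds, and a longer wait is presumably treated as fatal -- confirm against the code that reads it. */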
101 UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
102 
103 /* How much data manipulation language (DML) statements need to be delayed,
104 in microseconds, in order to reduce the lagging of the purge thread. */
105 UNIV_INTERN ulint srv_dml_needed_delay = 0;
106 
107 UNIV_INTERN ibool srv_lock_timeout_active = FALSE;
108 UNIV_INTERN ibool srv_monitor_active = FALSE;
109 UNIV_INTERN ibool srv_error_monitor_active = FALSE;
110 
111 UNIV_INTERN const char* srv_main_thread_op_info = "";
112 
113 /* Server parameters which are read from the initfile */
114 
115 /* The following three are dir paths which are catenated before file
116 names, where the file name itself may also contain a path */
117 
118 UNIV_INTERN char* srv_data_home = NULL;
119 #ifdef UNIV_LOG_ARCHIVE
120 UNIV_INTERN char* srv_arch_dir = NULL;
121 #endif /* UNIV_LOG_ARCHIVE */
122 
125 UNIV_INTERN my_bool srv_file_per_table;
127 UNIV_INTERN ulint srv_file_format = 0;
132 
133 #if DICT_TF_FORMAT_51
134 # error "DICT_TF_FORMAT_51 must be 0!"
135 #endif
136 
138 UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
139 
140 /* If this flag is TRUE, then we will use the native aio of the
141 OS (provided we compiled Innobase with it in), otherwise we will
142 use simulated aio we build below with threads.
143 Currently we support native aio on windows and linux */
144 UNIV_INTERN my_bool srv_use_native_aio = TRUE;
145 
146 #ifdef __WIN__
147 /* Windows native condition variables. We use runtime loading / function
148 pointers, because they are not available on Windows Server 2003 and
149 Windows XP/2000.
150 
151 We use condition for events on Windows if possible, even if os_event
152 resembles Windows kernel event object well API-wise. The reason is
153 performance, kernel objects are heavyweights and WaitForSingleObject() is a
154 performance killer causing calling thread to context switch. Besides, Innodb
155 is preallocating large number (often millions) of os_events. With kernel event
156 objects it takes a big chunk out of non-paged pool, which is better suited
157 for tasks like IO than for storing idle event objects. */
158 UNIV_INTERN ibool srv_use_native_conditions = FALSE;
159 #endif /* __WIN__ */
160 
161 UNIV_INTERN ulint srv_n_data_files = 0;
162 UNIV_INTERN char** srv_data_file_names = NULL;
163 /* size in database pages */
164 UNIV_INTERN ulint* srv_data_file_sizes = NULL;
165 
166 /* if TRUE, then we auto-extend the last data file */
167 UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
168 /* if != 0, this tells the max size auto-extending may increase the
169 last data file size */
170 UNIV_INTERN ulint srv_last_file_size_max = 0;
171 /* If the last data file is auto-extended, we add this
172 many pages to it at a time */
173 UNIV_INTERN unsigned int srv_auto_extend_increment = 8;
174 UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
175 
176 /* If the following is TRUE we do not allow inserts etc. This protects
177 the user from forgetting the 'newraw' keyword to my.cnf */
178 
179 UNIV_INTERN ibool srv_created_new_raw = FALSE;
180 
181 UNIV_INTERN char** srv_log_group_home_dirs = NULL;
182 
183 UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX;
184 UNIV_INTERN ulint srv_n_log_files = ULINT_MAX;
185 /* size in database pages */
186 UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
187 /* size in database pages */
188 UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
189 UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
190 
191 /* Try to flush dirty pages so as to avoid IO bursts at
192 the checkpoints. */
193 UNIV_INTERN bool srv_adaptive_flushing = TRUE;
194 
197 #define MAX_MUTEX_NOWAIT 20 /* limit that MUTEX_NOWAIT() compares its argument against */
198 /* MUTEX_NOWAIT(mutex_skipped) is true while mutex_skipped is still below MAX_MUTEX_NOWAIT. */
203 #define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
204 
207 #if defined(BUILD_DRIZZLE)
208 const byte srv_latin1_ordering[256] /* The sort order table of the latin1
209  character set. The following table is
210  the MySQL order as of Feb 10th, 2002. Lower-case ASCII letters (0x61-0x7A) get the same weight as their upper-case forms (0x41-0x5A), and the entries for 0xE0-0xFE repeat those for 0xC0-0xDE except at 0xF7. */
211 = {
212  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
213 , 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
214 , 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
215 , 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
216 , 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
217 , 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
218 , 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
219 , 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
220 , 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
221 , 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
222 , 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
223 , 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
224 , 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
225 , 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
226 , 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
227 , 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
228 , 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
229 , 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
230 , 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
231 , 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
232 , 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
233 , 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
234 , 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
235 , 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
236 , 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
237 , 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
238 , 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
239 , 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
240 , 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
241 , 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
242 , 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
243 , 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
244 };
245 #else
246 UNIV_INTERN const byte* srv_latin1_ordering; /* without BUILD_DRIZZLE only a pointer is defined here; this definition gives it no initializer */
247 #endif /* BUILD_DRIZZLE */
248 
249 
250 /* use os/external memory allocator */
251 UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
252 /* requested size in kilobytes */
253 UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
254 /* requested number of buffer pool instances */
255 UNIV_INTERN ulint srv_buf_pool_instances = 1;
256 /* previously requested size */
257 UNIV_INTERN ulint srv_buf_pool_old_size;
258 /* current size in kilobytes */
259 UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
260 /* size in bytes */
261 UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
262 UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
263 
264 /* This parameter is deprecated. Use srv_n_io_[read|write]_threads
265 instead. */
266 UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
267 UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
268 UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
269 
270 /* The universal page size of the database */
271 UNIV_INTERN ulint srv_page_size_shift = 0;
272 UNIV_INTERN ulint srv_page_size = 0;
273 
274 /* The log block size */
275 UNIV_INTERN uint32_t srv_log_block_size = 0;
276 
277 /* User settable value of the number of pages that must be present
278 in the buffer cache and accessed sequentially for InnoDB to trigger a
279 readahead request. */
280 UNIV_INTERN ulong srv_read_ahead_threshold = 56;
281 
282 #ifdef UNIV_LOG_ARCHIVE
283 UNIV_INTERN ibool srv_log_archive_on = FALSE;
284 UNIV_INTERN ibool srv_archive_recovery = 0;
285 UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
286 #endif /* UNIV_LOG_ARCHIVE */
287 
288 /* This parameter is used to throttle the number of insert buffers that are
289 merged in a batch. By increasing this parameter on a faster disk you can
290 possibly reduce the number of I/O operations performed to complete the
291 merge operation. The value of this parameter is used as is by the
292 background loop when the system is idle (low load), on a busy system
293 the parameter is scaled down by a factor of 4, this is to avoid putting
294 a heavier load on the I/O sub system. */
295 
296 UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
297 
298 UNIV_INTERN char* srv_file_flush_method_str = NULL;
299 UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
300 UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
301 
302 UNIV_INTERN ulint srv_max_n_open_files = 300;
303 
304 /* Number of IO operations per second the server can do */
305 UNIV_INTERN ulong srv_io_capacity = 200;
306 
307 /* The InnoDB main thread tries to keep the ratio of modified pages
308 in the buffer pool to all database pages in the buffer pool smaller than
309 the following number. But it is not guaranteed that the value stays below
310 that during a time of heavy update/insert activity. */
311 
312 UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
313 
314 /* the number of purge threads to use from the worker pool (currently 0 or 1).*/
315 UNIV_INTERN ulong srv_n_purge_threads = 0;
316 
317 /* the number of pages to purge in one batch */
318 UNIV_INTERN ulong srv_purge_batch_size = 20;
319 
320 /* the number of rollback segments to use */
321 UNIV_INTERN ulong srv_rollback_segments = TRX_SYS_N_RSEGS;
322 
323 /* variable counts amount of data read in total (in bytes) */
324 UNIV_INTERN ulint srv_data_read = 0;
325 
326 /* Internal setting for "innodb_stats_method". Decides how InnoDB treats
327 NULL values when collecting statistics. By default, it is set to
328 SRV_STATS_NULLS_EQUAL(0), i.e. all NULL values are treated as equal */
329 ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
330 
331 /* here we count the amount of data written in total (in bytes) */
332 UNIV_INTERN ulint srv_data_written = 0;
333 
334 /* the number of the log write requests done */
335 UNIV_INTERN ulint srv_log_write_requests = 0;
336 
337 /* the number of physical writes to the log performed */
338 UNIV_INTERN ulint srv_log_writes = 0;
339 
340 /* amount of data written to the log files in bytes */
341 UNIV_INTERN ulint srv_os_log_written = 0;
342 
343 /* amount of writes being done to the log files */
344 UNIV_INTERN ulint srv_os_log_pending_writes = 0;
345 
346 /* we increase this counter when we don't have enough space in the
347 log buffer and have to flush it */
348 UNIV_INTERN ulint srv_log_waits = 0;
349 
350 /* this variable counts the amount of times, when the doublewrite buffer
351 was flushed */
352 UNIV_INTERN ulint srv_dblwr_writes = 0;
353 
354 /* here we store the number of pages that have been flushed to the
355 doublewrite buffer */
356 UNIV_INTERN ulint srv_dblwr_pages_written = 0;
357 
358 /* in this variable we store the number of write requests issued */
359 UNIV_INTERN ulint srv_buf_pool_write_requests = 0;
360 
361 /* here we store the number of times when we had to wait for a free page
362 in the buffer pool. It happens when the buffer pool is full and we need
363 to make a flush, in order to be able to read or create a page. */
364 UNIV_INTERN ulint srv_buf_pool_wait_free = 0;
365 
366 /* variable to count the number of pages that were written from buffer
367 pool to the disk */
368 UNIV_INTERN ulint srv_buf_pool_flushed = 0;
369 
372 UNIV_INTERN ulint srv_buf_pool_reads = 0;
373 
375 UNIV_INTERN uint srv_auto_lru_dump = 0;
376 
377 /* structure to pass status variables to MySQL */
379 
380 /* If the following is != 0 we do not allow inserts etc. This protects
381 the user from forgetting the innodb_force_recovery keyword to my.cnf */
382 
383 UNIV_INTERN ulint srv_force_recovery = 0;
384 /*-----------------------*/
385 /* We are prepared for a situation that we have this many threads waiting for
386 a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
387 value. */
388 
389 UNIV_INTERN ulint srv_max_n_threads = 0;
390 
391 /* The following controls how many threads we let inside InnoDB concurrently:
392 threads waiting for locks are not counted into the number because otherwise
393 we could get a deadlock. MySQL creates a thread for each user session, and
394 semaphore contention and convoy problems can occur without this restriction.
395 Value 10 should be good if there are less than 4 processors + 4 disks in the
396 computer. Bigger computers need bigger values. Value 0 will disable the
397 concurrency check. */
398 
399 UNIV_INTERN ulong srv_thread_concurrency = 0;
400 
401 /* this mutex protects srv_conc data structures */
402 UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
403 /* number of transactions that have declared_to_be_inside_innodb set.
404 It used to be a non-error for this value to drop below zero temporarily.
405 This is no longer true. We'll, however, keep the lint datatype to add
406 assertions to catch any corner cases that we may have missed. */
407 UNIV_INTERN lint srv_conc_n_threads = 0;
408 /* number of OS threads waiting in the FIFO for a permission to enter
409 InnoDB */
410 UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;
411 
415  ibool reserved;
417  ibool wait_ended;
424  UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue;
425 };
426 
427 /* queue of threads waiting to get in */
428 UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
429 /* array of wait slots */
430 UNIV_INTERN srv_conc_slot_t* srv_conc_slots;
431 
432 /* Number of times a thread is allowed to enter InnoDB within the same
433 SQL query after it has once got the ticket at srv_conc_enter_innodb. Both macros below are plain aliases for the run-time variables they expand to. */
434 #define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
435 #define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
436 /*-----------------------*/
437 /* If the following is set to 1 then we do not run purge and insert buffer
438 merge to completion before shutdown. If it is set to 2, do not even flush the
439 buffer pool to data files at the shutdown: we effectively 'crash'
440 InnoDB (but lose no committed transactions). */
441 UNIV_INTERN ulint srv_fast_shutdown = 0;
442 
443 /* Generate an innodb_status.<pid> file */
444 UNIV_INTERN ibool srv_innodb_status = FALSE;
445 
446 /* When estimating number of different key values in an index, sample
447 this many index pages */
448 UNIV_INTERN ib_uint64_t srv_stats_sample_pages = 8;
449 
450 UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
451 UNIV_INTERN ibool srv_use_checksums = TRUE;
452 
453 UNIV_INTERN ulong srv_replication_delay = 0;
454 
455 UNIV_INTERN uint64_t srv_ibuf_max_size = 0;
456 UNIV_INTERN uint32_t srv_ibuf_active_contract = 0;
457 UNIV_INTERN uint32_t srv_ibuf_accel_rate = 100;
458 /* PCT_IBUF_IO(pct) evaluates, in floating point, to srv_io_capacity * srv_ibuf_accel_rate * pct / 10000, i.e. pct percent of srv_io_capacity when srv_ibuf_accel_rate is 100. The argument is parenthesized so that an expression argument such as PCT_IBUF_IO(a + b) is scaled as a whole. */
459 #define PCT_IBUF_IO(pct) (srv_io_capacity * srv_ibuf_accel_rate \
460  * ((pct) / 10000.0))
461 
462 UNIV_INTERN uint32_t srv_checkpoint_age_target = 0;
463 UNIV_INTERN uint32_t srv_flush_neighbor_pages = 1;
464 
465 UNIV_INTERN uint32_t srv_read_ahead = 3; /* 1: random, 2: linear, 3: both */
466 UNIV_INTERN uint32_t srv_adaptive_flushing_method = 0; /* 0: native,
467  1: estimate,
468  2: keep_average */
469 
470 UNIV_INTERN ibool srv_read_only = FALSE;
471 UNIV_INTERN ibool srv_fake_write = FALSE;
472 UNIV_INTERN ibool srv_apply_log_only = FALSE;
473 
474 /*-------------------------------------------*/
475 UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
476 UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
477 UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
478 UNIV_INTERN ulong srv_spin_wait_delay = 6;
479 UNIV_INTERN ibool srv_priority_boost = TRUE;
480 
481 #ifdef UNIV_DEBUG
482 UNIV_INTERN ibool srv_print_thread_releases = FALSE;
483 UNIV_INTERN ibool srv_print_lock_waits = FALSE;
484 UNIV_INTERN ibool srv_print_buf_io = FALSE;
485 UNIV_INTERN ibool srv_print_log_io = FALSE;
486 UNIV_INTERN ibool srv_print_latch_waits = FALSE;
487 #endif /* UNIV_DEBUG */
488 
489 UNIV_INTERN ulint srv_n_rows_inserted = 0;
490 UNIV_INTERN ulint srv_n_rows_updated = 0;
491 UNIV_INTERN ulint srv_n_rows_deleted = 0;
492 UNIV_INTERN ulint srv_n_rows_read = 0;
493 
494 static ulint srv_n_rows_inserted_old = 0;
495 static ulint srv_n_rows_updated_old = 0;
496 static ulint srv_n_rows_deleted_old = 0;
497 static ulint srv_n_rows_read_old = 0;
498 
499 UNIV_INTERN ulint srv_n_lock_wait_count = 0;
500 UNIV_INTERN ulint srv_n_lock_wait_current_count = 0;
501 UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0;
502 UNIV_INTERN ulint srv_n_lock_max_wait_time = 0;
503 
504 UNIV_INTERN ulint srv_truncated_status_writes = 0;
505 
506 /*
507  Set the following to 0 if you want InnoDB to write messages on
508  stderr on startup/shutdown
509 */
510 UNIV_INTERN ibool srv_print_verbose_log = TRUE;
511 UNIV_INTERN ibool srv_print_innodb_monitor = FALSE;
512 UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE;
513 UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
514 UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
515 
516 /* Array of English strings describing the current state of an
517 i/o handler thread */
518 
519 UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
520 UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
521 
522 UNIV_INTERN time_t srv_last_monitor_time;
523 
524 UNIV_INTERN mutex_t srv_innodb_monitor_mutex;
525 
526 /* Mutex for locking srv_monitor_file */
527 UNIV_INTERN mutex_t srv_monitor_file_mutex;
528 
529 #ifdef UNIV_PFS_MUTEX
530 /* Key to register kernel_mutex with performance schema */
531 UNIV_INTERN mysql_pfs_key_t kernel_mutex_key;
532 /* Key to protect writing the commit_id to the sys header */
533 UNIV_INTERN mysql_pfs_key_t commit_id_mutex_key;
534 /* Key to register srv_innodb_monitor_mutex with performance schema */
535 UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
536 /* Key to register srv_monitor_file_mutex with performance schema */
537 UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
538 /* Key to register srv_dict_tmpfile_mutex with performance schema */
539 UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
540 /* Key to register the mutex with performance schema */
541 UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
542 #endif /* UNIV_PFS_MUTEX */
543 
544 /* Temporary file for innodb monitor output */
545 UNIV_INTERN FILE* srv_monitor_file;
546 /* Mutex for locking srv_dict_tmpfile.
547 This mutex has a very high rank; threads reserving it should not
548 be holding any InnoDB latches. */
549 UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
550 /* Temporary file for output from the data dictionary */
551 UNIV_INTERN FILE* srv_dict_tmpfile;
552 /* Mutex for locking srv_misc_tmpfile.
553 This mutex has a very low rank; threads reserving it should not
554 acquire any further latches or sleep before releasing this one. */
555 UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
556 /* Temporary file for miscellaneous diagnostic output */
557 UNIV_INTERN FILE* srv_misc_tmpfile;
558 
559 UNIV_INTERN ulint srv_main_thread_process_no = 0;
560 UNIV_INTERN ulint srv_main_thread_id = 0;
561 
562 /* The following count work done by srv_master_thread. */
563 
564 /* Iterations by the 'once per second' loop. */
565 static ulint srv_main_1_second_loops = 0;
566 /* Calls to sleep by the 'once per second' loop. */
567 static ulint srv_main_sleeps = 0;
568 /* Iterations by the 'once per 10 seconds' loop. */
569 static ulint srv_main_10_second_loops = 0;
570 /* Iterations of the loop bounded by the 'background_loop' label. */
571 static ulint srv_main_background_loops = 0;
572 /* Iterations of the loop bounded by the 'flush_loop' label. */
573 static ulint srv_main_flush_loops = 0;
574 /* Log writes involving flush. */
575 static ulint srv_log_writes_and_flush = 0;
576 
577 /* This is only ever touched by the master thread. It records the
578 time when the last flush of log file has happened. The master
579 thread ensures that we flush the log files at least once per
580 second. */
581 static time_t srv_last_log_flush_time;
582 
583 /* The master thread performs various tasks based on the current
584 state of IO activity and the level of IO utilization in past
585 intervals. The following macros define thresholds for these conditions; each expands to PCT_IO() applied to a constant (3, 5 and 200 respectively). */
586 #define SRV_PEND_IO_THRESHOLD (PCT_IO(3))
587 #define SRV_RECENT_IO_ACTIVITY (PCT_IO(5))
588 #define SRV_PAST_IO_ACTIVITY (PCT_IO(200))
589 
590 /*
591  IMPLEMENTATION OF THE SERVER MAIN PROGRAM
592  =========================================
593 
594 There is the following analogue between this database
595 server and an operating system kernel:
596 
597 DB concept equivalent OS concept
598 ---------- ---------------------
599 transaction -- process;
600 
601 query thread -- thread;
602 
603 lock -- semaphore;
604 
605 transaction set to
606 the rollback state -- kill signal delivered to a process;
607 
608 kernel -- kernel;
609 
610 query thread execution:
611 (a) without kernel mutex
612 reserved -- process executing in user mode;
613 (b) with kernel mutex reserved
614  -- process executing in kernel mode;
615 
616 The server is controlled by a master thread which runs at
617 a priority higher than normal, that is, higher than user threads.
618 It sleeps most of the time, and wakes up, say, every 300 milliseconds,
619 to check whether there is anything happening in the server which
620 requires intervention of the master thread. Such situations may be,
621 for example, when flushing of dirty blocks is needed in the buffer
622 pool or old version of database rows have to be cleaned away.
623 
624 The threads which we call user threads serve the queries of
625 the clients and input from the console of the server.
626 They run at normal priority. The server may have several
627 communications endpoints. A dedicated set of user threads waits
628 at each of these endpoints ready to receive a client request.
629 Each request is taken by a single user thread, which then starts
630 processing and, when the result is ready, sends it to the client
631 and returns to wait at the same endpoint the thread started from.
632 
633 So, we do not have dedicated communication threads listening at
634 the endpoints and dealing the jobs to dedicated worker threads.
635 Our architecture saves one thread switch per request, compared
636 to the solution with dedicated communication threads
637 which amounts to 15 microseconds on 100 MHz Pentium
638 running NT. If the client
639 is communicating over a network, this saving is negligible, but
640 if the client resides in the same machine, maybe in an SMP machine
641 on a different processor from the server thread, the saving
642 can be important as the threads can communicate over shared
643 memory with an overhead of a few microseconds.
644 
645 We may later implement a dedicated communication thread solution
646 for those endpoints which communicate over a network.
647 
648 Our solution with user threads has two problems: for each endpoint
649 there has to be a number of listening threads. If there are many
650 communication endpoints, it may be difficult to set the right number
651 of concurrent threads in the system, as many of the threads
652 may always be waiting at less busy endpoints. Another problem
653 is queuing of the messages, as the server internally does not
654 offer any queue for jobs.
655 
656 Another group of user threads is intended for splitting the
657 queries and processing them in parallel. Let us call these
658 parallel communication threads. These threads are waiting for
659 parallelized tasks, suspended on event semaphores.
660 
661 A single user thread waits for input from the console,
662 like a command to shut the database.
663 
664 Utility threads are a different group of threads which takes
665 care of the buffer pool flushing and other, mainly background
666 operations, in the server.
667 Some of these utility threads always run at a lower than normal
668 priority, so that they are always in background. Some of them
669 may dynamically boost their priority by the pri_adjust function,
670 even to higher than normal priority, if their task becomes urgent.
671 The running of utilities is controlled by high- and low-water marks
672 of urgency. The urgency may be measured by the number of dirty blocks
673 in the buffer pool, in the case of the flush thread, for example.
674 When the high-water mark is exceeded, a utility starts running, until
675 the urgency drops under the low-water mark. Then the utility thread
676 suspends itself to wait for an event. The master thread is
677 responsible for signaling this event when the utility thread is
678 again needed.
679 
680 For each individual type of utility, some threads always remain
681 at lower than normal priority. This is because pri_adjust is implemented
682 so that the threads at normal or higher priority control their
683 share of running time by calling sleep. Thus, if the load of the
684 system suddenly drops, these threads cannot necessarily utilize
685 the system fully. The background priority threads make up for this,
686 starting to run when the load drops.
687 
688 When there is no activity in the system, also the master thread
689 suspends itself to wait for an event making
690 the server totally silent. The responsibility to signal this
691 event is on the user thread which again receives a message
692 from a client.
693 
694 There is still one complication in our server design. If a
695 background utility thread obtains a resource (e.g., mutex) needed by a user
696 thread, and there is also some other user activity in the system,
697 the user thread may have to wait indefinitely long for the
698 resource, as the OS does not schedule a background thread if
699 there is some other runnable user thread. This problem is called
700 priority inversion in real-time programming.
701 
702 One solution to the priority inversion problem would be to
703 keep record of which thread owns which resource and
704 in the above case boost the priority of the background thread
705 so that it will be scheduled and it can release the resource.
706 This solution is called priority inheritance in real-time programming.
707 A drawback of this solution is that the overhead of acquiring a mutex
708 increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
709 the thread has to call os_thread_get_curr_id.
710 This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
711 pair. Note that the thread
712 cannot store the information in the resource, say mutex, itself,
713 because competing threads could wipe out the information if it is
714 stored before acquiring the mutex, and if it is stored afterwards,
715 the information is outdated for the time of one machine instruction,
716 at least. (To be precise, the information could be stored to
717 lock_word in mutex if the machine supports atomic swap.)
718 
719 The above solution with priority inheritance may become actual in the
720 future, but at the moment we plan to implement a more coarse solution,
721 which could be called a global priority inheritance. If a thread
722 has to wait for a long time, say 300 milliseconds, for a resource,
723 we just guess that it may be waiting for a resource owned by a background
724 thread, and boost the priority of all runnable background threads
725 to the normal level. The background threads then themselves adjust
726 their fixed priority back to background after releasing all resources
727 they had (or, at some fixed points in their program code).
728 
729 What is the performance of the global priority inheritance solution?
730 We may weigh the length of the wait time, 300 milliseconds, during
731 which the system processes some other thread,
732 against the cost of boosting the priority of each runnable background
733 thread, rescheduling it, and lowering the priority again.
734 On 100 MHz Pentium + NT this overhead may be of the order 100
735 microseconds per thread. So, if the number of runnable background
736 threads is not very big, say < 100, the cost is tolerable.
737 Utility threads probably will access resources used by
738 user threads not very often, so collisions of user threads
739 to preempted utility threads should not happen very often.
740 
741 The thread table contains
742 information of the current status of each thread existing in the system,
743 and also the event semaphores used in suspending the master thread
744 and utility and parallel communication threads when they have nothing to do.
745 The thread table can be seen as an analogue to the process table
746 in a traditional Unix implementation.
747 
748 The thread table is also used in the global priority inheritance
749 scheme. This brings in one additional complication: threads accessing
750 the thread table must have at least normal fixed priority,
751 because the priority inheritance solution does not work if a background
752 thread is preempted while possessing the mutex protecting the thread table.
753 So, if a thread accesses the thread table, its priority has to be
754 boosted at least to normal. This priority requirement can be seen similar to
755 the privileged mode used when processing the kernel calls in traditional
756 Unix.*/
757 
758 /* Thread slot in the thread table */
761  os_thread_t handle;
762  unsigned type:1;
763  unsigned in_use:1;
764  unsigned suspended:1;
772 };
773 
774 /* Table for MySQL threads where they will be suspended to wait for locks */
775 UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;
776 
777 UNIV_INTERN os_event_t srv_timeout_event;
778 
779 UNIV_INTERN os_event_t srv_monitor_event;
780 
781 UNIV_INTERN os_event_t srv_error_event;
782 
783 UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
784 
785 UNIV_INTERN srv_sys_t* srv_sys = NULL;
786 
787 /* padding to prevent other memory update hotspots from residing on
788 the same memory cache line */
789 UNIV_INTERN byte srv_pad1[64];
790 /* mutex protecting the server, trx structs, query threads, and lock table */
791 UNIV_INTERN mutex_t* kernel_mutex_temp;
792 /* mutex protecting the sys header for writing the commit id */
793 UNIV_INTERN mutex_t* commit_id_mutex_temp;
794 
795 /* padding to prevent other memory update hotspots from residing on
796 the same memory cache line */
797 UNIV_INTERN byte srv_pad2[64];
798 
799 #if 0
800 /* The following three values measure the urgency of the jobs of
801 buffer, version, and insert threads. They may vary from 0 - 1000.
802 The server mutex protects all these variables. The low-water values
803 tell that the server can acquiesce the utility when the value
804 drops below this low-water mark. */
805 
806 static ulint srv_meter[SRV_MASTER + 1];
807 static ulint srv_meter_low_water[SRV_MASTER + 1];
808 static ulint srv_meter_high_water[SRV_MASTER + 1];
809 static ulint srv_meter_high_water2[SRV_MASTER + 1];
810 static ulint srv_meter_foreground[SRV_MASTER + 1];
811 #endif
812 
813 /***********************************************************************
814 Prints counters for work done by srv_master_thread. */
815 static
816 void
817 srv_print_master_thread_info(
818 /*=========================*/
819  FILE *file) /* in: output stream */
820 {
821  fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
822  "%lu 10_second, %lu background, %lu flush\n",
823  srv_main_1_second_loops, srv_main_sleeps,
824  srv_main_10_second_loops, srv_main_background_loops,
825  srv_main_flush_loops);
826  fprintf(file, "srv_master_thread log flush and writes: %lu\n",
827  srv_log_writes_and_flush);
828 }
829 
830 /* The following values give info about the activity going on in
831 the database. They are protected by the server mutex. The arrays
832 are indexed by the type of the thread. */
833 
834 UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1];
835 UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1];
836 
837 /*********************************************************************/
839 UNIV_INTERN
840 void
842 /*======================*/
843  ulint i,
844  const char* str)
846 {
847  ut_a(i < SRV_MAX_N_IO_THREADS);
848 
849  srv_io_thread_op_info[i] = str;
850 }
851 
852 /*********************************************************************/
856 static
857 srv_slot_t*
858 srv_table_get_nth_slot(
859 /*===================*/
860  ulint index)
861 {
862  ut_ad(mutex_own(&kernel_mutex));
863  ut_a(index < OS_THREAD_MAX_N);
864 
865  return(srv_sys->threads + index);
866 }
867 
868 /*********************************************************************/
871 UNIV_INTERN
872 ulint
874 /*===================*/
875 {
876  ulint i;
877  ulint n_threads = 0;
878 
879  mutex_enter(&kernel_mutex);
880 
881  for (i = 0; i < SRV_MASTER + 1; i++) {
882 
883  n_threads += srv_n_threads[i];
884  }
885 
886  mutex_exit(&kernel_mutex);
887 
888  return(n_threads);
889 }
890 
891 #ifdef UNIV_DEBUG
892 /*********************************************************************/
895 static
896 ibool
897 srv_thread_type_validate(
898 /*=====================*/
899  enum srv_thread_type type)
900 {
901  switch (type) {
902  case SRV_WORKER:
903  case SRV_MASTER:
904  return(TRUE);
905  }
906  ut_error;
907  return(FALSE);
908 }
909 #endif /* UNIV_DEBUG */
910 
911 /*********************************************************************/
914 static
915 enum srv_thread_type
916 srv_slot_get_type(
917 /*==============*/
918  const srv_slot_t* slot)
919 {
920  enum srv_thread_type type = (enum srv_thread_type) slot->type;
921  ut_ad(srv_thread_type_validate(type));
922  return(type);
923 }
924 
925 /*********************************************************************/
929 static
930 srv_slot_t*
931 srv_table_reserve_slot(
932 /*===================*/
933  enum srv_thread_type type)
934 {
935  srv_slot_t* slot;
936  ulint i;
937 
938  ut_ad(srv_thread_type_validate(type));
939  ut_ad(mutex_own(&kernel_mutex));
940 
941  i = 0;
942  slot = srv_table_get_nth_slot(i);
943 
944  while (slot->in_use) {
945  i++;
946  slot = srv_table_get_nth_slot(i);
947  }
948 
949  slot->in_use = TRUE;
950  slot->suspended = FALSE;
951  slot->type = type;
952  ut_ad(srv_slot_get_type(slot) == type);
953  slot->id = os_thread_get_curr_id();
954  slot->handle = os_thread_get_curr();
955 
956  return(slot);
957 }
958 
959 /*********************************************************************/
962 static
963 void
964 srv_suspend_thread(
965 /*===============*/
966  srv_slot_t* slot)
967 {
968  enum srv_thread_type type;
969 
970  ut_ad(mutex_own(&kernel_mutex));
971 
972  ut_ad(slot->in_use);
973  ut_ad(!slot->suspended);
974  ut_ad(slot->id == os_thread_get_curr_id());
975 
976  if (srv_print_thread_releases) {
977  fprintf(stderr,
978  "Suspending thread %lu to slot %lu\n",
979  (ulong) os_thread_get_curr_id(),
980  (ulong) (slot - srv_sys->threads));
981  }
982 
983  type = srv_slot_get_type(slot);
984 
985  slot->suspended = TRUE;
986 
987  ut_ad(srv_n_threads_active[type] > 0);
988 
989  srv_n_threads_active[type]--;
990 
991  os_event_reset(slot->event);
992 }
993 
994 /*********************************************************************/
999 UNIV_INTERN
1000 ulint
1002 /*================*/
1003  enum srv_thread_type type,
1004  ulint n)
1005 {
1006  srv_slot_t* slot;
1007  ulint i;
1008  ulint count = 0;
1009 
1010  ut_ad(srv_thread_type_validate(type));
1011  ut_ad(n > 0);
1012  ut_ad(mutex_own(&kernel_mutex));
1013 
1014  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1015 
1016  slot = srv_table_get_nth_slot(i);
1017 
1018  if (slot->in_use && slot->suspended
1019  && srv_slot_get_type(slot) == type) {
1020 
1021  slot->suspended = FALSE;
1022 
1023  srv_n_threads_active[type]++;
1024 
1025  os_event_set(slot->event);
1026 
1027  if (srv_print_thread_releases) {
1028  fprintf(stderr,
1029  "Releasing thread %lu type %lu"
1030  " from slot %lu\n",
1031  (ulong) slot->id, (ulong) type,
1032  (ulong) i);
1033  }
1034 
1035  count++;
1036 
1037  if (count == n) {
1038  break;
1039  }
1040  }
1041  }
1042 
1043  return(count);
1044 }
1045 
1046 /*********************************************************************/
1050 UNIV_INTERN
1051 ulint
1053 /*=========================*/
1054  enum srv_thread_type type)
1055 {
1056  ulint i;
1057  ulint slot_no = ULINT_UNDEFINED;
1058 
1059  ut_ad(srv_thread_type_validate(type));
1060  mutex_enter(&kernel_mutex);
1061 
1062  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1063  srv_slot_t* slot;
1064 
1065  slot = srv_table_get_nth_slot(i);
1066 
1067  if (slot->in_use && slot->type == (unsigned int)type) {
1068  slot_no = i;
1069  break;
1070  }
1071  }
1072 
1073  mutex_exit(&kernel_mutex);
1074 
1075  return(slot_no);
1076 }
1077 
1078 /*********************************************************************/
1080 UNIV_INTERN
1081 void
1083 /*==========*/
1084 {
1085  srv_conc_slot_t* conc_slot;
1086  srv_slot_t* slot;
1087  ulint i;
1088 
1089  srv_sys = static_cast<srv_sys_t *>(mem_alloc(sizeof(srv_sys_t)));
1090 
1091  kernel_mutex_temp = static_cast<ib_mutex_t *>(mem_alloc(sizeof(mutex_t)));
1092  mutex_create(kernel_mutex_key, &kernel_mutex, SYNC_KERNEL);
1093 
1094  commit_id_mutex_temp = static_cast<ib_mutex_t *>(mem_alloc(sizeof(mutex_t)));
1095  mutex_create(commit_id_mutex_key, &commit_id_mutex, SYNC_COMMIT_ID_LOCK);
1096 
1097  mutex_create(srv_innodb_monitor_mutex_key,
1098  &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
1099 
1100  srv_sys->threads = static_cast<srv_table_t *>(mem_zalloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)));
1101 
1102  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1103  slot = srv_sys->threads + i;
1104  slot->event = os_event_create(NULL);
1105  ut_a(slot->event);
1106  }
1107 
1108  srv_mysql_table = static_cast<srv_slot_t *>(mem_zalloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)));
1109 
1110  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1111  slot = srv_mysql_table + i;
1112  slot->event = os_event_create(NULL);
1113  ut_a(slot->event);
1114  }
1115 
1116  srv_error_event = os_event_create(NULL);
1117 
1118  srv_timeout_event = os_event_create(NULL);
1119 
1120  srv_monitor_event = os_event_create(NULL);
1121 
1122  srv_lock_timeout_thread_event = os_event_create(NULL);
1123 
1124  for (i = 0; i < SRV_MASTER + 1; i++) {
1125  srv_n_threads_active[i] = 0;
1126  srv_n_threads[i] = 0;
1127 #if 0
1128  srv_meter[i] = 30;
1129  srv_meter_low_water[i] = 50;
1130  srv_meter_high_water[i] = 100;
1131  srv_meter_high_water2[i] = 200;
1132  srv_meter_foreground[i] = 250;
1133 #endif
1134  }
1135 
1136  UT_LIST_INIT(srv_sys->tasks);
1137 
1138  /* Create dummy indexes for infimum and supremum records */
1139 
1140  dict_ind_init();
1141 
1142  /* Init the server concurrency restriction data structures */
1143 
1144  os_fast_mutex_init(&srv_conc_mutex);
1145 
1146  UT_LIST_INIT(srv_conc_queue);
1147 
1148  srv_conc_slots = static_cast<srv_conc_slot_t *>(mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t)));
1149 
1150  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1151  conc_slot = srv_conc_slots + i;
1152  conc_slot->reserved = FALSE;
1153  conc_slot->event = os_event_create(NULL);
1154  ut_a(conc_slot->event);
1155  }
1156 
1157  /* Initialize some INFORMATION SCHEMA internal structures */
1159 }
1160 
1161 /*********************************************************************/
1163 UNIV_INTERN
1164 void
1166 /*==========*/
1167 {
1168  os_fast_mutex_free(&srv_conc_mutex);
1169  mem_free(srv_conc_slots);
1170  srv_conc_slots = NULL;
1171 
1172  mem_free(srv_sys->threads);
1173  mem_free(srv_sys);
1174  srv_sys = NULL;
1175 
1176  mem_free(kernel_mutex_temp);
1177  kernel_mutex_temp = NULL;
1178  mem_free(srv_mysql_table);
1179  srv_mysql_table = NULL;
1180 
1181  mem_free(commit_id_mutex_temp);
1182  commit_id_mutex_temp = NULL;
1183 
1185 }
1186 
1187 /*********************************************************************/
1190 UNIV_INTERN
1191 void
1193 /*==================*/
1194 {
1195  ut_mem_init();
1196  /* Reset the system variables in the recovery module. */
1198  os_sync_init();
1199  sync_init();
1200  mem_init(srv_mem_pool_size);
1201 }
1202 
1203 /*======================= InnoDB Server FIFO queue =======================*/
1204 
1205 /* Maximum allowable purge history length. 0 means 'infinite' (the variable is a ulong, so it cannot be negative). */
1206 UNIV_INTERN ulong srv_max_purge_lag = 0;
1207 
1208 /*********************************************************************/
1211 UNIV_INTERN
1212 void
1214 /*==================*/
1215  trx_t* trx)
1217 {
1218  ibool has_slept = FALSE;
1219  srv_conc_slot_t* slot = NULL;
1220  ulint i;
1221 
1222  if (trx->mysql_thd != NULL
1224 
1225  UT_WAIT_FOR(srv_conc_n_threads
1226  < (lint)srv_thread_concurrency,
1227  srv_replication_delay * 1000);
1228 
1229  return;
1230  }
1231 
1232  /* If trx has 'free tickets' to enter the engine left, then use one
1233  such ticket */
1234 
1235  if (trx->n_tickets_to_enter_innodb > 0) {
1236  trx->n_tickets_to_enter_innodb--;
1237 
1238  return;
1239  }
1240 
1241  os_fast_mutex_lock(&srv_conc_mutex);
1242 retry:
1243  if (trx->declared_to_be_inside_innodb) {
1244  ut_print_timestamp(stderr);
1245  fputs(" InnoDB: Error: trying to declare trx"
1246  " to enter InnoDB, but\n"
1247  "InnoDB: it already is declared.\n", stderr);
1248  trx_print(stderr, trx, 0);
1249  putc('\n', stderr);
1250  os_fast_mutex_unlock(&srv_conc_mutex);
1251 
1252  return;
1253  }
1254 
1255  ut_ad(srv_conc_n_threads >= 0);
1256 
1257  if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
1258 
1259  srv_conc_n_threads++;
1260  trx->declared_to_be_inside_innodb = TRUE;
1261  trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
1262 
1263  os_fast_mutex_unlock(&srv_conc_mutex);
1264 
1265  return;
1266  }
1267 
1268  /* If the transaction is not holding resources, let it sleep
1269  for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */
1270 
1271  if (!has_slept && !trx->has_search_latch
1272  && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
1273 
1274  has_slept = TRUE; /* We let it sleep only once to avoid
1275  starvation */
1276 
1277  srv_conc_n_waiting_threads++;
1278 
1279  os_fast_mutex_unlock(&srv_conc_mutex);
1280 
1281  trx->op_info = "sleeping before joining InnoDB queue";
1282 
1283  /* Peter Zaitsev suggested that we take the sleep away
1284  altogether. But the sleep may be good in pathological
1285  situations of lots of thread switches. Simply put some
1286  threads aside for a while to reduce the number of thread
1287  switches. */
1288  if (SRV_THREAD_SLEEP_DELAY > 0) {
1289  os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
1290  }
1291 
1292  trx->op_info = "";
1293 
1294  os_fast_mutex_lock(&srv_conc_mutex);
1295 
1296  srv_conc_n_waiting_threads--;
1297 
1298  goto retry;
1299  }
1300 
1301  /* Too many threads inside: put the current thread to a queue */
1302 
1303  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1304  slot = srv_conc_slots + i;
1305 
1306  if (!slot->reserved) {
1307 
1308  break;
1309  }
1310  }
1311 
1312  if (i == OS_THREAD_MAX_N) {
1313  /* Could not find a free wait slot, we must let the
1314  thread enter */
1315 
1316  srv_conc_n_threads++;
1317  trx->declared_to_be_inside_innodb = TRUE;
1318  trx->n_tickets_to_enter_innodb = 0;
1319 
1320  os_fast_mutex_unlock(&srv_conc_mutex);
1321 
1322  return;
1323  }
1324 
1325  /* Release possible search system latch this thread has */
1326  if (trx->has_search_latch) {
1328  }
1329 
1330  /* Add to the queue */
1331  slot->reserved = TRUE;
1332  slot->wait_ended = FALSE;
1333 
1334  UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
1335 
1336  os_event_reset(slot->event);
1337 
1338  srv_conc_n_waiting_threads++;
1339 
1340  os_fast_mutex_unlock(&srv_conc_mutex);
1341 
1342  /* Go to wait for the event; when a thread leaves InnoDB it will
1343  release this thread */
1344 
1345  trx->op_info = "waiting in InnoDB queue";
1346 
1347  os_event_wait(slot->event);
1348 
1349  trx->op_info = "";
1350 
1351  os_fast_mutex_lock(&srv_conc_mutex);
1352 
1353  srv_conc_n_waiting_threads--;
1354 
1355  /* NOTE that the thread which released this thread already
1356  incremented the thread counter on behalf of this thread */
1357 
1358  slot->reserved = FALSE;
1359 
1360  UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
1361 
1362  trx->declared_to_be_inside_innodb = TRUE;
1363  trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
1364 
1365  os_fast_mutex_unlock(&srv_conc_mutex);
1366 }
1367 
1368 /*********************************************************************/
1371 UNIV_INTERN
1372 void
1374 /*========================*/
1375  trx_t* trx)
1377 {
1378  if (UNIV_LIKELY(!srv_thread_concurrency)) {
1379 
1380  return;
1381  }
1382 
1383  ut_ad(srv_conc_n_threads >= 0);
1384 
1385  os_fast_mutex_lock(&srv_conc_mutex);
1386 
1387  srv_conc_n_threads++;
1388  trx->declared_to_be_inside_innodb = TRUE;
1389  trx->n_tickets_to_enter_innodb = 1;
1390 
1391  os_fast_mutex_unlock(&srv_conc_mutex);
1392 }
1393 
1394 /*********************************************************************/
1397 UNIV_INTERN
1398 void
1400 /*=======================*/
1401  trx_t* trx)
1403 {
1404  srv_conc_slot_t* slot = NULL;
1405 
1406  if (trx->mysql_thd != NULL
1408 
1409  return;
1410  }
1411 
1412  if (trx->declared_to_be_inside_innodb == FALSE) {
1413 
1414  return;
1415  }
1416 
1417  os_fast_mutex_lock(&srv_conc_mutex);
1418 
1419  ut_ad(srv_conc_n_threads > 0);
1420  srv_conc_n_threads--;
1421  trx->declared_to_be_inside_innodb = FALSE;
1422  trx->n_tickets_to_enter_innodb = 0;
1423 
1424  if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
1425  /* Look for a slot where a thread is waiting and no other
1426  thread has yet released the thread */
1427 
1428  slot = UT_LIST_GET_FIRST(srv_conc_queue);
1429 
1430  while (slot && slot->wait_ended == TRUE) {
1431  slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
1432  }
1433 
1434  if (slot != NULL) {
1435  slot->wait_ended = TRUE;
1436 
1437  /* We increment the count on behalf of the released
1438  thread */
1439 
1440  srv_conc_n_threads++;
1441  }
1442  }
1443 
1444  os_fast_mutex_unlock(&srv_conc_mutex);
1445 
1446  if (slot != NULL) {
1447  os_event_set(slot->event);
1448  }
1449 }
1450 
1451 /*********************************************************************/
1453 UNIV_INTERN
1454 void
1456 /*=================*/
1457  trx_t* trx)
1459 {
1460  if (trx->n_tickets_to_enter_innodb > 0) {
1461  /* We will pretend the thread is still inside InnoDB though it
1462  now leaves the InnoDB engine. In this way we save
1463  a lot of semaphore operations. srv_conc_force_exit_innodb is
1464  used to declare the thread definitely outside InnoDB. It
1465  should be called when there is a lock wait or an SQL statement
1466  ends. */
1467 
1468  return;
1469  }
1470 
1472 }
1473 
1474 /*========================================================================*/
1475 
1476 /*********************************************************************/
1479 static
1480 ulint
1481 srv_normalize_init_values(void)
1482 /*===========================*/
1483 {
1484  ulint n;
1485  ulint i;
1486 
1487  n = srv_n_data_files;
1488 
1489  for (i = 0; i < n; i++) {
1490  srv_data_file_sizes[i] = srv_data_file_sizes[i]
1491  * ((1024 * 1024) / UNIV_PAGE_SIZE);
1492  }
1493 
1494  srv_last_file_size_max = srv_last_file_size_max
1495  * ((1024 * 1024) / UNIV_PAGE_SIZE);
1496 
1497  srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
1498 
1499  srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
1500 
1501  srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
1502 
1503  return(DB_SUCCESS);
1504 }
1505 
1506 /*********************************************************************/
1509 UNIV_INTERN
1510 ulint
1512 /*==========*/
1513 {
1514  ulint err;
1515 
1516  /* Transform the init parameter values given by MySQL to
1517  use units we use inside InnoDB: */
1518 
1519  err = srv_normalize_init_values();
1520 
1521  if (err != DB_SUCCESS) {
1522  return(err);
1523  }
1524 
1525  /* Initialize synchronization primitives, memory management, and thread
1526  local storage */
1527 
1528  srv_general_init();
1529 
1530  /* Initialize this module */
1531 
1532  srv_init();
1533 
1534  return(DB_SUCCESS);
1535 }
1536 
1537 /*********************************************************************/
1541 static
1542 srv_slot_t*
1543 srv_table_reserve_slot_for_mysql(void)
1544 /*==================================*/
1545 {
1546  srv_slot_t* slot;
1547  ulint i;
1548 
1549  ut_ad(mutex_own(&kernel_mutex));
1550 
1551  i = 0;
1552  slot = srv_mysql_table + i;
1553 
1554  while (slot->in_use) {
1555  i++;
1556 
1557  if (UNIV_UNLIKELY(i >= OS_THREAD_MAX_N)) {
1558 
1559  ut_print_timestamp(stderr);
1560 
1561  fprintf(stderr,
1562  " InnoDB: There appear to be %lu MySQL"
1563  " threads currently waiting\n"
1564  "InnoDB: inside InnoDB, which is the"
1565  " upper limit. Cannot continue operation.\n"
1566  "InnoDB: We intentionally generate"
1567  " a seg fault to print a stack trace\n"
1568  "InnoDB: on Linux. But first we print"
1569  " a list of waiting threads.\n", (ulong) i);
1570 
1571  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1572 
1573  slot = srv_mysql_table + i;
1574 
1575  fprintf(stderr,
1576  "Slot %lu: thread id %lu, type %lu,"
1577  " in use %lu, susp %lu, time %lu\n",
1578  (ulong) i,
1579  (ulong) os_thread_pf(slot->id),
1580  (ulong) slot->type,
1581  (ulong) slot->in_use,
1582  (ulong) slot->suspended,
1583  (ulong) difftime(ut_time(),
1584  slot->suspend_time));
1585  }
1586 
1587  ut_error;
1588  }
1589 
1590  slot = srv_mysql_table + i;
1591  }
1592 
1593  ut_a(slot->in_use == FALSE);
1594 
1595  slot->in_use = TRUE;
1596  slot->id = os_thread_get_curr_id();
1597  slot->handle = os_thread_get_curr();
1598 
1599  return(slot);
1600 }
1601 
1602 /***************************************************************/
1608 UNIV_INTERN
1609 void
1611 /*=====================*/
1612  que_thr_t* thr)
1614 {
1615  srv_slot_t* slot;
1616  os_event_t event;
1617  double wait_time;
1618  trx_t* trx;
1619  ulint had_dict_lock;
1620  ibool was_declared_inside_innodb = FALSE;
1621  ib_int64_t start_time = 0;
1622  ib_int64_t finish_time;
1623  ulint diff_time;
1624  ulint sec;
1625  ulint ms;
1626  ulong lock_wait_timeout;
1627 
1628  ut_ad(!mutex_own(&kernel_mutex));
1629 
1630  trx = thr_get_trx(thr);
1631 
1632  os_event_set(srv_lock_timeout_thread_event);
1633 
1634  mutex_enter(&kernel_mutex);
1635 
1636  trx->error_state = DB_SUCCESS;
1637 
1638  if (thr->state == QUE_THR_RUNNING) {
1639 
1640  ut_ad(thr->is_active == TRUE);
1641 
1642  /* The lock has already been released or this transaction
1643  was chosen as a deadlock victim: no need to suspend */
1644 
1645  if (trx->was_chosen_as_deadlock_victim) {
1646 
1647  trx->error_state = DB_DEADLOCK;
1648  trx->was_chosen_as_deadlock_victim = FALSE;
1649  }
1650 
1651  mutex_exit(&kernel_mutex);
1652 
1653  return;
1654  }
1655 
1656  ut_ad(thr->is_active == FALSE);
1657 
1658  slot = srv_table_reserve_slot_for_mysql();
1659 
1660  event = slot->event;
1661 
1662  slot->thr = thr;
1663 
1664  os_event_reset(event);
1665 
1666  slot->suspend_time = ut_time();
1667 
1668  if (thr->lock_state == QUE_THR_LOCK_ROW) {
1669  srv_n_lock_wait_count++;
1670  srv_n_lock_wait_current_count++;
1671 
1672  if (ut_usectime(&sec, &ms) == -1) {
1673  start_time = -1;
1674  } else {
1675  start_time = (ib_int64_t) sec * 1000000 + ms;
1676  }
1677  }
1678  /* Wake the lock timeout monitor thread, if it is suspended */
1679 
1680  os_event_set(srv_lock_timeout_thread_event);
1681 
1682  mutex_exit(&kernel_mutex);
1683 
1684  if (trx->declared_to_be_inside_innodb) {
1685 
1686  was_declared_inside_innodb = TRUE;
1687 
1688  /* We must declare this OS thread to exit InnoDB, since a
1689  possible other thread holding a lock which this thread waits
1690  for must be allowed to enter, sooner or later */
1691 
1693  }
1694 
1695  had_dict_lock = trx->dict_operation_lock_mode;
1696 
1697  switch (had_dict_lock) {
1698  case RW_S_LATCH:
1699  /* Release foreign key check latch */
1701  break;
1702  case RW_X_LATCH:
1703  /* There should never be a lock wait when the
1704  dictionary latch is reserved in X mode. Dictionary
1705  transactions should only acquire locks on dictionary
1706  tables, not other tables. All access to dictionary
1707  tables should be covered by dictionary
1708  transactions. */
1709  ut_print_timestamp(stderr);
1710  fputs(" InnoDB: Error: dict X latch held in "
1711  "srv_suspend_mysql_thread\n", stderr);
1712  /* This should never occur. This incorrect handling
1713  was added in the early development of
1714  ha_innobase::add_index() in InnoDB Plugin 1.0. */
1715  /* Release fast index creation latch */
1717  break;
1718  }
1719 
1720  ut_a(trx->dict_operation_lock_mode == 0);
1721 
1722  /* Suspend this thread and wait for the event. */
1723 
1724  os_event_wait(event);
1725 
1726  /* After resuming, reacquire the data dictionary latch if
1727  necessary. */
1728 
1729  switch (had_dict_lock) {
1730  case RW_S_LATCH:
1731  row_mysql_freeze_data_dictionary(trx);
1732  break;
1733  case RW_X_LATCH:
1734  /* This should never occur. This incorrect handling
1735  was added in the early development of
1736  ha_innobase::add_index() in InnoDB Plugin 1.0. */
1737  row_mysql_lock_data_dictionary(trx);
1738  break;
1739  }
1740 
1741  if (was_declared_inside_innodb) {
1742 
1743  /* Return back inside InnoDB */
1744 
1746  }
1747 
1748  mutex_enter(&kernel_mutex);
1749 
1750  /* Release the slot for others to use */
1751 
1752  slot->in_use = FALSE;
1753 
1754  wait_time = ut_difftime(ut_time(), slot->suspend_time);
1755 
1756  if (thr->lock_state == QUE_THR_LOCK_ROW) {
1757  if (ut_usectime(&sec, &ms) == -1) {
1758  finish_time = -1;
1759  } else {
1760  finish_time = (ib_int64_t) sec * 1000000 + ms;
1761  }
1762 
1763  diff_time = (ulint) (finish_time - start_time);
1764 
1765  srv_n_lock_wait_current_count--;
1766  srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
1767  if (diff_time > srv_n_lock_max_wait_time &&
1768  /* only update the variable if we successfully
1769  retrieved the start and finish times. See Bug#36819. */
1770  start_time != -1 && finish_time != -1) {
1771  srv_n_lock_max_wait_time = diff_time;
1772  }
1773 
1774  /* Record the lock wait time for this thread */
1775  thd_set_lock_wait_time(trx->mysql_thd, diff_time);
1776  }
1777 
1778  if (trx->was_chosen_as_deadlock_victim) {
1779 
1780  trx->error_state = DB_DEADLOCK;
1781  trx->was_chosen_as_deadlock_victim = FALSE;
1782  }
1783 
1784  mutex_exit(&kernel_mutex);
1785 
1786  /* InnoDB system transactions (such as the purge, and
1787  incomplete transactions that are being rolled back after crash
1788  recovery) will use the global value of
1789  innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */
1790  lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);
1791 
1792  if (lock_wait_timeout < 100000000
1793  && wait_time > (double) lock_wait_timeout) {
1794 
1795  trx->error_state = DB_LOCK_WAIT_TIMEOUT;
1796  }
1797 
1798  if (trx_is_interrupted(trx)) {
1799 
1800  trx->error_state = DB_INTERRUPTED;
1801  }
1802 }
1803 
1804 /********************************************************************/
1807 UNIV_INTERN
1808 void
1810 /*==================================*/
1811  que_thr_t* thr)
1813 {
1814  srv_slot_t* slot;
1815  ulint i;
1816 
1817  ut_ad(mutex_own(&kernel_mutex));
1818 
1819  for (i = 0; i < OS_THREAD_MAX_N; i++) {
1820 
1821  slot = srv_mysql_table + i;
1822 
1823  if (slot->in_use && slot->thr == thr) {
1824  /* Found */
1825 
1826  os_event_set(slot->event);
1827 
1828  return;
1829  }
1830  }
1831 
1832  /* not found */
1833 }
1834 
1835 /******************************************************************/
1837 static
1838 void
1839 srv_refresh_innodb_monitor_stats(void)
1840 /*==================================*/
1841 {
1842  mutex_enter(&srv_innodb_monitor_mutex);
1843 
1844  srv_last_monitor_time = time(NULL);
1845 
1847 
1850 
1852 
1854 
1855  srv_n_rows_inserted_old = srv_n_rows_inserted;
1856  srv_n_rows_updated_old = srv_n_rows_updated;
1857  srv_n_rows_deleted_old = srv_n_rows_deleted;
1858  srv_n_rows_read_old = srv_n_rows_read;
1859 
1860  mutex_exit(&srv_innodb_monitor_mutex);
1861 }
1862 
1863 /******************************************************************/
1867 UNIV_INTERN
1868 ibool
1870 /*======================*/
1871  FILE* file,
1872  ibool nowait,
1873  ulint* trx_start,
1875  ulint* trx_end)
1877 {
1878  double time_elapsed;
1879  time_t current_time;
1880  ulint n_reserved;
1881  ibool ret;
1882 
1883  mutex_enter(&srv_innodb_monitor_mutex);
1884 
1885  current_time = time(NULL);
1886 
1887  /* We add 0.001 seconds to time_elapsed to prevent division
1888  by zero if two users happen to call SHOW INNODB STATUS at the same
1889  time */
1890 
1891  time_elapsed = difftime(current_time, srv_last_monitor_time)
1892  + 0.001;
1893 
1894  srv_last_monitor_time = time(NULL);
1895 
1896  fputs("\n=====================================\n", file);
1897 
1898  ut_print_timestamp(file);
1899  fprintf(file,
1900  " INNODB MONITOR OUTPUT\n"
1901  "=====================================\n"
1902  "Per second averages calculated from the last %lu seconds\n",
1903  (ulong)time_elapsed);
1904 
1905  fputs("-----------------\n"
1906  "BACKGROUND THREAD\n"
1907  "-----------------\n", file);
1908  srv_print_master_thread_info(file);
1909 
1910  fputs("----------\n"
1911  "SEMAPHORES\n"
1912  "----------\n", file);
1913  sync_print(file);
1914 
1915  /* Conceptually, srv_innodb_monitor_mutex has a very high latching
1916  order level in sync0sync.h, while dict_foreign_err_mutex has a very
1917  low level 135. Therefore we can reserve the latter mutex here without
1918  a danger of a deadlock of threads. */
1919 
1920  mutex_enter(&dict_foreign_err_mutex);
1921 
1922  if (ftell(dict_foreign_err_file) != 0L) {
1923  fputs("------------------------\n"
1924  "LATEST FOREIGN KEY ERROR\n"
1925  "------------------------\n", file);
1926  ut_copy_file(file, dict_foreign_err_file);
1927  }
1928 
1929  mutex_exit(&dict_foreign_err_mutex);
1930 
1931  /* Only if lock_print_info_summary proceeds correctly
1932  do we go on to call lock_print_info_all_transactions
1933  to print all the lock information. */
1934  ret = lock_print_info_summary(file, nowait);
1935 
1936  if (ret) {
1937  if (trx_start) {
1938  long t = ftell(file);
1939  if (t < 0) {
1940  *trx_start = ULINT_UNDEFINED;
1941  } else {
1942  *trx_start = (ulint) t;
1943  }
1944  }
1946  if (trx_end) {
1947  long t = ftell(file);
1948  if (t < 0) {
1949  *trx_end = ULINT_UNDEFINED;
1950  } else {
1951  *trx_end = (ulint) t;
1952  }
1953  }
1954  }
1955 
1956  fputs("--------\n"
1957  "FILE I/O\n"
1958  "--------\n", file);
1959  os_aio_print(file);
1960 
1961  fputs("-------------------------------------\n"
1962  "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
1963  "-------------------------------------\n", file);
1964  ibuf_print(file);
1965 
1967 
1968  fprintf(file,
1969  "%.2f hash searches/s, %.2f non-hash searches/s\n",
1971  / time_elapsed,
1973  / time_elapsed);
1976 
1977  fputs("---\n"
1978  "LOG\n"
1979  "---\n", file);
1980  log_print(file);
1981 
1982  fputs("----------------------\n"
1983  "BUFFER POOL AND MEMORY\n"
1984  "----------------------\n", file);
1985  fprintf(file,
1986  "Total memory allocated " ULINTPF
1987  "; in additional pool allocated " ULINTPF "\n",
1990  fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
1991  dict_sys->size);
1992 
1993  buf_print_io(file);
1994 
1995  fputs("--------------\n"
1996  "ROW OPERATIONS\n"
1997  "--------------\n", file);
1998  fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
1999  (long) srv_conc_n_threads,
2000  (ulong) srv_conc_n_waiting_threads);
2001 
2002  fprintf(file, "%lu read views open inside InnoDB\n",
2003  static_cast<ulint>(UT_LIST_GET_LEN(trx_sys->view_list)));
2004 
2005  n_reserved = fil_space_get_n_reserved_extents(0);
2006  if (n_reserved > 0) {
2007  fprintf(file,
2008  "%lu tablespace extents now reserved for"
2009  " B-tree split operations\n",
2010  (ulong) n_reserved);
2011  }
2012 
2013 #ifdef UNIV_LINUX
2014  fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
2015  (ulong) srv_main_thread_process_no,
2016  (ulong) srv_main_thread_id,
2017  srv_main_thread_op_info);
2018 #else
2019  fprintf(file, "Main thread id %lu, state: %s\n",
2020  (ulong) srv_main_thread_id,
2021  srv_main_thread_op_info);
2022 #endif
2023  fprintf(file,
2024  "Number of rows inserted " ULINTPF
2025  ", updated " ULINTPF ", deleted " ULINTPF
2026  ", read " ULINTPF "\n",
2027  srv_n_rows_inserted,
2028  srv_n_rows_updated,
2029  srv_n_rows_deleted,
2030  srv_n_rows_read);
2031  fprintf(file,
2032  "%.2f inserts/s, %.2f updates/s,"
2033  " %.2f deletes/s, %.2f reads/s\n",
2034  (srv_n_rows_inserted - srv_n_rows_inserted_old)
2035  / time_elapsed,
2036  (srv_n_rows_updated - srv_n_rows_updated_old)
2037  / time_elapsed,
2038  (srv_n_rows_deleted - srv_n_rows_deleted_old)
2039  / time_elapsed,
2040  (srv_n_rows_read - srv_n_rows_read_old)
2041  / time_elapsed);
2042 
2043  srv_n_rows_inserted_old = srv_n_rows_inserted;
2044  srv_n_rows_updated_old = srv_n_rows_updated;
2045  srv_n_rows_deleted_old = srv_n_rows_deleted;
2046  srv_n_rows_read_old = srv_n_rows_read;
2047 
2048  fputs("----------------------------\n"
2049  "END OF INNODB MONITOR OUTPUT\n"
2050  "============================\n", file);
2051  mutex_exit(&srv_innodb_monitor_mutex);
2052  fflush(file);
2053 
2054  return(ret);
2055 }
2056 
2057 /******************************************************************/
/* Copies server counters into the export_vars structure: the
innodb_data_*, innodb_buffer_pool_*, innodb_log_* / innodb_os_log_*,
innodb_dblwr_*, innodb_row_lock_* and innodb_rows_* fields are assigned
below. The buffer pool totals are collected first, then every assignment is
made while holding srv_innodb_monitor_mutex. Takes no value from the caller
and returns nothing.
NOTE(review): this listing has gaps in its embedded line numbers (2061,
2074-2080, 2086-2087, 2091-2092, 2094, 2096-2098, 2103, 2105, 2108, 2110,
2115-2116, 2122-2124, 2126, 2130, 2133, 2135 are absent). The function-name
line and the left-hand sides of several assignments are among the missing
lines, so the block is not compilable as shown; restore them from the
original file before changing code here. */
2059 UNIV_INTERN
2060 void
2062 /*==========================*/
2063 {
2064  buf_pool_stat_t stat;
2065  ulint LRU_len;
2066  ulint free_len;
2067  ulint flush_list_len;
2068 
      /* Collect buffer pool totals before taking the monitor mutex. */
2069  buf_get_total_stat(&stat);
2070  buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
2071 
2072  mutex_enter(&srv_innodb_monitor_mutex);
2073 
2081  export_vars.innodb_data_fsyncs = os_n_fsyncs;
2082  export_vars.innodb_data_read = srv_data_read;
2083  export_vars.innodb_data_reads = os_n_file_reads;
2084  export_vars.innodb_data_writes = os_n_file_writes;
2085  export_vars.innodb_data_written = srv_data_written;
      /* NOTE(review): the assignment target for the next value is on a
      line missing from this listing. */
2088  = srv_buf_pool_write_requests;
2089  export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
2090  export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
2093  = stat.n_ra_pages_read;
2095  = stat.n_ra_pages_evicted;
2099 #ifdef UNIV_DEBUG
2100  export_vars.innodb_buffer_pool_pages_latched
2101  = buf_get_latched_pages_number();
2102 #endif /* UNIV_DEBUG */
2104 
      /* Pages that are neither on the LRU list nor on the free list. */
2106  = buf_pool_get_n_pages() - LRU_len - free_len;
2107 #ifdef HAVE_ATOMIC_BUILTINS
2109 #else
2111 #endif
2112  export_vars.innodb_page_size = UNIV_PAGE_SIZE;
2113  export_vars.innodb_log_waits = srv_log_waits;
2114  export_vars.innodb_os_log_written = srv_os_log_written;
2117  export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
2118  export_vars.innodb_log_write_requests = srv_log_write_requests;
2119  export_vars.innodb_log_writes = srv_log_writes;
2120  export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
2121  export_vars.innodb_dblwr_writes = srv_dblwr_writes;
2125  export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
2127  = srv_n_lock_wait_current_count;
      /* The lock wait times are scaled down by 1000 before export;
      presumably a microsecond-to-millisecond conversion -- TODO confirm
      the unit of srv_n_lock_wait_time. */
2128  export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
      /* Only compute the per-wait figure when at least one wait has been
      counted, which avoids dividing by zero. */
2129  if (srv_n_lock_wait_count > 0) {
2131  (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
2132  } else {
2134  }
2136  = srv_n_lock_max_wait_time / 1000;
2137  export_vars.innodb_rows_read = srv_n_rows_read;
2138  export_vars.innodb_rows_inserted = srv_n_rows_inserted;
2139  export_vars.innodb_rows_updated = srv_n_rows_updated;
2140  export_vars.innodb_rows_deleted = srv_n_rows_deleted;
2141  export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
2142 
2143  mutex_exit(&srv_innodb_monitor_mutex);
2144 }
2145 
2146 /*********************************************************************/
/* Thread body that periodically prints InnoDB monitor output. Each pass
waits on srv_monitor_event with a timeout and, once more than 15 seconds
have elapsed since the previous printing pass:
 - prints the standard monitor to stderr when srv_print_innodb_monitor is set,
 - rewrites srv_monitor_file when srv_innodb_status is set,
 - prints the tablespace monitor and the table monitor to stderr, each at
   most once per 60 seconds, when the respective flag is set.
The single (unused) argument is a dummy. The thread leaves through
os_thread_exit() at exit_func.
NOTE(review): embedded listing numbers 2151, 2154-2155, 2168 and 2285 are
absent; the function-name line and the opening of the `if` that guards
`goto exit_func` are among the missing lines. Restore them from the original
file before changing code here. */
2149 UNIV_INTERN
2150 os_thread_ret_t
2152 /*===============*/
2153  void* /*arg __attribute__((unused))*/)
2156 {
2157  ib_int64_t sig_count;
2158  double time_elapsed;
2159  time_t current_time;
2160  time_t last_table_monitor_time;
2161  time_t last_tablespace_monitor_time;
2162  time_t last_monitor_time;
      /* Number of consecutive srv_printf_innodb_monitor() calls that
      returned false; fed to MUTEX_NOWAIT() below. */
2163  ulint mutex_skipped;
      /* Value of srv_print_innodb_monitor seen on the previous pass. */
2164  ibool last_srv_print_monitor;
2165 
2166 #ifdef UNIV_DEBUG_THREAD_CREATION
      /* NOTE(review): the message text says "Lock timeout thread" although
      this function registers srv_monitor_thread_key below. */
2167  fprintf(stderr, "Lock timeout thread starts, id %lu\n",
2169 #endif
2170 
2171 #ifdef UNIV_PFS_THREAD
2172  pfs_register_thread(srv_monitor_thread_key);
2173 #endif
2174 
      /* Start every timer from the current time. */
2175  srv_last_monitor_time = ut_time();
2176  last_table_monitor_time = ut_time();
2177  last_tablespace_monitor_time = ut_time();
2178  last_monitor_time = ut_time();
2179  mutex_skipped = 0;
2180  last_srv_print_monitor = srv_print_innodb_monitor;
2181 loop:
2182  srv_monitor_active = TRUE;
2183 
2184  /* Wake up every 5 seconds to see if we need to print
2185  monitor information or if signalled at shutdown. */
2186 
2187  sig_count = os_event_reset(srv_monitor_event);
2188 
2189  os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
2190 
2191  current_time = ut_time();
2192 
2193  time_elapsed = difftime(current_time, last_monitor_time);
2194 
      /* Printing work is done only when more than 15 seconds have passed
      since the last printing pass. */
2195  if (time_elapsed > 15) {
2196  last_monitor_time = ut_time();
2197 
2198  if (srv_print_innodb_monitor) {
2199  /* Reset mutex_skipped counter every time
2200  srv_print_innodb_monitor changes. This is to
2201  ensure we will not be blocked by kernel_mutex
2202  for short duration information printing,
2203  such as requested by sync_array_print_long_waits() */
2204  if (!last_srv_print_monitor) {
2205  mutex_skipped = 0;
2206  last_srv_print_monitor = TRUE;
2207  }
2208 
2209  if (!srv_printf_innodb_monitor(stderr,
2210  MUTEX_NOWAIT(mutex_skipped),
2211  NULL, NULL)) {
2212  mutex_skipped++;
2213  } else {
2214  /* Reset the counter */
2215  mutex_skipped = 0;
2216  }
2217  } else {
2218  last_srv_print_monitor = FALSE;
2219  }
2220 
2221 
      /* Write the report to the status file from its beginning while
      holding srv_monitor_file_mutex. os_file_set_eof() presumably
      truncates the remainder of a longer previous report -- TODO
      confirm. The mutex_skipped counter is shared with the stderr
      path above. */
2222  if (srv_innodb_status) {
2223  mutex_enter(&srv_monitor_file_mutex);
2224  rewind(srv_monitor_file);
2225  if (!srv_printf_innodb_monitor(srv_monitor_file,
2226  MUTEX_NOWAIT(mutex_skipped),
2227  NULL, NULL)) {
2228  mutex_skipped++;
2229  } else {
2230  mutex_skipped = 0;
2231  }
2232 
2233  os_file_set_eof(srv_monitor_file);
2234  mutex_exit(&srv_monitor_file_mutex);
2235  }
2236 
      /* Tablespace monitor: at most once per 60 seconds. */
2237  if (srv_print_innodb_tablespace_monitor
2238  && difftime(current_time,
2239  last_tablespace_monitor_time) > 60) {
2240  last_tablespace_monitor_time = ut_time();
2241 
2242  fputs("========================"
2243  "========================\n",
2244  stderr);
2245 
2246  ut_print_timestamp(stderr);
2247 
2248  fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
2249  "========================"
2250  "========================\n",
2251  stderr);
2252 
2253  fsp_print(0);
2254  fputs("Validating tablespace\n", stderr);
2255  fsp_validate(0);
2256  fputs("Validation ok\n"
2257  "---------------------------------------\n"
2258  "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
2259  "=======================================\n",
2260  stderr);
2261  }
2262 
      /* Table monitor: at most once per 60 seconds. */
2263  if (srv_print_innodb_table_monitor
2264  && difftime(current_time, last_table_monitor_time) > 60) {
2265 
2266  last_table_monitor_time = ut_time();
2267 
2268  fputs("===========================================\n",
2269  stderr);
2270 
2271  ut_print_timestamp(stderr);
2272 
2273  fputs(" INNODB TABLE MONITOR OUTPUT\n"
2274  "===========================================\n",
2275  stderr);
2276  dict_print();
2277 
2278  fputs("-----------------------------------\n"
2279  "END OF INNODB TABLE MONITOR OUTPUT\n"
2280  "==================================\n",
2281  stderr);
2282  }
2283  }
2284 
      /* NOTE(review): the condition that opens this block (listing line
      2285) is missing from this listing. */
2286  goto exit_func;
2287  }
2288 
      /* While any monitor flag is still set, loop without clearing
      srv_monitor_active. */
2289  if (srv_print_innodb_monitor
2290  || srv_print_innodb_lock_monitor
2291  || srv_print_innodb_tablespace_monitor
2292  || srv_print_innodb_table_monitor) {
2293  goto loop;
2294  }
2295 
2296  srv_monitor_active = FALSE;
2297 
2298  goto loop;
2299 
2300 exit_func:
2301  srv_monitor_active = FALSE;
2302 
2303  /* We count the number of threads in os_thread_exit(). A created
2304  thread should always use that to exit and not use return() to exit. */
2305 
2306  os_thread_exit(NULL);
2307 
2308  OS_THREAD_DUMMY_RETURN;
2309 }
2310 
2311 /*********************************************************************/
/* Thread body that watches for lock waits that should be cancelled. Each
pass waits on srv_timeout_event with a timeout, then, holding kernel_mutex,
walks all OS_THREAD_MAX_N slots of srv_mysql_table. For every slot in use it
computes how long the slot has been suspended and, if the transaction was
interrupted or its lock wait timeout has been exceeded, acts on the
transaction's wait_lock (when one is still set). The single (unused)
argument is a dummy. The thread leaves through os_thread_exit() at
exit_func.
NOTE(review): embedded listing numbers 2316, 2379 and 2390 are absent; the
function-name line, the call that receives trx->wait_lock, and the opening
of the `if` that guards `goto exit_func` are on those lines. Restore them
from the original file before changing code here. */
2314 UNIV_INTERN
2315 os_thread_ret_t
2317 /*====================*/
2318  void* /*arg __attribute__((unused))*/)
2319  /* in: a dummy parameter required by
2320  os_thread_create */
2321 {
2322  srv_slot_t* slot;
      /* Set when at least one slot was found in use during a pass. */
2323  ibool some_waits;
2324  double wait_time;
2325  ulint i;
2326  ib_int64_t sig_count;
2327 
2328 #ifdef UNIV_PFS_THREAD
2329  pfs_register_thread(srv_lock_timeout_thread_key);
2330 #endif
2331 
2332 loop:
2333 
2334  /* When someone is waiting for a lock, we wake up every second
2335  and check if a timeout has passed for a lock wait */
2336 
2337  sig_count = os_event_reset(srv_timeout_event);
2338 
2339  os_event_wait_time_low(srv_timeout_event, 1000000, sig_count);
2340 
2341  srv_lock_timeout_active = TRUE;
2342 
2343  mutex_enter(&kernel_mutex);
2344 
2345  some_waits = FALSE;
2346 
2347  /* Check all slots: if a thread is waiting in a slot, see whether it
2348  has exceeded the time limit */
2349 
2350  for (i = 0; i < OS_THREAD_MAX_N; i++) {
2351 
2352  slot = srv_mysql_table + i;
2353 
2354  if (slot->in_use) {
2355  trx_t* trx;
2356  ulong lock_wait_timeout;
2357 
2358  some_waits = TRUE;
2359 
2360  wait_time = ut_difftime(ut_time(), slot->suspend_time);
2361 
2362  trx = thr_get_trx(slot->thr);
2363  lock_wait_timeout = thd_lock_wait_timeout(
2364  trx->mysql_thd);
2365 
      /* A timeout setting of 100000000 or more never triggers the
      branch below on elapsed time; only an interrupted transaction
      does. A negative wait_time is treated like an expired
      timeout. */
2366  if (trx_is_interrupted(trx)
2367  || (lock_wait_timeout < 100000000
2368  && (wait_time > (double) lock_wait_timeout
2369  || wait_time < 0))) {
2370 
2371  /* Timeout exceeded or a wrap-around in system
2372  time counter: cancel the lock request queued
2373  by the transaction and release possible
2374  other transactions waiting behind; it is
2375  possible that the lock has already been
2376  granted: in that case do nothing */
2377 
2378  if (trx->wait_lock) {
      /* NOTE(review): the call that takes this argument (listing
      line 2379) is missing from this listing. */
2380  trx->wait_lock);
2381  }
2382  }
2383  }
2384  }
2385 
2386  os_event_reset(srv_lock_timeout_thread_event);
2387 
2388  mutex_exit(&kernel_mutex);
2389 
      /* NOTE(review): the condition that opens this block (listing line
      2390) is missing from this listing. */
2391  goto exit_func;
2392  }
2393 
      /* Keep the active flag set while any slot was in use. */
2394  if (some_waits) {
2395  goto loop;
2396  }
2397 
2398  srv_lock_timeout_active = FALSE;
2399 
2400 #if 0
2401  /* The following synchronisation is disabled, since
2402  the InnoDB monitor output is to be updated every 15 seconds. */
2403  os_event_wait(srv_lock_timeout_thread_event);
2404 #endif
2405  goto loop;
2406 
2407 exit_func:
2408  srv_lock_timeout_active = FALSE;
2409 
2410  /* We count the number of threads in os_thread_exit(). A created
2411  thread should always use that to exit and not use return() to exit. */
2412 
2413  os_thread_exit(NULL);
2414 
2415  OS_THREAD_DUMMY_RETURN;
2416 }
2417 
2418 /*********************************************************************/
/* Thread body that watches for error conditions. Each pass:
 - compares the current log sequence number with the one seen on the
   previous pass and reports an error message if it went backwards,
 - refreshes the monitor statistics when more than 60 seconds have passed
   since srv_last_monitor_time,
 - updates the flush-rate statistics,
 - asks sync_array_print_long_waits() for a long semaphore wait and, when
   the same waiter and semaphore have been reported on more than 10
   consecutive passes, prints a message and aborts via ut_error,
 - flushes stderr and waits on srv_error_event with a timeout.
The single (unused) argument is a dummy. The thread leaves through
os_thread_exit().
NOTE(review): embedded listing numbers 2424, 2427-2428, 2436, 2446, 2479,
2488 and 2518-2519 are absent; the function-name line, the declaration of
`waiter`, and the condition that guards the final `goto loop` are among the
missing lines. Restore them from the original file before changing code
here. */
2422 UNIV_INTERN
2423 os_thread_ret_t
2425 /*=====================*/
2426  void* /*arg __attribute__((unused))*/)
2429 {
2430  /* number of successive fatal timeouts observed */
2431  ulint fatal_cnt = 0;
2432  ib_uint64_t old_lsn;
2433  ib_uint64_t new_lsn;
2434  ib_int64_t sig_count;
2435  /* longest waiting thread for a semaphore */
      /* NOTE(review): the declaration of `waiter` (listing line 2436) is
      missing from this listing. */
2437  os_thread_id_t old_waiter = waiter;
2438  /* the semaphore that is being waited for */
2439  const void* sema = NULL;
2440  const void* old_sema = NULL;
2441 
2442  old_lsn = srv_start_lsn;
2443 
2444 #ifdef UNIV_DEBUG_THREAD_CREATION
2445  fprintf(stderr, "Error monitor thread starts, id %lu\n",
2447 #endif
2448 
2449 #ifdef UNIV_PFS_THREAD
2450  pfs_register_thread(srv_error_monitor_thread_key);
2451 #endif
2452 
2453 loop:
2454  srv_error_monitor_active = TRUE;
2455 
2456  /* Try to track a strange bug reported by Harald Fuchs and others,
2457  where the lsn seems to decrease at times */
2458 
2459  new_lsn = log_get_lsn();
2460 
2461  if (new_lsn < old_lsn) {
2462  drizzled::errmsg_printf(drizzled::error::INFO,
2463  "InnoDB: Error: old log sequence number %"PRIu64" was greater than the new log sequence number %"PRIu64"!"
2464  "InnoDB: Please submit a bug report to http://bugs.launchpad.net/drizzle",
2465  old_lsn, new_lsn);
2466  }
2467 
2468  old_lsn = new_lsn;
2469 
2470  if (difftime(time(NULL), srv_last_monitor_time) > 60) {
2471  /* We refresh InnoDB Monitor values so that averages are
2472  printed from at most 60 last seconds */
2473 
2474  srv_refresh_innodb_monitor_stats();
2475  }
2476 
2477  /* Update the statistics collected for deciding LRU
2478  eviction policy. */
2480 
2481  /* Update the statistics collected for flush rate policy. */
2482  buf_flush_stat_update();
2483 
2484  /* In case mutex_exit is not a memory barrier, it is
2485  theoretically possible some threads are left waiting though
2486  the semaphore is already released. Wake up those threads: */
2487 
2489 
      /* Count consecutive passes on which the same thread is reported
      waiting for the same semaphore; any other outcome restarts the
      count and remembers the new waiter/semaphore pair. */
2490  if (sync_array_print_long_waits(&waiter, &sema)
2491  && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
2492  fatal_cnt++;
2493  if (fatal_cnt > 10) {
2494 
2495  fprintf(stderr,
2496  "InnoDB: Error: semaphore wait has lasted"
2497  " > %lu seconds\n"
2498  "InnoDB: We intentionally crash the server,"
2499  " because it appears to be hung.\n",
2500  (ulong) srv_fatal_semaphore_wait_threshold);
2501 
2502  ut_error;
2503  }
2504  } else {
2505  fatal_cnt = 0;
2506  old_waiter = waiter;
2507  old_sema = sema;
2508  }
2509 
2510  /* Flush stderr so that a database user gets the output
2511  to possible MySQL error file */
2512 
2513  fflush(stderr);
2514 
2515  sig_count = os_event_reset(srv_error_event);
2516 
2517  os_event_wait_time_low(srv_error_event, 1000000, sig_count);
2518 
      /* NOTE(review): the condition that opens this block (listing lines
      2518-2519) is missing from this listing. */
2520 
2521  goto loop;
2522  }
2523 
2524  srv_error_monitor_active = FALSE;
2525 
2526  /* We count the number of threads in os_thread_exit(). A created
2527  thread should always use that to exit and not use return() to exit. */
2528 
2529  os_thread_exit(NULL);
2530 
2531  OS_THREAD_DUMMY_RETURN;
2532 }
2533 
2534 /*********************************************************************/
/* Thread body for the LRU dump/restore thread (per its start-up message).
After start-up it loops: sleep via os_thread_sleep(5000000), then, when
srv_auto_lru_dump is non-zero and more seconds than that setting have passed
since last_dump_time, restart the interval by resetting last_dump_time. The
single (unused) argument is a dummy. The thread leaves through
os_thread_exit() at exit_func.
NOTE(review): embedded listing numbers 2540, 2543-2544, 2552, 2556, 2563 and
2571 are absent; the function-name line, the statement controlled by
`if (srv_auto_lru_dump)`, the condition guarding `goto exit_func`, and the
statement that presumably performs the periodic dump are on those lines.
Restore them from the original file before changing code here. */
2538 UNIV_INTERN
2539 os_thread_ret_t
2541 /*====================*/
2542  void* /*arg __attribute__((unused))*/)
2545 {
2546  uint auto_lru_dump;
2547  time_t last_dump_time;
2548  time_t time_elapsed;
2549 
2550 #ifdef UNIV_DEBUG_THREAD_CREATION
2551  fprintf(stderr, "The LRU dump/restore thread has started, id %lu\n",
2553 #endif
2554 
      /* NOTE(review): the statement controlled by this `if` (listing line
      2556) is missing from this listing; as shown, the `if` would govern
      the assignment that follows. */
2555  if (srv_auto_lru_dump)
2557 
2558  last_dump_time = time(NULL);
2559 
2560 loop:
2561  os_thread_sleep(5000000);
2562 
      /* NOTE(review): the condition that opens this block (listing line
      2563) is missing from this listing. */
2564  goto exit_func;
2565  }
2566 
2567  time_elapsed = time(NULL) - last_dump_time;
      /* Read the setting once into a local so that both tests below use
      the same value. */
2568  auto_lru_dump = srv_auto_lru_dump;
2569  if (auto_lru_dump > 0 && (time_t) auto_lru_dump < time_elapsed) {
2570  last_dump_time = time(NULL);
2572  }
2573 
2574  goto loop;
2575 exit_func:
2576  /* We count the number of threads in os_thread_exit(). A created
2577  thread should always use that to exit and not use return() to exit. */
2578 
2579  os_thread_exit(NULL);
2580 
2581  OS_THREAD_DUMMY_RETURN;
2582 }
2583 
2584 /**********************************************************************/
/* Reports whether any entry of srv_n_threads_active[] with index
0..SRV_MASTER (inclusive) is non-zero. The array is read while holding
kernel_mutex.
Returns TRUE if at least one such counter is non-zero, FALSE otherwise.
NOTE(review): the function-name line (listing line 2589) is missing from
this listing. */
2587 UNIV_INTERN
2588 ibool
2590 /*=====================================*/
2591 {
2592  ulint i;
2593  ibool ret = FALSE;
2594 
2595  mutex_enter(&kernel_mutex);
2596 
      /* Stop at the first non-zero counter. */
2597  for (i = 0; i <= SRV_MASTER; ++i) {
2598  if (srv_n_threads_active[i] != 0) {
2599  ret = TRUE;
2600  break;
2601  }
2602  }
2603 
2604  mutex_exit(&kernel_mutex);
2605 
2606  return(ret);
2607 }
2608 
2609 /*******************************************************************/
/* Increments srv_activity_count and, if no SRV_MASTER thread is counted as
active, performs an action under kernel_mutex.
NOTE(review): the function-name line (listing line 2617) and the statement
executed inside the mutex (listing line 2626) are missing from this listing;
presumably the latter wakes the master thread -- confirm against the
original file. */
2615 UNIV_INTERN
2616 void
2618 /*===============================*/
2619 {
2620  srv_activity_count++;
2621 
      /* This counter is tested before kernel_mutex is acquired. */
2622  if (srv_n_threads_active[SRV_MASTER] == 0) {
2623 
2624  mutex_enter(&kernel_mutex);
2625 
2627 
2628  mutex_exit(&kernel_mutex);
2629  }
2630 }
2631 
2632 /*******************************************************************/
/* When purge threads are configured (srv_n_purge_threads > 0) and no
SRV_WORKER thread is counted as active, performs an action under
kernel_mutex. The caller must not hold kernel_mutex (debug assertion).
NOTE(review): the function-name line (listing line 2640) and the statement
executed inside the mutex (listing line 2650) are missing from this listing;
presumably the latter wakes a worker thread -- confirm against the original
file. */
2638 UNIV_INTERN
2639 void
2641 /*=====================================*/
2642 {
2643  ut_ad(!mutex_own(&kernel_mutex));
2644 
      /* Both values are tested before kernel_mutex is acquired. */
2645  if (srv_n_purge_threads > 0
2646  && srv_n_threads_active[SRV_WORKER] == 0) {
2647 
2648  mutex_enter(&kernel_mutex);
2649 
2651 
2652  mutex_exit(&kernel_mutex);
2653  }
2654 }
2655 
2656 /*******************************************************************/
/* Increments srv_activity_count and then unconditionally performs an action
under kernel_mutex.
NOTE(review): the function-name line (listing line 2660) and the statement
executed inside the mutex (listing line 2667) are missing from this listing;
presumably the latter wakes the master thread -- confirm against the
original file. */
2658 UNIV_INTERN
2659 void
2661 /*========================*/
2662 {
2663  srv_activity_count++;
2664 
2665  mutex_enter(&kernel_mutex);
2666 
2668 
2669  mutex_exit(&kernel_mutex);
2670 }
2671 
2672 /*******************************************************************/
/* When purge threads are configured (srv_n_purge_threads > 0), performs an
action under kernel_mutex. The caller must not hold kernel_mutex (debug
assertion).
NOTE(review): the function-name line (listing line 2676) and the statement
executed inside the mutex (listing line 2685) are missing from this listing;
presumably the latter wakes a worker thread -- confirm against the original
file. */
2674 UNIV_INTERN
2675 void
2677 /*=======================*/
2678 {
2679  ut_ad(!mutex_own(&kernel_mutex));
2680 
2681  if (srv_n_purge_threads > 0) {
2682 
2683  mutex_enter(&kernel_mutex);
2684 
2686 
2687  mutex_exit(&kernel_mutex);
2688  }
2689 }
2690 
2691 /**********************************************************************
2692 The master thread is tasked to ensure that flush of log file happens
2693 once every second in the background. This is to ensure that not more
2694 than one second of trxs are lost in case of crash when
2695 innodb_flush_logs_at_trx_commit != 1 */
/* Sets srv_main_thread_op_info to "flushing log" on every call. When at
least one second has passed since srv_last_log_flush_time, it records the
current time as the new srv_last_log_flush_time and increments
srv_log_writes_and_flush.
NOTE(review): listing line 2705, the first statement inside the `if`, is
missing from this listing; presumably it is the call that performs the log
flush -- confirm against the original file. */
2696 static
2697 void
2698 srv_sync_log_buffer_in_background(void)
2699 /*===================================*/
2700 {
2701  time_t current_time = time(NULL);
2702 
2703  srv_main_thread_op_info = "flushing log";
      /* Rate limit: act only when a full second has elapsed. */
2704  if (difftime(current_time, srv_last_log_flush_time) >= 1) {
2706  srv_last_log_flush_time = current_time;
2707  srv_log_writes_and_flush++;
2708  }
2709 }
2710 
2711 /********************************************************************/
2714 static
2715 void
2716 srv_master_do_purge(void)
2717 /*=====================*/
2718 {
2719  ulint n_pages_purged = 0;
2720 
2721  ut_ad(!mutex_own(&kernel_mutex));
2722 
2723  ut_a(srv_n_purge_threads == 0);
2724 
2725  do {
2726  /* Check for shutdown and change in purge config. */
2727  if (srv_fast_shutdown && srv_shutdown_state > 0) {
2728  /* Nothing to purge. */
2729  n_pages_purged = 0;
2730  } else {
2731  n_pages_purged = trx_purge(srv_purge_batch_size);
2732  }
2733 
2734  srv_sync_log_buffer_in_background();
2735 
2736  } while (n_pages_purged > 0);
2737 }
2738 
2739 /*********************************************************************/
2742 UNIV_INTERN
2743 os_thread_ret_t
2745 /*==============*/
2746  void* /*arg __attribute__((unused))*/)
2749 {
2750  buf_pool_stat_t buf_stat;
2751  srv_slot_t* slot;
2752  ulint old_activity_count;
2753  ulint n_pages_purged = 0;
2754  ulint n_bytes_merged;
2755  ulint n_pages_flushed;
2756  uint32_t n_pages_flushed_prev = 0;
2757  ulint n_bytes_archived;
2758  ulint n_tables_to_drop;
2759  ulint n_ios;
2760  ulint n_ios_old;
2761  ulint n_ios_very_old;
2762  ulint n_pend_ios;
2763  ulint next_itr_time;
2764  uint32_t prev_adaptive_flushing_method = ULINT32_UNDEFINED;
2765  uint32_t inner_loop = 0;
2766  bool skip_sleep = false;
2767  ulint i;
2768 
2769  struct t_prev_flush_info_struct {
2770  uint32_t count;
2771  uint32_t space;
2772  uint32_t offset;
2773  uint64_t oldest_modification;
2774  } prev_flush_info[MAX_BUFFER_POOLS];
2775 
2776  uint64_t lsn_old;
2777  uint64_t oldest_lsn;
2778 
2779 #ifdef UNIV_DEBUG_THREAD_CREATION
2780  fprintf(stderr, "Master thread starts, id %lu\n",
2782 #endif
2783 
2784 #ifdef UNIV_PFS_THREAD
2785  pfs_register_thread(srv_master_thread_key);
2786 #endif
2787 
2788  srv_main_thread_process_no = os_proc_get_number();
2789  srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
2790 
2791  mutex_enter(&kernel_mutex);
2792 
2793  slot = srv_table_reserve_slot(SRV_MASTER);
2794 
2795  srv_n_threads_active[SRV_MASTER]++;
2796 
2797  mutex_exit(&kernel_mutex);
2798 
2799  mutex_enter(&(log_sys->mutex));
2800  lsn_old = log_sys->lsn;
2801  mutex_exit(&(log_sys->mutex));
2802 loop:
2803  /*****************************************************************/
2804  /* ---- When there is database activity by users, we cycle in this
2805  loop */
2806 
2807  srv_main_thread_op_info = "reserving kernel mutex";
2808 
2809  buf_get_total_stat(&buf_stat);
2810  n_ios_very_old = log_sys->n_log_ios + buf_stat.n_pages_read
2811  + buf_stat.n_pages_written;
2812  mutex_enter(&kernel_mutex);
2813 
2814  /* Store the user activity counter at the start of this loop */
2815  old_activity_count = srv_activity_count;
2816 
2817  mutex_exit(&kernel_mutex);
2818 
2819  if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
2820 
2821  goto suspend_thread;
2822  }
2823 
2824  /* ---- We run the following loop approximately once per second
2825  when there is database activity */
2826 
2827  srv_last_log_flush_time = time(NULL);
2828 
2829  /* Sleep for 1 second on entering the for loop below the first time. */
2830  next_itr_time = ut_time_ms() + 1000;
2831 
2832  skip_sleep = false;
2833 
2834  for (i = 0; i < 10; i++) {
2835  ulint cur_time = ut_time_ms();
2836 
2837  n_pages_flushed = 0;
2838 
2839  /* ALTER TABLE in MySQL requires on Unix that the table handler
2840  can drop tables lazily after there no longer are SELECT
2841  queries to them. */
2842 
2843  srv_main_thread_op_info = "doing background drop tables";
2844 
2846 
2847  srv_main_thread_op_info = "";
2848 
2849  if (srv_fast_shutdown && srv_shutdown_state > 0) {
2850 
2851  goto background_loop;
2852  }
2853 
2854  buf_get_total_stat(&buf_stat);
2855 
2856  n_ios_old = log_sys->n_log_ios + buf_stat.n_pages_read
2857  + buf_stat.n_pages_written;
2858 
2859  srv_main_thread_op_info = "sleeping";
2860  srv_main_1_second_loops++;
2861 
2862  if (skip_sleep == false) {
2863  if (next_itr_time > cur_time
2865 
2866  /* Get sleep interval in micro seconds. We use
2867  ut_min() to avoid long sleep in case of
2868  wrap around. */
2869  os_thread_sleep(ut_min(1000000,
2870  (next_itr_time - cur_time)
2871  * 1000));
2872  srv_main_sleeps++;
2873 
2874  /*
2875  TODO: tracing code unported to Drizzle
2876  mutex_enter(&(log_sys->mutex));
2877  oldest_lsn = buf_pool_get_oldest_modification();
2878  ib_uint64_t lsn = log_sys->lsn;
2879  mutex_exit(&(log_sys->mutex));
2880 
2881  if(oldest_lsn)
2882  fprintf(stderr,
2883  "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
2884  (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
2885  lsn - lsn_old);
2886  */
2887 
2888  }
2889 
2890  /* Each iteration should happen at 1 second interval. */
2891  next_itr_time = ut_time_ms() + 1000;
2892  }
2893 
2894  skip_sleep = false;
2895 
2896  /* Flush logs if needed */
2897  srv_sync_log_buffer_in_background();
2898 
2899  srv_main_thread_op_info = "making checkpoint";
2900  log_free_check();
2901 
2902  /* If i/os during one second sleep were less than 5% of
2903  capacity, we assume that there is free disk i/o capacity
2904  available, and it makes sense to do an insert buffer merge. */
2905 
2906  buf_get_total_stat(&buf_stat);
2907  n_pend_ios = buf_get_n_pending_ios()
2908  + log_sys->n_pending_writes;
2909  n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
2910  + buf_stat.n_pages_written;
2911  if (n_pend_ios < SRV_PEND_IO_THRESHOLD
2912  && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
2913  srv_main_thread_op_info = "doing insert buffer merge";
2914  ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
2915 
2916  /* Flush logs if needed */
2917  srv_sync_log_buffer_in_background();
2918  }
2919 
2920  if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
2921  > srv_max_buf_pool_modified_pct)) {
2922 
2923  /* Try to keep the number of modified pages in the
2924  buffer pool under the limit wished by the user */
2925 
2926  srv_main_thread_op_info =
2927  "flushing buffer pool pages";
2928  n_pages_flushed = buf_flush_list(
2929  PCT_IO(100), IB_ULONGLONG_MAX);
2930 
2931  mutex_enter(&(log_sys->mutex));
2932  lsn_old = log_sys->lsn;
2933  mutex_exit(&(log_sys->mutex));
2934  prev_adaptive_flushing_method = ULINT32_UNDEFINED;
2935  } else if (srv_adaptive_flushing
2936  && srv_adaptive_flushing_method == 0) {
2937 
2938  /* Try to keep the rate of flushing of dirty
2939  pages such that redo log generation does not
2940  produce bursts of IO at checkpoint time. */
2941  ulint n_flush = buf_flush_get_desired_flush_rate();
2942 
2943  if (n_flush) {
2944  srv_main_thread_op_info =
2945  "flushing buffer pool pages";
2946  n_flush = ut_min(PCT_IO(100), n_flush);
2947  n_pages_flushed =
2949  n_flush,
2950  IB_ULONGLONG_MAX);
2951  }
2952 
2953  mutex_enter(&(log_sys->mutex));
2954  lsn_old = log_sys->lsn;
2955  mutex_exit(&(log_sys->mutex));
2956  prev_adaptive_flushing_method = ULINT32_UNDEFINED;
2957  } else if (srv_adaptive_flushing
2958  && srv_adaptive_flushing_method == 1) {
2959 
2960  /* Try to keep modified age not to exceed
2961  max_checkpoint_age * 7/8 line */
2962 
2963  mutex_enter(&(log_sys->mutex));
2964 
2965  oldest_lsn = buf_pool_get_oldest_modification();
2966  if (oldest_lsn == 0) {
2967  lsn_old = log_sys->lsn;
2968  mutex_exit(&(log_sys->mutex));
2969 
2970  } else {
2971  if ((log_sys->lsn - oldest_lsn)
2972  > (log_sys->max_checkpoint_age)
2973  - ((log_sys->max_checkpoint_age) / 8)) {
2974  /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
2975  /* We should not flush from here. */
2976  lsn_old = log_sys->lsn;
2977  mutex_exit(&(log_sys->mutex));
2978  } else if ((log_sys->lsn - oldest_lsn)
2979  > (log_sys->max_checkpoint_age)/4) {
2980 
2981  /* defence line (max_checkpoint_age * 1/2) */
2982  uint64_t lsn = log_sys->lsn;
2983 
2984  uint64_t level, bpl;
2985  buf_page_t* bpage;
2986  ulint j;
2987 
2988  mutex_exit(&(log_sys->mutex));
2989 
2990  bpl = 0;
2991 
2992  for (j = 0; j < srv_buf_pool_instances; j++) {
2993  buf_pool_t* buf_pool;
2994  uint32_t n_blocks = 0;
2995 
2996  buf_pool = buf_pool_from_array(j);
2997 
2998  /* The scanning flush_list is optimistic here */
2999 
3000  level = 0;
3001  bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
3002 
3003  while (bpage != NULL) {
3004  uint64_t oldest_modification = bpage->oldest_modification;
3005  if (oldest_modification != 0) {
3006  level += log_sys->max_checkpoint_age
3007  - (lsn - oldest_modification);
3008  }
3009  bpage = UT_LIST_GET_NEXT(list, bpage);
3010  n_blocks++;
3011  }
3012 
3013  if (level) {
3014  bpl += ((ib_uint64_t) n_blocks * n_blocks
3015  * (lsn - lsn_old)) / level;
3016  }
3017 
3018  }
3019 
3020  if (!srv_use_doublewrite_buf) {
3021  /* flush is faster than when doublewrite */
3022  bpl = (bpl * 7) / 8;
3023  }
3024 
3025  if (bpl) {
3026 retry_flush_batch:
3027  n_pages_flushed = buf_flush_list(bpl,
3028  oldest_lsn + (lsn - lsn_old));
3029  if (n_pages_flushed == ULINT32_UNDEFINED) {
3030  os_thread_sleep(5000);
3031  goto retry_flush_batch;
3032  }
3033  }
3034 
3035  lsn_old = lsn;
3036  /*
3037  TODO: tracing code unported to Drizzle
3038  fprintf(stderr,
3039  "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
3040  (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
3041  lsn - lsn_old, bpl);
3042  */
3043  } else {
3044  lsn_old = log_sys->lsn;
3045  mutex_exit(&(log_sys->mutex));
3046  }
3047  }
3048  prev_adaptive_flushing_method = 1;
3049  } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
3050  buf_pool_t* buf_pool;
3051  buf_page_t* bpage;
3052  uint64_t lsn;
3053  ulint j;
3054 
3055  mutex_enter(&(log_sys->mutex));
3056  oldest_lsn = buf_pool_get_oldest_modification();
3057  lsn = log_sys->lsn;
3058  mutex_exit(&(log_sys->mutex));
3059 
3060  /* upper loop/sec. (x10) */
3061  next_itr_time -= 900; /* 1000 - 900 == 100 */
3062  inner_loop++;
3063  if (inner_loop < 10) {
3064  i--;
3065  } else {
3066  inner_loop = 0;
3067  }
3068 
3069  if (prev_adaptive_flushing_method == 2) {
3070  int32_t n_flush;
3071  int32_t blocks_sum = 0;
3072  uint32_t new_blocks_sum = 0;
3073  uint32_t flushed_blocks_sum = 0;
3074 
3075  /* prev_flush_info[j] should be the previous loop's */
3076  for (j = 0; j < srv_buf_pool_instances; j++) {
3077  int32_t blocks_num, new_blocks_num, flushed_blocks_num;
3078  bool found = false;
3079 
3080  buf_pool = buf_pool_from_array(j);
3081 
3082  blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
3083  bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
3084  new_blocks_num = 0;
3085 
3086  while (bpage != NULL) {
3087  if (prev_flush_info[j].space == bpage->space
3088  && prev_flush_info[j].offset == bpage->offset
3089  && prev_flush_info[j].oldest_modification
3090  == bpage->oldest_modification) {
3091  found = true;
3092  break;
3093  }
3094  bpage = UT_LIST_GET_NEXT(list, bpage);
3095  new_blocks_num++;
3096  }
3097  if (!found) {
3098  new_blocks_num = blocks_num;
3099  }
3100  flushed_blocks_num = new_blocks_num
3101  + prev_flush_info[j].count
3102  - blocks_num;
3103  if (flushed_blocks_num < 0) {
3104  flushed_blocks_num = 0;
3105  }
3106 
3107  bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
3108 
3109  prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
3110  if (bpage) {
3111  prev_flush_info[j].space = bpage->space;
3112  prev_flush_info[j].offset = bpage->offset;
3113  prev_flush_info[j].oldest_modification = bpage->oldest_modification;
3114  } else {
3115  prev_flush_info[j].space = 0;
3116  prev_flush_info[j].offset = 0;
3117  prev_flush_info[j].oldest_modification = 0;
3118  }
3119 
3120  new_blocks_sum += new_blocks_num;
3121  flushed_blocks_sum += flushed_blocks_num;
3122  blocks_sum += blocks_num;
3123  }
3124 
3125  n_flush = blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async;
3126  if (flushed_blocks_sum > n_pages_flushed_prev) {
3127  n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
3128  }
3129 
3130  if (n_flush > 0) {
3131  n_flush++;
3132  n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
3133  } else {
3134  n_pages_flushed = 0;
3135  }
3136  } else {
3137  /* store previous first pages of the flush_list */
3138  for (j = 0; j < srv_buf_pool_instances; j++) {
3139  buf_pool = buf_pool_from_array(j);
3140 
3141  bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
3142 
3143  prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
3144  if (bpage) {
3145  prev_flush_info[j].space = bpage->space;
3146  prev_flush_info[j].offset = bpage->offset;
3147  prev_flush_info[j].oldest_modification = bpage->oldest_modification;
3148  } else {
3149  prev_flush_info[j].space = 0;
3150  prev_flush_info[j].offset = 0;
3151  prev_flush_info[j].oldest_modification = 0;
3152  }
3153  }
3154  n_pages_flushed = 0;
3155  }
3156 
3157  lsn_old = lsn;
3158  prev_adaptive_flushing_method = 2;
3159  } else {
3160  mutex_enter(&(log_sys->mutex));
3161  lsn_old = log_sys->lsn;
3162  mutex_exit(&(log_sys->mutex));
3163  prev_adaptive_flushing_method = ULINT32_UNDEFINED;
3164  }
3165 
3166  if (n_pages_flushed == ULINT_UNDEFINED) {
3167  n_pages_flushed_prev = 0;
3168  } else {
3169  n_pages_flushed_prev = n_pages_flushed;
3170  }
3171 
3172  if (srv_activity_count == old_activity_count) {
3173 
3174  /* There is no user activity at the moment, go to
3175  the background loop */
3176 
3177  goto background_loop;
3178  }
3179  }
3180 
3181  /* ---- We perform the following code approximately once per
3182  10 seconds when there is database activity */
3183 
3184 #ifdef MEM_PERIODIC_CHECK
3185  /* Check magic numbers of every allocated mem block once in 10
3186  seconds */
3187  mem_validate_all_blocks();
3188 #endif
3189  /* If i/os during the 10 second period were less than 200% of
3190  capacity, we assume that there is free disk i/o capacity
3191  available, and it makes sense to flush srv_io_capacity pages.
3192 
3193  Note that this is done regardless of the fraction of dirty
3194  pages relative to the max requested by the user. The one second
3195  loop above requests writes for that case. The writes done here
3196  are not required, and may be disabled. */
3197 
3198  buf_get_total_stat(&buf_stat);
3199  n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
3200  n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
3201  + buf_stat.n_pages_written;
3202 
3203  srv_main_10_second_loops++;
3204  if (n_pend_ios < SRV_PEND_IO_THRESHOLD
3205  && (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) {
3206 
3207  srv_main_thread_op_info = "flushing buffer pool pages";
3208  buf_flush_list(PCT_IO(100), IB_ULONGLONG_MAX);
3209 
3210  /* Flush logs if needed */
3211  srv_sync_log_buffer_in_background();
3212  }
3213 
3214  /* We run a batch of insert buffer merge every 10 seconds,
3215  even if the server were active */
3216 
3217  srv_main_thread_op_info = "doing insert buffer merge";
3218  ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
3219 
3220  /* Flush logs if needed */
3221  srv_sync_log_buffer_in_background();
3222 
3223  if (srv_n_purge_threads == 0) {
3224  srv_main_thread_op_info = "master purging";
3225 
3226  srv_master_do_purge();
3227 
3228  if (srv_fast_shutdown && srv_shutdown_state > 0) {
3229 
3230  goto background_loop;
3231  }
3232  }
3233 
3234  srv_main_thread_op_info = "flushing buffer pool pages";
3235 
3236  /* Flush a few oldest pages to make a new checkpoint younger */
3237 
3238  if (buf_get_modified_ratio_pct() > 70) {
3239 
3240  /* If there are lots of modified pages in the buffer pool
3241  (> 70 %), we assume we can afford reserving the disk(s) for
3242  the time it requires to flush 100 pages */
3243 
3244  n_pages_flushed = buf_flush_list(
3245  PCT_IO(100), IB_ULONGLONG_MAX);
3246  } else {
3247  /* Otherwise, we only flush a small number of pages so that
3248  we do not unnecessarily use much disk i/o capacity from
3249  other work */
3250 
3251  n_pages_flushed = buf_flush_list(
3252  PCT_IO(10), IB_ULONGLONG_MAX);
3253  }
3254 
3255  srv_main_thread_op_info = "making checkpoint";
3256 
3257  /* Make a new checkpoint about once in 10 seconds */
3258 
3259  log_checkpoint(TRUE, FALSE);
3260 
3261  srv_main_thread_op_info = "reserving kernel mutex";
3262 
3263  mutex_enter(&kernel_mutex);
3264 
3265  /* ---- When there is database activity, we jump from here back to
3266  the start of loop */
3267 
3268  if (srv_activity_count != old_activity_count) {
3269  mutex_exit(&kernel_mutex);
3270  goto loop;
3271  }
3272 
3273  mutex_exit(&kernel_mutex);
3274 
3275  /* If the database is quiet, we enter the background loop */
3276 
3277  /*****************************************************************/
3278 background_loop:
3279  /* ---- In this loop we run background operations when the server
3280  is quiet from user activity. Also in the case of a shutdown, we
3281  loop here, flushing the buffer pool to the data files. */
3282 
3283  /* The server has been quiet for a while: start running background
3284  operations */
3285  srv_main_background_loops++;
3286  srv_main_thread_op_info = "doing background drop tables";
3287 
3288  n_tables_to_drop = row_drop_tables_for_mysql_in_background();
3289 
3290  if (n_tables_to_drop > 0) {
3291  /* Do not monopolize the CPU even if there are tables waiting
3292  in the background drop queue. (It is essentially a bug if
3293  MySQL tries to drop a table while there are still open handles
3294  to it and we had to put it to the background drop queue.) */
3295 
3297  os_thread_sleep(100000);
3298  }
3299  }
3300 
3301  if (srv_n_purge_threads == 0) {
3302  srv_main_thread_op_info = "master purging";
3303 
3304  srv_master_do_purge();
3305  }
3306 
3307  srv_main_thread_op_info = "reserving kernel mutex";
3308 
3309  mutex_enter(&kernel_mutex);
3310  if (srv_activity_count != old_activity_count) {
3311  mutex_exit(&kernel_mutex);
3312  goto loop;
3313  }
3314  mutex_exit(&kernel_mutex);
3315 
3316  srv_main_thread_op_info = "doing insert buffer merge";
3317 
3318  if (srv_fast_shutdown && srv_shutdown_state > 0) {
3319  n_bytes_merged = 0;
3320  } else {
3321  /* This should do an amount of IO similar to the number of
3322  dirty pages that will be flushed in the call to
3323  buf_flush_list below. Otherwise, the system favors
3324  clean pages over cleanup throughput. */
3325  n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
3326  PCT_IBUF_IO(100));
3327  }
3328 
3329  srv_main_thread_op_info = "reserving kernel mutex";
3330 
3331  mutex_enter(&kernel_mutex);
3332  if (srv_activity_count != old_activity_count) {
3333  mutex_exit(&kernel_mutex);
3334  goto loop;
3335  }
3336  mutex_exit(&kernel_mutex);
3337 
3338 flush_loop:
3339  srv_main_thread_op_info = "flushing buffer pool pages";
3340  srv_main_flush_loops++;
3341  if (srv_fast_shutdown < 2) {
3342  n_pages_flushed = buf_flush_list(
3343  PCT_IO(100), IB_ULONGLONG_MAX);
3344  } else {
3345  /* In the fastest shutdown we do not flush the buffer pool
3346  to data files: we set n_pages_flushed to 0 artificially. */
3347 
3348  n_pages_flushed = 0;
3349  }
3350 
3351  srv_main_thread_op_info = "reserving kernel mutex";
3352 
3353  mutex_enter(&kernel_mutex);
3354  if (srv_activity_count != old_activity_count) {
3355  mutex_exit(&kernel_mutex);
3356  goto loop;
3357  }
3358  mutex_exit(&kernel_mutex);
3359 
3360  srv_main_thread_op_info = "waiting for buffer pool flush to end";
3362 
3363  /* Flush logs if needed */
3364  srv_sync_log_buffer_in_background();
3365 
3366  srv_main_thread_op_info = "making checkpoint";
3367 
3368  log_checkpoint(TRUE, FALSE);
3369 
3370  if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
3371 
3372  /* Try to keep the number of modified pages in the
3373  buffer pool under the limit wished by the user */
3374 
3375  goto flush_loop;
3376  }
3377 
3378  srv_main_thread_op_info = "reserving kernel mutex";
3379 
3380  mutex_enter(&kernel_mutex);
3381  if (srv_activity_count != old_activity_count) {
3382  mutex_exit(&kernel_mutex);
3383  goto loop;
3384  }
3385  mutex_exit(&kernel_mutex);
3386  /*
3387  srv_main_thread_op_info = "archiving log (if log archive is on)";
3388 
3389  log_archive_do(FALSE, &n_bytes_archived);
3390  */
3391  n_bytes_archived = 0;
3392 
3393  /* Keep looping in the background loop if still work to do */
3394 
3395  if (srv_fast_shutdown && srv_shutdown_state > 0) {
3396  if (n_tables_to_drop + n_pages_flushed
3397  + n_bytes_archived != 0) {
3398 
3399  /* If we are doing a fast shutdown (= the default)
3400  we do not do purge or insert buffer merge. But we
3401  flush the buffer pool completely to disk.
3402  In a 'very fast' shutdown we do not flush the buffer
3403  pool to data files: we have set n_pages_flushed to
3404  0 artificially. */
3405 
3406  goto background_loop;
3407  }
3408  } else if (n_tables_to_drop
3409  + n_pages_purged + n_bytes_merged + n_pages_flushed
3410  + n_bytes_archived != 0) {
3411  /* In a 'slow' shutdown we run purge and the insert buffer
3412  merge to completion */
3413 
3414  goto background_loop;
3415  }
3416 
3417  /* There is no work for background operations either: suspend
3418  master thread to wait for more server activity */
3419 
3420 suspend_thread:
3421  srv_main_thread_op_info = "suspending";
3422 
3423  mutex_enter(&kernel_mutex);
3424 
3426  mutex_exit(&kernel_mutex);
3427 
3428  goto loop;
3429  }
3430 
3431  srv_suspend_thread(slot);
3432 
3433  mutex_exit(&kernel_mutex);
3434 
3435  /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
3436  waits for database activity to die down when converting < 4.1.x
3437  databases, and relies on this string being exactly as it is. InnoDB
3438  manual also mentions this string in several places. */
3439  srv_main_thread_op_info = "waiting for server activity";
3440 
3441  os_event_wait(slot->event);
3442 
3444  /* This is only extra safety, the thread should exit
3445  already when the event wait ends */
3446 
3447  os_thread_exit(NULL);
3448 
3449  }
3450 
3451  /* When there is user activity, InnoDB will set the event and the
3452  main thread goes back to loop. */
3453 
3454  goto loop;
3455 
3456  OS_THREAD_DUMMY_RETURN;
3457 }
3458 
3459 /*********************************************************************/
/* Body of the purge thread. The line carrying the function name is missing
from this listing; the cross-reference index further down names it
srv_purge_thread(void *). The thread argument is unused (its name is
commented out). srv_n_purge_threads must be 1 (asserted with ut_a).

Outline of the visible code:
 - reserve a SRV_WORKER slot and increment srv_n_threads_active[SRV_WORKER]
   while holding kernel_mutex;
 - per pass: suspend on slot->event when the history list is shorter than
   srv_purge_batch_size or recent passes purged nothing; leave the loop on
   forced recovery, shutdown or fast shutdown; otherwise call trx_purge()
   in batches until it reports 0 pages, sync the log buffer in the
   background and pace the next pass;
 - after the loop: suspend the slot, mark it unused and end the thread
   through os_thread_exit(NULL).
Control is not expected to reach the final OS_THREAD_DUMMY_RETURN. */
3462 UNIV_INTERN
3463 os_thread_ret_t
3465 /*=============*/
3466  void* /*arg __attribute__((unused))*/)
3468 {
3469  srv_slot_t* slot;
3470  ulint retries = 0;
3471  ulint n_total_purged = ULINT_UNDEFINED;
3472  ulint next_itr_time;
3473 
3474  ut_a(srv_n_purge_threads == 1);
3475 
3476 #ifdef UNIV_DEBUG_THREAD_CREATION
3477  fprintf(stderr, "InnoDB: Purge thread running, id %lu\n",
      /* NOTE(review): listing line 3478 (the argument supplying the
      thread id and closing this call) is missing from this listing. */
3479 #endif /* UNIV_DEBUG_THREAD_CREATION */
3480 
      /* Slot reservation and the active-thread counter are updated
      together under kernel_mutex. */
3481  mutex_enter(&kernel_mutex);
3482 
3483  slot = srv_table_reserve_slot(SRV_WORKER);
3484 
3485  ++srv_n_threads_active[SRV_WORKER];
3486 
3487  mutex_exit(&kernel_mutex);
3488 
3489  next_itr_time = ut_time_ms();
3490 
      /* NOTE(review): listing line 3491 is missing here; the `break`
      and the closing brace at listing line 3551 imply that it opens
      the main loop of the thread - confirm the loop condition against
      the real source file. */
3492 
3493  ulint n_pages_purged;
3494  ulint cur_time;
3495 
3496  /* If there are very few records to purge or the last
3497  purge didn't purge any records then wait for activity.
3498  We peek at the history len without holding any mutex
3499  because in the worst case we will end up waiting for
3500  the next purge event. */
3501  if (trx_sys->rseg_history_len < srv_purge_batch_size
3502  || (n_total_purged == 0
3503  && retries >= TRX_SYS_N_RSEGS)) {
3504 
3505  mutex_enter(&kernel_mutex);
3506 
3507  srv_suspend_thread(slot);
3508 
3509  mutex_exit(&kernel_mutex);
3510 
3511  os_event_wait(slot->event);
3512 
3513  retries = 0;
3514  }
3515 
3516  /* Check for shutdown and whether we should do purge at all. */
3517  if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
3518  || srv_shutdown_state != 0
3519  || srv_fast_shutdown) {
3520 
3521  break;
3522  }
3523 
      /* n_total_purged still holds the total of the previous pass
      here. A pass that purged nothing bumps retries (it stops
      growing once it exceeds TRX_SYS_N_RSEGS); a pass that purged
      something restarts both counters.
      NOTE(review): on the very first pass n_total_purged is
      ULINT_UNDEFINED, which presumably is non-zero and therefore
      takes the second branch - confirm. */
3524  if (n_total_purged == 0 && retries <= TRX_SYS_N_RSEGS) {
3525  ++retries;
3526  } else if (n_total_purged > 0) {
3527  retries = 0;
3528  n_total_purged = 0;
3529  }
3530 
3531  /* Purge until there are no more records to purge and there is
3532  no change in configuration or server state. */
3533  do {
3534  n_pages_purged = trx_purge(srv_purge_batch_size);
3535 
3536  n_total_purged += n_pages_purged;
3537 
3538  } while (n_pages_purged > 0 && !srv_fast_shutdown);
3539 
3540  srv_sync_log_buffer_in_background();
3541 
      /* Pacing: if the scheduled time of the next pass has not been
      reached, sleep for the remainder (the millisecond difference is
      multiplied by 1000 and capped at 1000000; presumably
      os_thread_sleep() takes microseconds - confirm) and schedule the
      following pass 1000 ms after waking up. Otherwise schedule it
      1000 ms from now without sleeping. */
3542  cur_time = ut_time_ms();
3543  if (next_itr_time > cur_time) {
3544  os_thread_sleep(ut_min(1000000,
3545  (next_itr_time - cur_time)
3546  * 1000));
3547  next_itr_time = ut_time_ms() + 1000;
3548  } else {
3549  next_itr_time = cur_time + 1000;
3550  }
3551  }
3552 
3553  mutex_enter(&kernel_mutex);
3554 
3555  /* Decrement the active count. */
3556  srv_suspend_thread(slot);
3557 
      /* Give the slot back so that it can be reserved again. */
3558  slot->in_use = FALSE;
3559 
3560  mutex_exit(&kernel_mutex);
3561 
3562 #ifdef UNIV_DEBUG_THREAD_CREATION
3563  fprintf(stderr, "InnoDB: Purge thread exiting, id %lu\n",
      /* NOTE(review): listing line 3564 (the argument supplying the
      thread id and closing this call) is missing from this listing. */
3565 #endif /* UNIV_DEBUG_THREAD_CREATION */
3566 
3567  /* We count the number of threads in os_thread_exit(). A created
3568  thread should always use that to exit and not use return() to exit. */
3569  os_thread_exit(NULL);
3570 
3571  OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
3572 }
3573 
3574 /**********************************************************************/
/* Appends the query thread `thr` to the tail of the srv_sys->tasks list
while holding kernel_mutex. The line carrying the function name is missing
from this listing; the cross-reference index further down gives it as
srv_que_task_enqueue_low(que_thr_t *thr). `thr` must not be NULL (checked
with ut_ad). Returns nothing. */
3577 UNIV_INTERN
3578 void
3580 /*=====================*/
3581  que_thr_t* thr)
3582 {
3583  ut_ad(thr);
3584 
3585  mutex_enter(&kernel_mutex);
3586 
3587  UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
3588 
      /* NOTE(review): listing line 3589 is missing here, so the function
      may do more between the list append and the mutex release than is
      visible - check the real source file. */
3590 
3591  mutex_exit(&kernel_mutex);
3592 }
UNIV_INTERN void srv_free(void)
Definition: srv0srv.cc:1165
#define UT_LIST_GET_LEN(BASE)
Definition: ut0lst.h:217
trx_sys_t * trx_sys
Definition: trx0sys.cc:61
ulint innodb_pages_read
Definition: srv0srv.h:767
UNIV_INTERN void srv_wake_purge_thread(void)
Definition: srv0srv.cc:2676
ulint innodb_row_lock_current_waits
Definition: srv0srv.h:770
UNIV_INTERN void trx_i_s_cache_free(trx_i_s_cache_t *cache)
Definition: trx0i_s.cc:1408
UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue
UNIV_INTERN bool buf_LRU_file_restore(void)
Definition: buf0lru.cc:2231
unsigned offset
Definition: buf0buf.h:1281
UNIV_INTERN void srv_conc_enter_innodb(trx_t *trx)
Definition: srv0srv.cc:1213
ibool srv_locks_unsafe_for_binlog
Definition: srv0srv.cc:138
ulint btr_cur_n_non_sea_old
Definition: btr0cur.cc:94
que_thr_t * thr
Definition: srv0srv.cc:770
#define UT_LIST_GET_NEXT(NAME, N)
Definition: ut0lst.h:201
UNIV_INTERN void os_fast_mutex_unlock(os_fast_mutex_t *fast_mutex)
Definition: os0sync.cc:915
ulint max_checkpoint_age
Definition: log0log.h:908
The buffer pool statistics structure.
Definition: buf0buf.h:1568
ulint innodb_os_log_fsyncs
Definition: srv0srv.h:762
ulint innodb_dblwr_writes
Definition: srv0srv.h:756
UNIV_INTERN os_thread_ret_t srv_error_monitor_thread(void *arg)
Definition: srv0srv.cc:2424
ulint lock_state
Definition: que0que.h:383
UNIV_INTERN ibool fsp_validate(ulint space)
Definition: fsp0fsp.cc:3920
ulint btr_cur_n_sea
Definition: btr0cur.cc:90
ulint innodb_data_read
Definition: srv0srv.h:736
UNIV_INTERN void lock_cancel_waiting_and_release(lock_t *lock)
Definition: lock0lock.cc:4127
UNIV_INTERN ulint ut_time_ms(void)
Definition: ut0ut.cc:219
UNIV_INTERN ulint srv_get_n_threads(void)
Definition: srv0srv.cc:873
UNIV_INTERN void log_buffer_sync_in_background(ibool flush)
Definition: log0log.cc:1624
ulint innodb_rows_updated
Definition: srv0srv.h:780
pthread_mutex_t os_fast_mutex_t
Definition: os0sync.h:50
ulint innodb_buffer_pool_pages_total
Definition: srv0srv.h:740
ulint innodb_buffer_pool_read_ahead_evicted
Definition: srv0srv.h:754
UNIV_INTERN void srv_wake_master_thread(void)
Definition: srv0srv.cc:2660
time_t ib_time_t
Definition: ut0ut.h:56
UNIV_INTERN void buf_refresh_io_stats_all(void)
Definition: buf0buf.cc:5154
btr_search_sys_t * btr_search_sys
Definition: btr0sea.cc:84
unsigned type
Definition: srv0srv.cc:762
ulint innodb_buffer_pool_reads
Definition: srv0srv.h:749
UNIV_INTERN ib_time_t ut_time(void)
Definition: ut0ut.cc:138
unsigned space
Definition: buf0buf.h:1279
UNIV_INTERN void srv_set_io_thread_op_info(ulint i, const char *str)
Definition: srv0srv.cc:841
ulint innodb_row_lock_time_avg
Definition: srv0srv.h:773
UNIV_INTERN void os_fast_mutex_free(os_fast_mutex_t *fast_mutex)
Definition: os0sync.cc:930
#define mem_free(PTR)
Definition: mem0mem.h:249
UNIV_INTERN ulint srv_release_threads(enum srv_thread_type type, ulint n)
Definition: srv0srv.cc:1001
UNIV_INTERN void buf_get_total_list_len(ulint *LRU_len, ulint *free_len, ulint *flush_list_len)
Definition: buf0buf.cc:361
UNIV_INTERN void srv_active_wake_master_thread(void)
Definition: srv0srv.cc:2617
ulint innodb_row_lock_waits
Definition: srv0srv.h:769
ulint ut_total_allocated_memory
Definition: ut0mem.cc:45
UNIV_INTERN os_event_t os_event_create(const char *name)
Definition: os0sync.cc:365
ib_time_t suspend_time
Definition: srv0srv.cc:766
ulint n_log_ios
Definition: log0log.h:872
UNIV_INTERN void os_fast_mutex_init(os_fast_mutex_t *fast_mutex)
Definition: os0sync.cc:871
UNIV_INTERN void thd_set_lock_wait_time(drizzled::Session *in_session, ulint value)
Definition: ha_innodb.cc:1015
UNIV_INTERN void log_print(FILE *file)
Definition: log0log.cc:3379
unsigned suspended
Definition: srv0srv.cc:764
ulint innodb_data_pending_reads
Definition: srv0srv.h:732
UNIV_INTERN void srv_general_init(void)
Definition: srv0srv.cc:1192
UNIV_INTERN void sync_arr_wake_threads_if_sema_free(void)
Definition: sync0arr.cc:877
trx_i_s_cache_t * trx_i_s_cache
Definition: trx0i_s.cc:197
ulint innodb_buffer_pool_read_ahead
Definition: srv0srv.h:753
UNIV_INTERN void sync_init(void)
Definition: sync0sync.cc:1431
UNIV_INTERN ulong thd_lock_wait_timeout(drizzled::Session *)
Definition: ha_innodb.cc:1000
UNIV_INTERN ulint trx_purge(ulint limit)
Definition: trx0purge.cc:1124
UNIV_INTERN ibool thd_is_replication_slave_thread(drizzled::Session *)
Definition: ha_innodb.cc:887
UNIV_INTERN void ibuf_print(FILE *file)
Definition: ibuf0ibuf.cc:4926
ulint innodb_os_log_pending_writes
Definition: srv0srv.h:763
const byte * srv_latin1_ordering
Definition: srv0srv.cc:246
UNIV_INTERN void srv_conc_exit_innodb(trx_t *trx)
Definition: srv0srv.cc:1455
ulint innodb_data_written
Definition: srv0srv.h:738
UNIV_INTERN os_thread_t os_thread_get_curr(void)
Definition: os0thread.cc:230
UNIV_INTERN void os_aio_print(FILE *file)
Definition: os0file.cc:5069
ulint n_pending_writes
Definition: log0log.h:847
UNIV_INTERN os_thread_ret_t srv_monitor_thread(void *arg)
Definition: srv0srv.cc:2151
os_event_t event
Definition: srv0srv.cc:414
UNIV_INTERN void sync_print(FILE *file)
Definition: sync0sync.cc:1589
ulint srv_buf_pool_curr_size
Definition: srv0srv.cc:259
UNIV_INTERN void srv_init(void)
Definition: srv0srv.cc:1082
UNIV_INLINE ib_uint64_t log_get_lsn(void)
ulint innodb_data_reads
Definition: srv0srv.h:739
#define mem_zalloc(N)
Definition: mem0mem.h:225
UNIV_INTERN void row_mysql_unfreeze_data_dictionary(trx_t *trx)
Definition: row0mysql.cc:1752
ulint innodb_os_log_written
Definition: srv0srv.h:761
ulint max_modified_age_async
Definition: log0log.h:886
UNIV_INTERN ibool trx_is_interrupted(trx_t *trx)
Definition: ha_innodb.cc:1882
uint32_t srv_auto_lru_dump
Definition: srv0srv.cc:375
ulint innodb_dblwr_pages_written
Definition: srv0srv.h:755
The buffer pool structure.
Definition: buf0buf.h:1607
UNIV_INLINE ulint ut_min(ulint n1, ulint n2)
typedef UT_LIST_BASE_NODE_T(mutex_t) ut_list_base_node_t
#define MAX_BUFFER_POOLS
Definition: buf0buf.h:80
ibool innodb_have_atomic_builtins
Definition: srv0srv.h:757
ulint innodb_buffer_pool_pages_dirty
Definition: srv0srv.h:742
ib_uint64_t lsn
Definition: log0log.h:764
UNIV_INLINE void log_free_check(void)
#define UT_LIST_REMOVE(NAME, BASE, N)
Definition: ut0lst.h:178
UNIV_INTERN ulint srv_boot(void)
Definition: srv0srv.cc:1511
UNIV_INTERN void srv_que_task_enqueue_low(que_thr_t *thr)
Definition: srv0srv.cc:3579
UNIV_INTERN ib_uint64_t buf_pool_get_oldest_modification(void)
Definition: buf0buf.cc:315
UNIV_INTERN void ha_print_info(FILE *file, hash_table_t *table)
Definition: ha0ha.cc:410
ulint innodb_buffer_pool_read_requests
Definition: srv0srv.h:748
ulint innodb_rows_deleted
Definition: srv0srv.h:781
#define DICT_TF_FORMAT_MAX
Definition: dict0mem.h:94
const char * op_info
Definition: trx0trx.h:477
UNIV_INTERN void trx_search_latch_release_if_reserved(trx_t *trx)
Definition: trx0trx.cc:244
ulint innodb_buffer_pool_write_requests
Definition: srv0srv.h:752
UNIV_INLINE buf_pool_t * buf_pool_from_array(ulint index)
UNIV_INTERN ibool os_file_set_eof(FILE *file)
Definition: os0file.cc:2055
UNIV_INTERN void buf_print_io(FILE *file)
Definition: buf0buf.cc:5080
UNIV_INTERN ulint os_proc_get_number(void)
Definition: os0proc.cc:57
UNIV_INTERN void os_sync_init(void)
Definition: os0sync.cc:304
ulint innodb_data_pending_fsyncs
Definition: srv0srv.h:734
UNIV_INTERN void trx_print(FILE *f, trx_t *trx, ulint max_query_len)
Definition: trx0trx.cc:1690
UNIV_INTERN void os_event_set(os_event_t event)
Definition: os0sync.cc:434
unsigned in_use
Definition: srv0srv.cc:763
UNIV_INTERN void os_thread_sleep(ulint tm)
Definition: os0thread.cc:265
UNIV_INTERN void log_refresh_stats(void)
Definition: log0log.cc:3429
ulint innodb_data_pending_writes
Definition: srv0srv.h:733
ulint innodb_log_waits
Definition: srv0srv.h:758
ulint innodb_data_fsyncs
Definition: srv0srv.h:735
UNIV_INTERN ib_int64_t os_event_reset(os_event_t event)
Definition: os0sync.cc:472
ulint dict_operation_lock_mode
Definition: trx0trx.h:530
#define ut_a(EXPR)
Definition: ut0dbg.h:105
lock_t * wait_lock
Definition: trx0trx.h:637
ulint rseg_history_len
Definition: trx0sys.h:599
UNIV_INLINE ulint buf_pool_get_n_pages(void)
ib_uint64_t srv_start_lsn
Definition: srv0start.cc:99
ulint innodb_pages_created
Definition: srv0srv.h:766
ulint innodb_pages_written
Definition: srv0srv.h:768
UNIV_INTERN ulint buf_get_n_pending_ios(void)
Definition: buf0buf.cc:4769
ulint innodb_buffer_pool_pages_misc
Definition: srv0srv.h:743
ulint innodb_row_lock_time_max
Definition: srv0srv.h:776
ulint fil_n_log_flushes
Definition: fil0fil.cc:115
UNIV_INTERN void dict_ind_init(void)
Definition: dict0dict.cc:4792
ulint innodb_buffer_pool_pages_free
Definition: srv0srv.h:744
UNIV_INTERN ulint buf_flush_list(ulint min_n, ib_uint64_t lsn_limit)
Definition: buf0flu.cc:1925
#define UT_LIST_ADD_LAST(NAME, BASE, N)
Definition: ut0lst.h:119
UNIV_INTERN os_thread_ret_t srv_master_thread(void *arg)
Definition: srv0srv.cc:2744
#define UT_LIST_GET_FIRST(BASE)
Definition: ut0lst.h:224
UNIV_INTERN ulint os_event_wait_time_low(os_event_t event, ulint time_in_usec, ib_int64_t reset_sig_count)
Definition: os0sync.cc:652
ulint n_ra_pages_evicted
Definition: buf0buf.h:1581
UNIV_INTERN os_thread_ret_t srv_LRU_dump_restore_thread(void *arg)
Definition: srv0srv.cc:2540
UNIV_INTERN void srv_conc_force_exit_innodb(trx_t *trx)
Definition: srv0srv.cc:1399
ulint innodb_buffer_pool_pages_flushed
Definition: srv0srv.h:751
mutex_t mutex
Definition: log0log.h:768
my_bool srv_file_per_table
Definition: srv0srv.cc:125
UNIV_INTERN void ut_mem_init(void)
Definition: ut0mem.cc:77
hash_table_t * hash_index
Definition: btr0sea.h:266
UNIV_INTERN ibool sync_array_print_long_waits(os_thread_id_t *waiter, const void **sema) __attribute__((nonnull))
Definition: sync0arr.cc:918
ulint srv_buf_pool_reads
Definition: srv0srv.cc:372
dict_sys_t * dict_sys
Definition: dict0dict.cc:63
UNIV_INTERN os_thread_ret_t srv_purge_thread(void *)
Definition: srv0srv.cc:3464
ulint srv_max_file_format_at_startup
Definition: srv0srv.cc:131
#define ut_ad(EXPR)
Definition: ut0dbg.h:127
UNIV_INTERN void mem_init(ulint size)
Definition: mem0dbg.cc:149
os_thread_t os_thread_id_t
Definition: os0thread.h:53
UNIV_INTERN void srv_suspend_mysql_thread(que_thr_t *thr)
Definition: srv0srv.cc:1610
UNIV_INTERN void fsp_print(ulint space)
Definition: fsp0fsp.cc:4172
ulint fil_n_pending_tablespace_flushes
Definition: fil0fil.cc:120
UNIV_INTERN ulint mem_pool_get_reserved(mem_pool_t *pool)
Definition: mem0pool.cc:720
UNIV_INTERN void trx_i_s_cache_init(trx_i_s_cache_t *cache)
Definition: trx0i_s.cc:1367
UNIV_INTERN ulint buf_get_modified_ratio_pct(void)
Definition: buf0buf.cc:4796
UNIV_INTERN ibool lock_print_info_summary(FILE *file, ibool nowait)
Definition: lock0lock.cc:4446
ulint innodb_truncated_status_writes
Definition: srv0srv.h:782
#define UT_LIST_INIT(BASE)
Definition: ut0lst.h:84
#define ut_error
Definition: ut0dbg.h:115
srv_sys_t * srv_sys
Definition: srv0srv.cc:785
UNIV_INTERN ulint row_drop_tables_for_mysql_in_background(void)
Definition: row0mysql.cc:2203
ulint innodb_page_size
Definition: srv0srv.h:765
ulint btr_cur_n_sea_old
Definition: btr0cur.cc:98
ulint innodb_rows_read
Definition: srv0srv.h:778
UNIV_INTERN ibool log_checkpoint(ibool sync, ibool write_always)
Definition: log0log.cc:2013
ib_int64_t innodb_row_lock_time
Definition: srv0srv.h:771
UNIV_INTERN void os_aio_refresh_stats(void)
Definition: os0file.cc:5215
UNIV_INTERN void buf_flush_wait_batch_end(buf_pool_t *buf_pool, enum buf_flush type)
Definition: buf0flu.cc:1865
os_event_t event
Definition: srv0srv.cc:768
mem_pool_t * mem_comm_pool
Definition: mem0pool.cc:116
ibool is_active
Definition: que0que.h:357
os_thread_t handle
Definition: srv0srv.cc:761
UNIV_INTERN int ut_usectime(ulint *sec, ulint *ms)
Definition: ut0ut.cc:153
#define UT_WAIT_FOR(cond, max_wait_us)
Definition: ut0ut.h:86
UNIV_INTERN bool buf_LRU_file_dump(void)
Definition: buf0lru.cc:2100
ulint innodb_rows_inserted
Definition: srv0srv.h:779
UNIV_INTERN os_thread_ret_t srv_lock_timeout_thread(void *arg)
Definition: srv0srv.cc:2316
UNIV_INTERN ibool trx_start(trx_t *trx, ulint rseg_id)
Definition: trx0trx.cc:676
UNIV_INTERN void ut_print_timestamp(FILE *file)
Definition: ut0ut.cc:247
ulint innodb_buffer_pool_pages_data
Definition: srv0srv.h:741
ulint state
Definition: que0que.h:362
UNIV_INTERN void buf_get_total_stat(buf_pool_stat_t *tot_stat)
Definition: buf0buf.cc:387
ib_uint64_t oldest_modification
Definition: buf0buf.h:1378
UNIV_INTERN ulint ibuf_contract_for_n_pages(ibool sync, ulint n_pages)
Definition: ibuf0ibuf.cc:2730
UNIV_INLINE trx_t * thr_get_trx(que_thr_t *thr)
srv_shutdown_state
Definition: srv0start.h:113
UNIV_INTERN ibool srv_is_any_background_thread_active(void)
Definition: srv0srv.cc:2589
export_struc export_vars
Definition: srv0srv.cc:378
drizzled::Session * mysql_thd
Definition: trx0trx.h:559
UNIV_INTERN os_thread_id_t os_thread_get_curr_id(void)
Definition: os0thread.cc:93
UNIV_INTERN void os_thread_exit(void *exit_value)
Definition: os0thread.cc:199
UNIV_INTERN void dict_print(void)
Definition: dict0load.cc:166
UNIV_INTERN ulint srv_thread_has_reserved_slot(enum srv_thread_type type)
Definition: srv0srv.cc:1052
os_thread_id_t id
Definition: srv0srv.cc:760
ulint innodb_os_log_pending_fsyncs
Definition: srv0srv.h:764
ulint srv_buf_pool_old_size
Definition: srv0srv.cc:257
ulint srv_file_format
Definition: srv0srv.cc:127
ulint os_n_pending_reads
Definition: os0file.cc:305
ulint innodb_buffer_pool_wait_free
Definition: srv0srv.h:750
UNIV_INTERN void srv_conc_force_enter_innodb(trx_t *trx)
Definition: srv0srv.cc:1373
UNIV_INTERN void os_fast_mutex_lock(os_fast_mutex_t *fast_mutex)
Definition: os0sync.cc:900
UNIV_INTERN void srv_export_innodb_status(void)
Definition: srv0srv.cc:2061
UNIV_INTERN void ut_copy_file(FILE *dest, FILE *src)
Definition: ut0ut.cc:573
ulint innodb_data_writes
Definition: srv0srv.h:737
UNIV_INTERN ibool srv_printf_innodb_monitor(FILE *file, ibool nowait, ulint *trx_start, ulint *trx_end)
Definition: srv0srv.cc:1869
ulint innodb_log_write_requests
Definition: srv0srv.h:759
ulint btr_cur_n_non_sea
Definition: btr0cur.cc:87
UNIV_INTERN void srv_release_mysql_thread_if_suspended(que_thr_t *thr)
Definition: srv0srv.cc:1809
UNIV_INTERN void srv_wake_purge_thread_if_not_active(void)
Definition: srv0srv.cc:2640
ulint srv_buf_pool_size
Definition: srv0srv.cc:253
UNIV_INTERN ulint row_get_background_drop_list_len_low(void)
Definition: row0mysql.cc:2277
ulint os_n_pending_writes
Definition: os0file.cc:303
ulint error_state
Definition: trx0trx.h:601
UNIV_INTERN void recv_sys_var_init(void)
Definition: log0recv.cc:262
UNIV_INTERN void row_mysql_unlock_data_dictionary(trx_t *trx)
Definition: row0mysql.cc:1790
ulint fil_n_pending_log_flushes
Definition: fil0fil.cc:118
ulint srv_buf_pool_instances
Definition: srv0srv.cc:255
UNIV_INTERN ulint os_thread_pf(os_thread_id_t a)
Definition: os0thread.cc:72
ulint innodb_log_writes
Definition: srv0srv.h:760
UNIV_INTERN ulint fil_space_get_n_reserved_extents(ulint id)
Definition: fil0fil.cc:4142
UNIV_INTERN ibool os_thread_eq(os_thread_id_t a, os_thread_id_t b)
Definition: os0thread.cc:46
UNIV_INTERN void buf_LRU_stat_update(void)
Definition: buf0lru.cc:2046
UNIV_INTERN void lock_print_info_all_transactions(FILE *file)
Definition: lock0lock.cc:4499
UNIV_INTERN double ut_difftime(ib_time_t time2, ib_time_t time1)
Definition: ut0ut.cc:235
srv_thread_type
Definition: srv0srv.h:471