Drizzled Public API Documentation

row0sel.cc
1 /*****************************************************************************
2 
3 Copyright (C) 1997, 2010, Innobase Oy. All Rights Reserved.
4 Copyright (C) 2008, Google Inc.
5 
6 Portions of this file contain modifications contributed and copyrighted by
7 Google, Inc. Those modifications are gratefully acknowledged and are described
8 briefly in the InnoDB documentation. The contributions by Google are
9 incorporated with their permission, and subject to the conditions contained in
10 the file COPYING.Google.
11 
12 This program is free software; you can redistribute it and/or modify it under
13 the terms of the GNU General Public License as published by the Free Software
14 Foundation; version 2 of the License.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License along with
21 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
22 St, Fifth Floor, Boston, MA 02110-1301 USA
23 
24 *****************************************************************************/
25 
26 /***************************************************//**
@file row/row0sel.cc
Select (the file banner was truncated in extraction; description restored
from the upstream InnoDB source header — verify against the original file)
*******************************************************/
33 #include "row0sel.h"
34 
35 #ifdef UNIV_NONINL
36 #include "row0sel.ic"
37 #endif
38 
39 #include "dict0dict.h"
40 #include "dict0boot.h"
41 #include "trx0undo.h"
42 #include "trx0trx.h"
43 #include "btr0btr.h"
44 #include "btr0cur.h"
45 #include "btr0sea.h"
46 #include "mach0data.h"
47 #include "que0que.h"
48 #include "row0upd.h"
49 #include "row0row.h"
50 #include "row0vers.h"
51 #include "rem0cmp.h"
52 #include "lock0lock.h"
53 #include "eval0eval.h"
54 #include "pars0sym.h"
55 #include "pars0pars.h"
56 #include "row0mysql.h"
57 #include "read0read.h"
58 #include "buf0lru.h"
59 #include "ha_prototypes.h"
60 
61 /* Maximum number of rows to prefetch; MySQL interface has another parameter */
62 #define SEL_MAX_N_PREFETCH 16
63 
64 /* Number of rows fetched, after which to start prefetching; MySQL interface
65 has another parameter */
66 #define SEL_PREFETCH_LIMIT 1
67 
68 /* When a select has accessed about this many pages, it returns control back
69 to que_run_threads: this is to allow canceling runaway queries */
70 
71 #define SEL_COST_LIMIT 100
72 
73 /* Flags for search shortcut */
74 #define SEL_FOUND 0
75 #define SEL_EXHAUSTED 1
76 #define SEL_RETRY 2
77 
78 /********************************************************************/
85 static
86 ibool
87 row_sel_sec_rec_is_for_blob(
88 /*========================*/
89  ulint mtype,
90  ulint prtype,
91  ulint mbminmaxlen,
93  const byte* clust_field,
99  ulint clust_len,
100  const byte* sec_field,
101  ulint sec_len,
102  ulint zip_size)
103 {
104  ulint len;
105  byte buf[DICT_MAX_INDEX_COL_LEN];
106 
107  ut_a(clust_len >= BTR_EXTERN_FIELD_REF_SIZE);
108 
109  if (UNIV_UNLIKELY
110  (!memcmp(clust_field + clust_len - BTR_EXTERN_FIELD_REF_SIZE,
112  /* The externally stored field was not written yet.
113  This record should only be seen by
114  recv_recovery_rollback_active() or any
115  TRX_ISO_READ_UNCOMMITTED transactions. */
116  return(FALSE);
117  }
118 
119  len = btr_copy_externally_stored_field_prefix(buf, sizeof buf,
120  zip_size,
121  clust_field, clust_len);
122 
123  if (UNIV_UNLIKELY(len == 0)) {
124  /* The BLOB was being deleted as the server crashed.
125  There should not be any secondary index records
126  referring to this clustered index record, because
127  btr_free_externally_stored_field() is called after all
128  secondary index entries of the row have been purged. */
129  return(FALSE);
130  }
131 
132  len = dtype_get_at_most_n_mbchars(prtype, mbminmaxlen,
133  sec_len, len, (const char*) buf);
134 
135  return(!cmp_data_data(mtype, prtype, buf, len, sec_field, sec_len));
136 }
137 
138 /********************************************************************/
147 static
148 ibool
149 row_sel_sec_rec_is_for_clust_rec(
150 /*=============================*/
151  const rec_t* sec_rec,
152  dict_index_t* sec_index,
153  const rec_t* clust_rec,
157  dict_index_t* clust_index)
158 {
159  const byte* sec_field;
160  ulint sec_len;
161  const byte* clust_field;
162  ulint n;
163  ulint i;
164  mem_heap_t* heap = NULL;
165  ulint clust_offsets_[REC_OFFS_NORMAL_SIZE];
166  ulint sec_offsets_[REC_OFFS_SMALL_SIZE];
167  ulint* clust_offs = clust_offsets_;
168  ulint* sec_offs = sec_offsets_;
169  ibool is_equal = TRUE;
170 
171  rec_offs_init(clust_offsets_);
172  rec_offs_init(sec_offsets_);
173 
174  if (rec_get_deleted_flag(clust_rec,
175  dict_table_is_comp(clust_index->table))) {
176 
177  /* The clustered index record is delete-marked;
178  it is not visible in the read view. Besides,
179  if there are any externally stored columns,
180  some of them may have already been purged. */
181  return(FALSE);
182  }
183 
184  clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs,
185  ULINT_UNDEFINED, &heap);
186  sec_offs = rec_get_offsets(sec_rec, sec_index, sec_offs,
187  ULINT_UNDEFINED, &heap);
188 
190 
191  for (i = 0; i < n; i++) {
192  const dict_field_t* ifield;
193  const dict_col_t* col;
194  ulint clust_pos;
195  ulint clust_len;
196  ulint len;
197 
198  ifield = dict_index_get_nth_field(sec_index, i);
199  col = dict_field_get_col(ifield);
200  clust_pos = dict_col_get_clust_pos(col, clust_index);
201 
202  clust_field = rec_get_nth_field(
203  clust_rec, clust_offs, clust_pos, &clust_len);
204  sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len);
205 
206  len = clust_len;
207 
208  if (ifield->prefix_len > 0 && len != UNIV_SQL_NULL) {
209 
210  if (rec_offs_nth_extern(clust_offs, clust_pos)) {
212  }
213 
215  col->prtype, col->mbminmaxlen,
216  ifield->prefix_len, len, (char*) clust_field);
217 
218  if (rec_offs_nth_extern(clust_offs, clust_pos)
219  && len < sec_len) {
220  if (!row_sel_sec_rec_is_for_blob(
221  col->mtype, col->prtype,
222  col->mbminmaxlen,
223  clust_field, clust_len,
224  sec_field, sec_len,
226  clust_index->table))) {
227  goto inequal;
228  }
229 
230  continue;
231  }
232  }
233 
234  if (0 != cmp_data_data(col->mtype, col->prtype,
235  clust_field, len,
236  sec_field, sec_len)) {
237 inequal:
238  is_equal = FALSE;
239  goto func_exit;
240  }
241  }
242 
243 func_exit:
244  if (UNIV_LIKELY_NULL(heap)) {
245  mem_heap_free(heap);
246  }
247  return(is_equal);
248 }
249 
250 /*********************************************************************/
253 UNIV_INTERN
254 sel_node_t*
256 /*============*/
257  mem_heap_t* heap)
258 {
259  sel_node_t* node;
260 
261  node = static_cast<sel_node_t *>(mem_heap_alloc(heap, sizeof(sel_node_t)));
262  node->common.type = QUE_NODE_SELECT;
263  node->state = SEL_NODE_OPEN;
264 
265  node->plans = NULL;
266 
267  return(node);
268 }
269 
270 /*********************************************************************/
273 UNIV_INTERN
274 void
276 /*==================*/
277  sel_node_t* node)
278 {
279  ulint i;
280  plan_t* plan;
281 
282  if (node->plans != NULL) {
283  for (i = 0; i < node->n_tables; i++) {
284  plan = sel_node_get_nth_plan(node, i);
285 
286  btr_pcur_close(&(plan->pcur));
287  btr_pcur_close(&(plan->clust_pcur));
288 
289  if (plan->old_vers_heap) {
291  }
292  }
293  }
294 }
295 
296 /*********************************************************************/
299 UNIV_INLINE
300 void
301 sel_eval_select_list(
302 /*=================*/
303  sel_node_t* node)
304 {
305  que_node_t* exp;
306 
307  exp = node->select_list;
308 
309  while (exp) {
310  eval_exp(exp);
311 
312  exp = que_node_get_next(exp);
313  }
314 }
315 
316 /*********************************************************************/
319 UNIV_INLINE
320 void
321 sel_assign_into_var_values(
322 /*=======================*/
323  sym_node_t* var,
324  sel_node_t* node)
325 {
326  que_node_t* exp;
327 
328  if (var == NULL) {
329 
330  return;
331  }
332 
333  exp = node->select_list;
334 
335  while (var) {
336  ut_ad(exp);
337 
338  eval_node_copy_val(var->alias, exp);
339 
340  exp = que_node_get_next(exp);
341  var = static_cast<sym_node_t *>(que_node_get_next(var));
342  }
343 }
344 
345 /*********************************************************************/
348 UNIV_INLINE
349 void
350 sel_reset_aggregate_vals(
351 /*=====================*/
352  sel_node_t* node)
353 {
354  func_node_t* func_node;
355 
356  ut_ad(node->is_aggregate);
357 
358  func_node = static_cast<func_node_t *>(node->select_list);
359 
360  while (func_node) {
361  eval_node_set_int_val(func_node, 0);
362 
363  func_node = static_cast<func_node_t *>(que_node_get_next(func_node));
364  }
365 
366  node->aggregate_already_fetched = FALSE;
367 }
368 
369 /*********************************************************************/
371 UNIV_INLINE
372 void
373 row_sel_copy_input_variable_vals(
374 /*=============================*/
375  sel_node_t* node)
376 {
377  sym_node_t* var;
378 
379  var = UT_LIST_GET_FIRST(node->copy_variables);
380 
381  while (var) {
382  eval_node_copy_val(var, var->alias);
383 
384  var->indirection = NULL;
385 
386  var = UT_LIST_GET_NEXT(col_var_list, var);
387  }
388 }
389 
390 /*********************************************************************/
392 static
393 void
394 row_sel_fetch_columns(
395 /*==================*/
396  dict_index_t* index,
397  const rec_t* rec,
399  const ulint* offsets,
400  sym_node_t* column)
402 {
403  dfield_t* val;
404  ulint index_type;
405  ulint field_no;
406  const byte* data;
407  ulint len;
408 
409  ut_ad(rec_offs_validate(rec, index, offsets));
410 
411  if (dict_index_is_clust(index)) {
412  index_type = SYM_CLUST_FIELD_NO;
413  } else {
414  index_type = SYM_SEC_FIELD_NO;
415  }
416 
417  while (column) {
418  mem_heap_t* heap = NULL;
419  ibool needs_copy;
420 
421  field_no = column->field_nos[index_type];
422 
423  if (field_no != ULINT_UNDEFINED) {
424 
425  if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
426  field_no))) {
427 
428  /* Copy an externally stored field to the
429  temporary heap, if possible. */
430 
431  heap = mem_heap_create(1);
432 
434  rec, offsets,
435  dict_table_zip_size(index->table),
436  field_no, &len, heap);
437 
438  /* data == NULL means that the
439  externally stored field was not
440  written yet. This record
441  should only be seen by
442  recv_recovery_rollback_active() or any
443  TRX_ISO_READ_UNCOMMITTED
444  transactions. The InnoDB SQL parser
445  (the sole caller of this function)
446  does not implement READ UNCOMMITTED,
447  and it is not involved during rollback. */
448  ut_a(data);
449  ut_a(len != UNIV_SQL_NULL);
450 
451  needs_copy = TRUE;
452  } else {
453  data = rec_get_nth_field(rec, offsets,
454  field_no, &len);
455 
456  needs_copy = column->copy_val;
457  }
458 
459  if (needs_copy) {
460  eval_node_copy_and_alloc_val(column, data,
461  len);
462  } else {
463  val = que_node_get_val(column);
464  dfield_set_data(val, data, len);
465  }
466 
467  if (UNIV_LIKELY_NULL(heap)) {
468  mem_heap_free(heap);
469  }
470  }
471 
472  column = UT_LIST_GET_NEXT(col_var_list, column);
473  }
474 }
475 
476 /*********************************************************************/
478 static
479 void
480 sel_col_prefetch_buf_alloc(
481 /*=======================*/
482  sym_node_t* column)
483 {
484  sel_buf_t* sel_buf;
485  ulint i;
486 
487  ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL);
488 
489  column->prefetch_buf = static_cast<sel_buf_t *>(mem_alloc(SEL_MAX_N_PREFETCH
490  * sizeof(sel_buf_t)));
491  for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
492  sel_buf = column->prefetch_buf + i;
493 
494  sel_buf->data = NULL;
495 
496  sel_buf->val_buf_size = 0;
497  }
498 }
499 
500 /*********************************************************************/
503 UNIV_INTERN
504 void
506 /*======================*/
507  sel_buf_t* prefetch_buf)
508 {
509  sel_buf_t* sel_buf;
510  ulint i;
511 
512  for (i = 0; i < SEL_MAX_N_PREFETCH; i++) {
513  sel_buf = prefetch_buf + i;
514 
515  if (sel_buf->val_buf_size > 0) {
516 
517  mem_free(sel_buf->data);
518  }
519  }
520 }
521 
522 /*********************************************************************/
525 static
526 void
527 sel_pop_prefetched_row(
528 /*===================*/
529  plan_t* plan)
530 {
531  sym_node_t* column;
532  sel_buf_t* sel_buf;
533  dfield_t* val;
534  byte* data;
535  ulint len;
536  ulint val_buf_size;
537 
538  ut_ad(plan->n_rows_prefetched > 0);
539 
540  column = UT_LIST_GET_FIRST(plan->columns);
541 
542  while (column) {
543  val = que_node_get_val(column);
544 
545  if (!column->copy_val) {
546  /* We did not really push any value for the
547  column */
548 
549  ut_ad(!column->prefetch_buf);
550  ut_ad(que_node_get_val_buf_size(column) == 0);
551  ut_d(dfield_set_null(val));
552 
553  goto next_col;
554  }
555 
556  ut_ad(column->prefetch_buf);
557  ut_ad(!dfield_is_ext(val));
558 
559  sel_buf = column->prefetch_buf + plan->first_prefetched;
560 
561  data = sel_buf->data;
562  len = sel_buf->len;
563  val_buf_size = sel_buf->val_buf_size;
564 
565  /* We must keep track of the allocated memory for
566  column values to be able to free it later: therefore
567  we swap the values for sel_buf and val */
568 
569  sel_buf->data = static_cast<byte *>(dfield_get_data(val));
570  sel_buf->len = dfield_get_len(val);
571  sel_buf->val_buf_size = que_node_get_val_buf_size(column);
572 
573  dfield_set_data(val, data, len);
574  que_node_set_val_buf_size(column, val_buf_size);
575 next_col:
576  column = UT_LIST_GET_NEXT(col_var_list, column);
577  }
578 
579  plan->n_rows_prefetched--;
580 
581  plan->first_prefetched++;
582 }
583 
584 /*********************************************************************/
587 UNIV_INLINE
588 void
589 sel_push_prefetched_row(
590 /*====================*/
591  plan_t* plan)
592 {
593  sym_node_t* column;
594  sel_buf_t* sel_buf;
595  dfield_t* val;
596  byte* data;
597  ulint len;
598  ulint pos;
599  ulint val_buf_size;
600 
601  if (plan->n_rows_prefetched == 0) {
602  pos = 0;
603  plan->first_prefetched = 0;
604  } else {
605  pos = plan->n_rows_prefetched;
606 
607  /* We have the convention that pushing new rows starts only
608  after the prefetch stack has been emptied: */
609 
610  ut_ad(plan->first_prefetched == 0);
611  }
612 
613  plan->n_rows_prefetched++;
614 
615  ut_ad(pos < SEL_MAX_N_PREFETCH);
616 
617  column = UT_LIST_GET_FIRST(plan->columns);
618 
619  while (column) {
620  if (!column->copy_val) {
621  /* There is no sense to push pointers to database
622  page fields when we do not keep latch on the page! */
623 
624  goto next_col;
625  }
626 
627  if (!column->prefetch_buf) {
628  /* Allocate a new prefetch buffer */
629 
630  sel_col_prefetch_buf_alloc(column);
631  }
632 
633  sel_buf = column->prefetch_buf + pos;
634 
635  val = que_node_get_val(column);
636 
637  data = static_cast<byte *>(dfield_get_data(val));
638  len = dfield_get_len(val);
639  val_buf_size = que_node_get_val_buf_size(column);
640 
641  /* We must keep track of the allocated memory for
642  column values to be able to free it later: therefore
643  we swap the values for sel_buf and val */
644 
645  dfield_set_data(val, sel_buf->data, sel_buf->len);
646  que_node_set_val_buf_size(column, sel_buf->val_buf_size);
647 
648  sel_buf->data = data;
649  sel_buf->len = len;
650  sel_buf->val_buf_size = val_buf_size;
651 next_col:
652  column = UT_LIST_GET_NEXT(col_var_list, column);
653  }
654 }
655 
656 /*********************************************************************/
659 static
660 ulint
661 row_sel_build_prev_vers(
662 /*====================*/
663  read_view_t* read_view,
664  dict_index_t* index,
665  rec_t* rec,
666  ulint** offsets,
668  mem_heap_t** offset_heap,
670  mem_heap_t** old_vers_heap,
671  rec_t** old_vers,
675  mtr_t* mtr)
676 {
677  ulint err;
678 
679  if (*old_vers_heap) {
680  mem_heap_empty(*old_vers_heap);
681  } else {
682  *old_vers_heap = mem_heap_create(512);
683  }
684 
686  rec, mtr, index, offsets, read_view, offset_heap,
687  *old_vers_heap, old_vers);
688  return(err);
689 }
690 
691 /*********************************************************************/
695 static
696 ulint
697 row_sel_build_committed_vers_for_mysql(
698 /*===================================*/
699  dict_index_t* clust_index,
700  row_prebuilt_t* prebuilt,
701  const rec_t* rec,
702  ulint** offsets,
704  mem_heap_t** offset_heap,
706  const rec_t** old_vers,
710  mtr_t* mtr)
711 {
712  ulint err;
713 
714  if (prebuilt->old_vers_heap) {
715  mem_heap_empty(prebuilt->old_vers_heap);
716  } else {
717  prebuilt->old_vers_heap = mem_heap_create(200);
718  }
719 
721  rec, mtr, clust_index, offsets, offset_heap,
722  prebuilt->old_vers_heap, old_vers);
723  return(err);
724 }
725 
726 /*********************************************************************/
730 UNIV_INLINE
731 ibool
732 row_sel_test_end_conds(
733 /*===================*/
734  plan_t* plan)
737 {
738  func_node_t* cond;
739 
740  /* All conditions in end_conds are comparisons of a column to an
741  expression */
742 
743  cond = UT_LIST_GET_FIRST(plan->end_conds);
744 
745  while (cond) {
746  /* Evaluate the left side of the comparison, i.e., get the
747  column value if there is an indirection */
748 
749  eval_sym(static_cast<sym_node_t *>(cond->args));
750 
751  /* Do the comparison */
752 
753  if (!eval_cmp(cond)) {
754 
755  return(FALSE);
756  }
757 
758  cond = UT_LIST_GET_NEXT(cond_list, cond);
759  }
760 
761  return(TRUE);
762 }
763 
764 /*********************************************************************/
767 UNIV_INLINE
768 ibool
769 row_sel_test_other_conds(
770 /*=====================*/
771  plan_t* plan)
773 {
774  func_node_t* cond;
775 
776  cond = UT_LIST_GET_FIRST(plan->other_conds);
777 
778  while (cond) {
779  eval_exp(cond);
780 
781  if (!eval_node_get_ibool_val(cond)) {
782 
783  return(FALSE);
784  }
785 
786  cond = UT_LIST_GET_NEXT(cond_list, cond);
787  }
788 
789  return(TRUE);
790 }
791 
792 /*********************************************************************/
796 static
797 ulint
798 row_sel_get_clust_rec(
799 /*==================*/
800  sel_node_t* node,
801  plan_t* plan,
802  rec_t* rec,
803  que_thr_t* thr,
804  rec_t** out_rec,
808  mtr_t* mtr)
811 {
812  dict_index_t* index;
813  rec_t* clust_rec;
814  rec_t* old_vers;
815  ulint err;
816  mem_heap_t* heap = NULL;
817  ulint offsets_[REC_OFFS_NORMAL_SIZE];
818  ulint* offsets = offsets_;
819  rec_offs_init(offsets_);
820 
821  *out_rec = NULL;
822 
823  offsets = rec_get_offsets(rec,
824  btr_pcur_get_btr_cur(&plan->pcur)->index,
825  offsets, ULINT_UNDEFINED, &heap);
826 
827  row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets);
828 
829  index = dict_table_get_first_index(plan->table);
830 
831  btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
832  BTR_SEARCH_LEAF, &plan->clust_pcur,
833  0, mtr);
834 
835  clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));
836 
837  /* Note: only if the search ends up on a non-infimum record is the
838  low_match value the real match to the search tuple */
839 
840  if (!page_rec_is_user_rec(clust_rec)
842  < dict_index_get_n_unique(index)) {
843 
845  dict_table_is_comp(plan->table)));
846  ut_a(node->read_view);
847 
848  /* In a rare case it is possible that no clust rec is found
849  for a delete-marked secondary index record: if in row0umod.c
850  in row_undo_mod_remove_clust_low() we have already removed
851  the clust rec, while purge is still cleaning and removing
852  secondary index records associated with earlier versions of
853  the clustered index record. In that case we know that the
854  clustered index record did not exist in the read view of
855  trx. */
856 
857  goto func_exit;
858  }
859 
860  offsets = rec_get_offsets(clust_rec, index, offsets,
861  ULINT_UNDEFINED, &heap);
862 
863  if (!node->read_view) {
864  /* Try to place a lock on the index record */
865 
866  /* If innodb_locks_unsafe_for_binlog option is used
867  or this session is using READ COMMITTED isolation level
868  we lock only the record, i.e., next-key locking is
869  not used. */
870  ulint lock_type;
871  trx_t* trx;
872 
873  trx = thr_get_trx(thr);
874 
876  || trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
877  lock_type = LOCK_REC_NOT_GAP;
878  } else {
879  lock_type = LOCK_ORDINARY;
880  }
881 
883  0, btr_pcur_get_block(&plan->clust_pcur),
884  clust_rec, index, offsets,
885  static_cast<lock_mode>(node->row_lock_mode), lock_type, thr);
886 
887  switch (err) {
888  case DB_SUCCESS:
890  /* Declare the variable uninitialized in Valgrind.
891  It should be set to DB_SUCCESS at func_exit. */
892  UNIV_MEM_INVALID(&err, sizeof err);
893  break;
894  default:
895  goto err_exit;
896  }
897  } else {
898  /* This is a non-locking consistent read: if necessary, fetch
899  a previous version of the record */
900 
901  old_vers = NULL;
902 
903  if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets,
904  node->read_view)) {
905 
906  err = row_sel_build_prev_vers(
907  node->read_view, index, clust_rec,
908  &offsets, &heap, &plan->old_vers_heap,
909  &old_vers, mtr);
910 
911  if (err != DB_SUCCESS) {
912 
913  goto err_exit;
914  }
915 
916  clust_rec = old_vers;
917 
918  if (clust_rec == NULL) {
919  goto func_exit;
920  }
921  }
922 
923  /* If we had to go to an earlier version of row or the
924  secondary index record is delete marked, then it may be that
925  the secondary index record corresponding to clust_rec
926  (or old_vers) is not rec; in that case we must ignore
927  such row because in our snapshot rec would not have existed.
928  Remember that from rec we cannot see directly which transaction
929  id corresponds to it: we have to go to the clustered index
930  record. A query where we want to fetch all rows where
931  the secondary index value is in some interval would return
932  a wrong result if we would not drop rows which we come to
933  visit through secondary index records that would not really
934  exist in our snapshot. */
935 
936  if ((old_vers
938  plan->table)))
939  && !row_sel_sec_rec_is_for_clust_rec(rec, plan->index,
940  clust_rec, index)) {
941  goto func_exit;
942  }
943  }
944 
945  /* Fetch the columns needed in test conditions. The clustered
946  index record is protected by a page latch that was acquired
947  when plan->clust_pcur was positioned. The latch will not be
948  released until mtr_commit(mtr). */
949 
950  ut_ad(!rec_get_deleted_flag(clust_rec, rec_offs_comp(offsets)));
951  row_sel_fetch_columns(index, clust_rec, offsets,
952  UT_LIST_GET_FIRST(plan->columns));
953  *out_rec = clust_rec;
954 func_exit:
955  err = DB_SUCCESS;
956 err_exit:
957  if (UNIV_LIKELY_NULL(heap)) {
958  mem_heap_free(heap);
959  }
960  return(err);
961 }
962 
963 /*********************************************************************/
966 UNIV_INLINE
967 enum db_err
968 sel_set_rec_lock(
969 /*=============*/
970  const buf_block_t* block,
971  const rec_t* rec,
972  dict_index_t* index,
973  const ulint* offsets,
974  ulint mode,
975  ulint type,
977  que_thr_t* thr)
978 {
979  trx_t* trx;
980  enum db_err err;
981 
982  trx = thr_get_trx(thr);
983 
984  if (UT_LIST_GET_LEN(trx->trx_locks) > 10000) {
986 
987  return(DB_LOCK_TABLE_FULL);
988  }
989  }
990 
991  if (dict_index_is_clust(index)) {
992  err = lock_clust_rec_read_check_and_lock(0, block, rec, index,
993  offsets, static_cast<lock_mode>(mode), type, thr);
994  } else {
995  err = lock_sec_rec_read_check_and_lock(0, block, rec, index,
996  offsets, static_cast<lock_mode>(mode), type, thr);
997  }
998 
999  return(err);
1000 }
1001 
1002 /*********************************************************************/
1004 static
1005 void
1006 row_sel_open_pcur(
1007 /*==============*/
1008  plan_t* plan,
1009  ibool search_latch_locked,
1013  mtr_t* mtr)
1014 {
1015  dict_index_t* index;
1016  func_node_t* cond;
1017  que_node_t* exp;
1018  ulint n_fields;
1019  ulint has_search_latch = 0; /* RW_S_LATCH or 0 */
1020  ulint i;
1021 
1022  if (search_latch_locked) {
1023  has_search_latch = RW_S_LATCH;
1024  }
1025 
1026  index = plan->index;
1027 
1028  /* Calculate the value of the search tuple: the exact match columns
1029  get their expressions evaluated when we evaluate the right sides of
1030  end_conds */
1031 
1032  cond = UT_LIST_GET_FIRST(plan->end_conds);
1033 
1034  while (cond) {
1036 
1037  cond = UT_LIST_GET_NEXT(cond_list, cond);
1038  }
1039 
1040  if (plan->tuple) {
1041  n_fields = dtuple_get_n_fields(plan->tuple);
1042 
1043  if (plan->n_exact_match < n_fields) {
1044  /* There is a non-exact match field which must be
1045  evaluated separately */
1046 
1047  eval_exp(plan->tuple_exps[n_fields - 1]);
1048  }
1049 
1050  for (i = 0; i < n_fields; i++) {
1051  exp = plan->tuple_exps[i];
1052 
1053  dfield_copy_data(dtuple_get_nth_field(plan->tuple, i),
1054  que_node_get_val(exp));
1055  }
1056 
1057  /* Open pcur to the index */
1058 
1059  btr_pcur_open_with_no_init(index, plan->tuple, plan->mode,
1060  BTR_SEARCH_LEAF, &plan->pcur,
1061  has_search_latch, mtr);
1062  } else {
1063  /* Open the cursor to the start or the end of the index
1064  (FALSE: no init) */
1065 
1067  &(plan->pcur), FALSE, mtr);
1068  }
1069 
1070  ut_ad(plan->n_rows_prefetched == 0);
1071  ut_ad(plan->n_rows_fetched == 0);
1072  ut_ad(plan->cursor_at_end == FALSE);
1073 
1074  plan->pcur_is_open = TRUE;
1075 }
1076 
1077 /*********************************************************************/
1083 static
1084 ibool
1085 row_sel_restore_pcur_pos(
1086 /*=====================*/
1087  plan_t* plan,
1088  mtr_t* mtr)
1089 {
1090  ibool equal_position;
1091  ulint relative_position;
1092 
1093  ut_ad(!plan->cursor_at_end);
1094 
1095  relative_position = btr_pcur_get_rel_pos(&(plan->pcur));
1096 
1097  equal_position = btr_pcur_restore_position(BTR_SEARCH_LEAF,
1098  &(plan->pcur), mtr);
1099 
1100  /* If the cursor is traveling upwards, and relative_position is
1101 
1102  (1) BTR_PCUR_BEFORE: this is not allowed, as we did not have a lock
1103  yet on the successor of the page infimum;
1104  (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
1105  first record GREATER than the predecessor of a page supremum; we have
1106  not yet processed the cursor record: no need to move the cursor to the
1107  next record;
1108  (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
1109  last record LESS or EQUAL to the old stored user record; (a) if
1110  equal_position is FALSE, this means that the cursor is now on a record
1111  less than the old user record, and we must move to the next record;
1112  (b) if equal_position is TRUE, then if
1113  plan->stored_cursor_rec_processed is TRUE, we must move to the next
1114  record, else there is no need to move the cursor. */
1115 
1116  if (plan->asc) {
1117  if (relative_position == BTR_PCUR_ON) {
1118 
1119  if (equal_position) {
1120 
1121  return(plan->stored_cursor_rec_processed);
1122  }
1123 
1124  return(TRUE);
1125  }
1126 
1127  ut_ad(relative_position == BTR_PCUR_AFTER
1128  || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
1129 
1130  return(FALSE);
1131  }
1132 
1133  /* If the cursor is traveling downwards, and relative_position is
1134 
1135  (1) BTR_PCUR_BEFORE: btr_pcur_restore_position placed the cursor on
1136  the last record LESS than the successor of a page infimum; we have not
1137  processed the cursor record: no need to move the cursor;
1138  (2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
1139  first record GREATER than the predecessor of a page supremum; we have
1140  processed the cursor record: we should move the cursor to the previous
1141  record;
1142  (3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
1143  last record LESS or EQUAL to the old stored user record; (a) if
1144  equal_position is FALSE, this means that the cursor is now on a record
1145  less than the old user record, and we need not move to the previous
1146  record; (b) if equal_position is TRUE, then if
1147  plan->stored_cursor_rec_processed is TRUE, we must move to the previous
1148  record, else there is no need to move the cursor. */
1149 
1150  if (relative_position == BTR_PCUR_BEFORE
1151  || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
1152 
1153  return(FALSE);
1154  }
1155 
1156  if (relative_position == BTR_PCUR_ON) {
1157 
1158  if (equal_position) {
1159 
1160  return(plan->stored_cursor_rec_processed);
1161  }
1162 
1163  return(FALSE);
1164  }
1165 
1166  ut_ad(relative_position == BTR_PCUR_AFTER
1167  || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);
1168 
1169  return(TRUE);
1170 }
1171 
1172 /*********************************************************************/
1174 UNIV_INLINE
1175 void
1176 plan_reset_cursor(
1177 /*==============*/
1178  plan_t* plan)
1179 {
1180  plan->pcur_is_open = FALSE;
1181  plan->cursor_at_end = FALSE;
1182  plan->n_rows_fetched = 0;
1183  plan->n_rows_prefetched = 0;
1184 }
1185 
1186 /*********************************************************************/
1190 static
1191 ulint
1192 row_sel_try_search_shortcut(
1193 /*========================*/
1194  sel_node_t* node,
1195  plan_t* plan,
1197  mtr_t* mtr)
1198 {
1199  dict_index_t* index;
1200  rec_t* rec;
1201  mem_heap_t* heap = NULL;
1202  ulint offsets_[REC_OFFS_NORMAL_SIZE];
1203  ulint* offsets = offsets_;
1204  ulint ret;
1205  rec_offs_init(offsets_);
1206 
1207  index = plan->index;
1208 
1209  ut_ad(node->read_view);
1210  ut_ad(plan->unique_search);
1211  ut_ad(!plan->must_get_clust);
1212 #ifdef UNIV_SYNC_DEBUG
1213  ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
1214 #endif /* UNIV_SYNC_DEBUG */
1215 
1216  row_sel_open_pcur(plan, TRUE, mtr);
1217 
1218  rec = btr_pcur_get_rec(&(plan->pcur));
1219 
1220  if (!page_rec_is_user_rec(rec)) {
1221 
1222  return(SEL_RETRY);
1223  }
1224 
1225  ut_ad(plan->mode == PAGE_CUR_GE);
1226 
1227  /* As the cursor is now placed on a user record after a search with
1228  the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
1229  fields in the user record matched to the search tuple */
1230 
1231  if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {
1232 
1233  return(SEL_EXHAUSTED);
1234  }
1235 
1236  /* This is a non-locking consistent read: if necessary, fetch
1237  a previous version of the record */
1238 
1239  offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1240 
1241  if (dict_index_is_clust(index)) {
1242  if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
1243  node->read_view)) {
1244  ret = SEL_RETRY;
1245  goto func_exit;
1246  }
1247  } else if (!lock_sec_rec_cons_read_sees(rec, node->read_view)) {
1248 
1249  ret = SEL_RETRY;
1250  goto func_exit;
1251  }
1252 
1253  /* Test the deleted flag. */
1254 
1255  if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {
1256 
1257  ret = SEL_EXHAUSTED;
1258  goto func_exit;
1259  }
1260 
1261  /* Fetch the columns needed in test conditions. The index
1262  record is protected by a page latch that was acquired when
1263  plan->pcur was positioned. The latch will not be released
1264  until mtr_commit(mtr). */
1265 
1266  row_sel_fetch_columns(index, rec, offsets,
1267  UT_LIST_GET_FIRST(plan->columns));
1268 
1269  /* Test the rest of search conditions */
1270 
1271  if (!row_sel_test_other_conds(plan)) {
1272 
1273  ret = SEL_EXHAUSTED;
1274  goto func_exit;
1275  }
1276 
1278 
1279  plan->n_rows_fetched++;
1280  ret = SEL_FOUND;
1281 func_exit:
1282  if (UNIV_LIKELY_NULL(heap)) {
1283  mem_heap_free(heap);
1284  }
1285  return(ret);
1286 }
1287 
1288 /*********************************************************************/
1291 static
1292 ulint
1293 row_sel(
1294 /*====*/
1295  sel_node_t* node,
1296  que_thr_t* thr)
1297 {
1298  dict_index_t* index;
1299  plan_t* plan;
1300  mtr_t mtr;
1301  ibool moved;
1302  rec_t* rec;
1303  rec_t* old_vers;
1304  rec_t* clust_rec;
1305  ibool search_latch_locked;
1306  ibool consistent_read;
1307 
1308  /* The following flag becomes TRUE when we are doing a
1309  consistent read from a non-clustered index and we must look
1310  at the clustered index to find out the previous delete mark
1311  state of the non-clustered record: */
1312 
1313  ibool cons_read_requires_clust_rec = FALSE;
1314  ulint cost_counter = 0;
1315  ibool cursor_just_opened;
1316  ibool must_go_to_next;
1317  ibool mtr_has_extra_clust_latch = FALSE;
1318  /* TRUE if the search was made using
1319  a non-clustered index, and we had to
1320  access the clustered record: now &mtr
1321  contains a clustered index latch, and
1322  &mtr must be committed before we move
1323  to the next non-clustered record */
1324  ulint found_flag;
1325  ulint err;
1326  mem_heap_t* heap = NULL;
1327  ulint offsets_[REC_OFFS_NORMAL_SIZE];
1328  ulint* offsets = offsets_;
1329  rec_offs_init(offsets_);
1330 
1331  ut_ad(thr->run_node == node);
1332 
1333  search_latch_locked = FALSE;
1334 
1335  if (node->read_view) {
1336  /* In consistent reads, we try to do with the hash index and
1337  not to use the buffer page get. This is to reduce memory bus
1338  load resulting from semaphore operations. The search latch
1339  will be s-locked when we access an index with a unique search
1340  condition, but not locked when we access an index with a
1341  less selective search condition. */
1342 
1343  consistent_read = TRUE;
1344  } else {
1345  consistent_read = FALSE;
1346  }
1347 
1348 table_loop:
1349  /* TABLE LOOP
1350  ----------
1351  This is the outer major loop in calculating a join. We come here when
1352  node->fetch_table changes, and after adding a row to aggregate totals
1353  and, of course, when this function is called. */
1354 
1355  ut_ad(mtr_has_extra_clust_latch == FALSE);
1356 
1357  plan = sel_node_get_nth_plan(node, node->fetch_table);
1358  index = plan->index;
1359 
1360  if (plan->n_rows_prefetched > 0) {
1361  sel_pop_prefetched_row(plan);
1362 
1363  goto next_table_no_mtr;
1364  }
1365 
1366  if (plan->cursor_at_end) {
1367  /* The cursor has already reached the result set end: no more
1368  rows to process for this table cursor, as also the prefetch
1369  stack was empty */
1370 
1371  ut_ad(plan->pcur_is_open);
1372 
1373  goto table_exhausted_no_mtr;
1374  }
1375 
1376  /* Open a cursor to index, or restore an open cursor position */
1377 
1378  mtr_start(&mtr);
1379 
1380  if (consistent_read && plan->unique_search && !plan->pcur_is_open
1381  && !plan->must_get_clust
1382  && !plan->table->big_rows) {
1383  if (!search_latch_locked) {
1385 
1386  search_latch_locked = TRUE;
1387  } else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
1388 
1389  /* There is an x-latch request waiting: release the
1390  s-latch for a moment; as an s-latch here is often
1391  kept for some 10 searches before being released,
1392  a waiting x-latch request would block other threads
1393  from acquiring an s-latch for a long time, lowering
1394  performance significantly in multiprocessors. */
1395 
1396  rw_lock_s_unlock(&btr_search_latch);
1398  }
1399 
1400  found_flag = row_sel_try_search_shortcut(node, plan, &mtr);
1401 
1402  if (found_flag == SEL_FOUND) {
1403 
1404  goto next_table;
1405 
1406  } else if (found_flag == SEL_EXHAUSTED) {
1407 
1408  goto table_exhausted;
1409  }
1410 
1411  ut_ad(found_flag == SEL_RETRY);
1412 
1413  plan_reset_cursor(plan);
1414 
1415  mtr_commit(&mtr);
1416  mtr_start(&mtr);
1417  }
1418 
1419  if (search_latch_locked) {
1420  rw_lock_s_unlock(&btr_search_latch);
1421 
1422  search_latch_locked = FALSE;
1423  }
1424 
1425  if (!plan->pcur_is_open) {
1426  /* Evaluate the expressions to build the search tuple and
1427  open the cursor */
1428 
1429  row_sel_open_pcur(plan, search_latch_locked, &mtr);
1430 
1431  cursor_just_opened = TRUE;
1432 
1433  /* A new search was made: increment the cost counter */
1434  cost_counter++;
1435  } else {
1436  /* Restore pcur position to the index */
1437 
1438  must_go_to_next = row_sel_restore_pcur_pos(plan, &mtr);
1439 
1440  cursor_just_opened = FALSE;
1441 
1442  if (must_go_to_next) {
1443  /* We have already processed the cursor record: move
1444  to the next */
1445 
1446  goto next_rec;
1447  }
1448  }
1449 
1450 rec_loop:
1451  /* RECORD LOOP
1452  -----------
1453  In this loop we use pcur and try to fetch a qualifying row, and
1454  also fill the prefetch buffer for this table if n_rows_fetched has
1455  exceeded a threshold. While we are inside this loop, the following
1456  holds:
1457  (1) &mtr is started,
1458  (2) pcur is positioned and open.
1459 
1460  NOTE that if cursor_just_opened is TRUE here, it means that we came
1461  to this point right after row_sel_open_pcur. */
1462 
1463  ut_ad(mtr_has_extra_clust_latch == FALSE);
1464 
1465  rec = btr_pcur_get_rec(&(plan->pcur));
1466 
1467  /* PHASE 1: Set a lock if specified */
1468 
1469  if (!node->asc && cursor_just_opened
1470  && !page_rec_is_supremum(rec)) {
1471 
1472  /* When we open a cursor for a descending search, we must set
1473  a next-key lock on the successor record: otherwise it would
1474  be possible to insert new records next to the cursor position,
1475  and it might be that these new records should appear in the
1476  search result set, resulting in the phantom problem. */
1477 
1478  if (!consistent_read) {
1479 
1480  /* If innodb_locks_unsafe_for_binlog option is used
1481  or this session is using READ COMMITTED isolation
1482  level, we lock only the record, i.e., next-key
1483  locking is not used. */
1484 
1485  rec_t* next_rec = page_rec_get_next(rec);
1486  ulint lock_type;
1487  trx_t* trx;
1488 
1489  trx = thr_get_trx(thr);
1490 
1491  offsets = rec_get_offsets(next_rec, index, offsets,
1492  ULINT_UNDEFINED, &heap);
1493 
1495  || trx->isolation_level
1496  <= TRX_ISO_READ_COMMITTED) {
1497 
1498  if (page_rec_is_supremum(next_rec)) {
1499 
1500  goto skip_lock;
1501  }
1502 
1503  lock_type = LOCK_REC_NOT_GAP;
1504  } else {
1505  lock_type = LOCK_ORDINARY;
1506  }
1507 
1508  err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
1509  next_rec, index, offsets,
1510  node->row_lock_mode,
1511  lock_type, thr);
1512 
1513  switch (err) {
1514  case DB_SUCCESS_LOCKED_REC:
1515  err = DB_SUCCESS;
1516  case DB_SUCCESS:
1517  break;
1518  default:
1519  /* Note that in this case we will store in pcur
1520  the PREDECESSOR of the record we are waiting
1521  the lock for */
1522  goto lock_wait_or_error;
1523  }
1524  }
1525  }
1526 
1527 skip_lock:
1528  if (page_rec_is_infimum(rec)) {
1529 
1530  /* The infimum record on a page cannot be in the result set,
1531  and neither can a record lock be placed on it: we skip such
1532  a record. We also increment the cost counter as we may have
1533  processed yet another page of index. */
1534 
1535  cost_counter++;
1536 
1537  goto next_rec;
1538  }
1539 
1540  if (!consistent_read) {
1541  /* Try to place a lock on the index record */
1542 
1543  /* If innodb_locks_unsafe_for_binlog option is used
1544  or this session is using READ COMMITTED isolation level,
1545  we lock only the record, i.e., next-key locking is
1546  not used. */
1547 
1548  ulint lock_type;
1549  trx_t* trx;
1550 
1551  offsets = rec_get_offsets(rec, index, offsets,
1552  ULINT_UNDEFINED, &heap);
1553 
1554  trx = thr_get_trx(thr);
1555 
1557  || trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
1558 
1559  if (page_rec_is_supremum(rec)) {
1560 
1561  goto next_rec;
1562  }
1563 
1564  lock_type = LOCK_REC_NOT_GAP;
1565  } else {
1566  lock_type = LOCK_ORDINARY;
1567  }
1568 
1569  err = sel_set_rec_lock(btr_pcur_get_block(&plan->pcur),
1570  rec, index, offsets,
1571  node->row_lock_mode, lock_type, thr);
1572 
1573  switch (err) {
1574  case DB_SUCCESS_LOCKED_REC:
1575  err = DB_SUCCESS;
1576  case DB_SUCCESS:
1577  break;
1578  default:
1579  goto lock_wait_or_error;
1580  }
1581  }
1582 
1583  if (page_rec_is_supremum(rec)) {
1584 
1585  /* A page supremum record cannot be in the result set: skip
1586  it now when we have placed a possible lock on it */
1587 
1588  goto next_rec;
1589  }
1590 
1592 
1593  if (cost_counter > SEL_COST_LIMIT) {
1594 
1595  /* Now that we have placed the necessary locks, we can stop
1596  for a while and store the cursor position; NOTE that if we
1597  would store the cursor position BEFORE placing a record lock,
1598  it might happen that the cursor would jump over some records
1599  that another transaction could meanwhile insert adjacent to
1600  the cursor: this would result in the phantom problem. */
1601 
1602  goto stop_for_a_while;
1603  }
1604 
1605  /* PHASE 2: Check a mixed index mix id if needed */
1606 
1607  if (plan->unique_search && cursor_just_opened) {
1608 
1609  ut_ad(plan->mode == PAGE_CUR_GE);
1610 
1611  /* As the cursor is now placed on a user record after a search
1612  with the mode PAGE_CUR_GE, the up_match field in the cursor
1613  tells how many fields in the user record matched to the search
1614  tuple */
1615 
1616  if (btr_pcur_get_up_match(&(plan->pcur))
1617  < plan->n_exact_match) {
1618  goto table_exhausted;
1619  }
1620 
1621  /* Ok, no need to test end_conds or mix id */
1622 
1623  }
1624 
1625  /* We are ready to look at a possible new index entry in the result
1626  set: the cursor is now placed on a user record */
1627 
1628  /* PHASE 3: Get previous version in a consistent read */
1629 
1630  cons_read_requires_clust_rec = FALSE;
1631  offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
1632 
1633  if (consistent_read) {
1634  /* This is a non-locking consistent read: if necessary, fetch
1635  a previous version of the record */
1636 
1637  if (dict_index_is_clust(index)) {
1638 
1639  if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
1640  node->read_view)) {
1641 
1642  err = row_sel_build_prev_vers(
1643  node->read_view, index, rec,
1644  &offsets, &heap, &plan->old_vers_heap,
1645  &old_vers, &mtr);
1646 
1647  if (err != DB_SUCCESS) {
1648 
1649  goto lock_wait_or_error;
1650  }
1651 
1652  if (old_vers == NULL) {
1653  /* The record does not exist
1654  in our read view. Skip it, but
1655  first attempt to determine
1656  whether the index segment we
1657  are searching through has been
1658  exhausted. */
1659 
1660  offsets = rec_get_offsets(
1661  rec, index, offsets,
1662  ULINT_UNDEFINED, &heap);
1663 
1664  /* Fetch the columns needed in
1665  test conditions. The clustered
1666  index record is protected by a
1667  page latch that was acquired
1668  by row_sel_open_pcur() or
1669  row_sel_restore_pcur_pos().
1670  The latch will not be released
1671  until mtr_commit(mtr). */
1672 
1673  row_sel_fetch_columns(
1674  index, rec, offsets,
1676  plan->columns));
1677 
1678  if (!row_sel_test_end_conds(plan)) {
1679 
1680  goto table_exhausted;
1681  }
1682 
1683  goto next_rec;
1684  }
1685 
1686  rec = old_vers;
1687  }
1688  } else if (!lock_sec_rec_cons_read_sees(rec,
1689  node->read_view)) {
1690  cons_read_requires_clust_rec = TRUE;
1691  }
1692  }
1693 
1694  /* PHASE 4: Test search end conditions and deleted flag */
1695 
1696  /* Fetch the columns needed in test conditions. The record is
1697  protected by a page latch that was acquired by
1698  row_sel_open_pcur() or row_sel_restore_pcur_pos(). The latch
1699  will not be released until mtr_commit(mtr). */
1700 
1701  row_sel_fetch_columns(index, rec, offsets,
1702  UT_LIST_GET_FIRST(plan->columns));
1703 
1704  /* Test the selection end conditions: these can only contain columns
1705  which already are found in the index, even though the index might be
1706  non-clustered */
1707 
1708  if (plan->unique_search && cursor_just_opened) {
1709 
1710  /* No test necessary: the test was already made above */
1711 
1712  } else if (!row_sel_test_end_conds(plan)) {
1713 
1714  goto table_exhausted;
1715  }
1716 
1718  && !cons_read_requires_clust_rec) {
1719 
1720  /* The record is delete marked: we can skip it if this is
1721  not a consistent read which might see an earlier version
1722  of a non-clustered index record */
1723 
1724  if (plan->unique_search) {
1725 
1726  goto table_exhausted;
1727  }
1728 
1729  goto next_rec;
1730  }
1731 
1732  /* PHASE 5: Get the clustered index record, if needed and if we did
1733  not do the search using the clustered index */
1734 
1735  if (plan->must_get_clust || cons_read_requires_clust_rec) {
1736 
1737  /* It was a non-clustered index and we must fetch also the
1738  clustered index record */
1739 
1740  err = row_sel_get_clust_rec(node, plan, rec, thr, &clust_rec,
1741  &mtr);
1742  mtr_has_extra_clust_latch = TRUE;
1743 
1744  switch (err) {
1745  case DB_SUCCESS_LOCKED_REC:
1746  err = DB_SUCCESS;
1747  case DB_SUCCESS:
1748  break;
1749  default:
1750  goto lock_wait_or_error;
1751  }
1752 
1753  /* Retrieving the clustered record required a search:
1754  increment the cost counter */
1755 
1756  cost_counter++;
1757 
1758  if (clust_rec == NULL) {
1759  /* The record did not exist in the read view */
1760  ut_ad(consistent_read);
1761 
1762  goto next_rec;
1763  }
1764 
1765  if (rec_get_deleted_flag(clust_rec,
1766  dict_table_is_comp(plan->table))) {
1767 
1768  /* The record is delete marked: we can skip it */
1769 
1770  goto next_rec;
1771  }
1772 
1773  if (node->can_get_updated) {
1774 
1775  btr_pcur_store_position(&(plan->clust_pcur), &mtr);
1776  }
1777  }
1778 
1779  /* PHASE 6: Test the rest of search conditions */
1780 
1781  if (!row_sel_test_other_conds(plan)) {
1782 
1783  if (plan->unique_search) {
1784 
1785  goto table_exhausted;
1786  }
1787 
1788  goto next_rec;
1789  }
1790 
1791  /* PHASE 7: We found a new qualifying row for the current table; push
1792  the row if prefetch is on, or move to the next table in the join */
1793 
1794  plan->n_rows_fetched++;
1795 
1797 
1798  if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT)
1799  || plan->unique_search || plan->no_prefetch
1800  || plan->table->big_rows) {
1801 
1802  /* No prefetch in operation: go to the next table */
1803 
1804  goto next_table;
1805  }
1806 
1807  sel_push_prefetched_row(plan);
1808 
1809  if (plan->n_rows_prefetched == SEL_MAX_N_PREFETCH) {
1810 
1811  /* The prefetch buffer is now full */
1812 
1813  sel_pop_prefetched_row(plan);
1814 
1815  goto next_table;
1816  }
1817 
1818 next_rec:
1819  ut_ad(!search_latch_locked);
1820 
1821  if (mtr_has_extra_clust_latch) {
1822 
1823  /* We must commit &mtr if we are moving to the next
1824  non-clustered index record, because we could break the
1825  latching order if we would access a different clustered
1826  index page right away without releasing the previous. */
1827 
1828  goto commit_mtr_for_a_while;
1829  }
1830 
1831  if (node->asc) {
1832  moved = btr_pcur_move_to_next(&(plan->pcur), &mtr);
1833  } else {
1834  moved = btr_pcur_move_to_prev(&(plan->pcur), &mtr);
1835  }
1836 
1837  if (!moved) {
1838 
1839  goto table_exhausted;
1840  }
1841 
1842  cursor_just_opened = FALSE;
1843 
1844  /* END OF RECORD LOOP
1845  ------------------ */
1846  goto rec_loop;
1847 
1848 next_table:
1849  /* We found a record which satisfies the conditions: we can move to
1850  the next table or return a row in the result set */
1851 
1853 
1854  if (plan->unique_search && !node->can_get_updated) {
1855 
1856  plan->cursor_at_end = TRUE;
1857  } else {
1858  ut_ad(!search_latch_locked);
1859 
1860  plan->stored_cursor_rec_processed = TRUE;
1861 
1862  btr_pcur_store_position(&(plan->pcur), &mtr);
1863  }
1864 
1865  mtr_commit(&mtr);
1866 
1867  mtr_has_extra_clust_latch = FALSE;
1868 
1869 next_table_no_mtr:
1870  /* If we use 'goto' to this label, it means that the row was popped
1871  from the prefetched rows stack, and &mtr is already committed */
1872 
1873  if (node->fetch_table + 1 == node->n_tables) {
1874 
1875  sel_eval_select_list(node);
1876 
1877  if (node->is_aggregate) {
1878 
1879  goto table_loop;
1880  }
1881 
1882  sel_assign_into_var_values(node->into_list, node);
1883 
1884  thr->run_node = que_node_get_parent(node);
1885 
1886  err = DB_SUCCESS;
1887  goto func_exit;
1888  }
1889 
1890  node->fetch_table++;
1891 
1892  /* When we move to the next table, we first reset the plan cursor:
1893  we do not care about resetting it when we backtrack from a table */
1894 
1895  plan_reset_cursor(sel_node_get_nth_plan(node, node->fetch_table));
1896 
1897  goto table_loop;
1898 
1899 table_exhausted:
1900  /* The table cursor pcur reached the result set end: backtrack to the
1901  previous table in the join if we do not have cached prefetched rows */
1902 
1903  plan->cursor_at_end = TRUE;
1904 
1905  mtr_commit(&mtr);
1906 
1907  mtr_has_extra_clust_latch = FALSE;
1908 
1909  if (plan->n_rows_prefetched > 0) {
1910  /* The table became exhausted during a prefetch */
1911 
1912  sel_pop_prefetched_row(plan);
1913 
1914  goto next_table_no_mtr;
1915  }
1916 
1917 table_exhausted_no_mtr:
1918  if (node->fetch_table == 0) {
1919  err = DB_SUCCESS;
1920 
1921  if (node->is_aggregate && !node->aggregate_already_fetched) {
1922 
1923  node->aggregate_already_fetched = TRUE;
1924 
1925  sel_assign_into_var_values(node->into_list, node);
1926 
1927  thr->run_node = que_node_get_parent(node);
1928  } else {
1929  node->state = SEL_NODE_NO_MORE_ROWS;
1930 
1931  thr->run_node = que_node_get_parent(node);
1932  }
1933 
1934  err = DB_SUCCESS;
1935  goto func_exit;
1936  }
1937 
1938  node->fetch_table--;
1939 
1940  goto table_loop;
1941 
1942 stop_for_a_while:
1943  /* Return control for a while to que_run_threads, so that runaway
1944  queries can be canceled. NOTE that when we come here, we must, in a
1945  locking read, have placed the necessary (possibly waiting request)
1946  record lock on the cursor record or its successor: when we reposition
1947  the cursor, this record lock guarantees that nobody can meanwhile have
1948  inserted new records which should have appeared in the result set,
1949  which would result in the phantom problem. */
1950 
1951  ut_ad(!search_latch_locked);
1952 
1953  plan->stored_cursor_rec_processed = FALSE;
1954  btr_pcur_store_position(&(plan->pcur), &mtr);
1955 
1956  mtr_commit(&mtr);
1957 
1958 #ifdef UNIV_SYNC_DEBUG
1959  ut_ad(sync_thread_levels_empty_gen(TRUE));
1960 #endif /* UNIV_SYNC_DEBUG */
1961  err = DB_SUCCESS;
1962  goto func_exit;
1963 
1964 commit_mtr_for_a_while:
1965  /* Stores the cursor position and commits &mtr; this is used if
1966  &mtr may contain latches which would break the latching order if
1967  &mtr would not be committed and the latches released. */
1968 
1969  plan->stored_cursor_rec_processed = TRUE;
1970 
1971  ut_ad(!search_latch_locked);
1972  btr_pcur_store_position(&(plan->pcur), &mtr);
1973 
1974  mtr_commit(&mtr);
1975 
1976  mtr_has_extra_clust_latch = FALSE;
1977 
1978 #ifdef UNIV_SYNC_DEBUG
1979  ut_ad(sync_thread_levels_empty_gen(TRUE));
1980 #endif /* UNIV_SYNC_DEBUG */
1981 
1982  goto table_loop;
1983 
1984 lock_wait_or_error:
1985  /* See the note at stop_for_a_while: the same holds for this case */
1986 
1987  ut_ad(!btr_pcur_is_before_first_on_page(&plan->pcur) || !node->asc);
1988  ut_ad(!search_latch_locked);
1989 
1990  plan->stored_cursor_rec_processed = FALSE;
1991  btr_pcur_store_position(&(plan->pcur), &mtr);
1992 
1993  mtr_commit(&mtr);
1994 
1995 #ifdef UNIV_SYNC_DEBUG
1996  ut_ad(sync_thread_levels_empty_gen(TRUE));
1997 #endif /* UNIV_SYNC_DEBUG */
1998 
1999 func_exit:
2000  if (search_latch_locked) {
2001  rw_lock_s_unlock(&btr_search_latch);
2002  }
2003  if (UNIV_LIKELY_NULL(heap)) {
2004  mem_heap_free(heap);
2005  }
2006  return(err);
2007 }
2008 
2009 /**********************************************************************/
2013 UNIV_INTERN
2014 que_thr_t*
2016 /*=========*/
2017  que_thr_t* thr)
2018 {
2019  ulint i_lock_mode;
2020  sym_node_t* table_node;
2021  sel_node_t* node;
2022  ulint err;
2023 
2024  ut_ad(thr);
2025 
2026  node = static_cast<sel_node_t *>(thr->run_node);
2027 
2028  ut_ad(que_node_get_type(node) == QUE_NODE_SELECT);
2029 
2030  /* If this is a new time this node is executed (or when execution
2031  resumes after wait for a table intention lock), set intention locks
2032  on the tables, or assign a read view */
2033 
2034  if (node->into_list && (thr->prev_node == que_node_get_parent(node))) {
2035 
2036  node->state = SEL_NODE_OPEN;
2037  }
2038 
2039  if (node->state == SEL_NODE_OPEN) {
2040 
2041  /* It may be that the current session has not yet started
2042  its transaction, or it has been committed: */
2043 
2045 
2046  plan_reset_cursor(sel_node_get_nth_plan(node, 0));
2047 
2048  if (node->consistent_read) {
2049  /* Assign a read view for the query */
2051  thr_get_trx(thr));
2052  } else {
2053  if (node->set_x_locks) {
2054  i_lock_mode = LOCK_IX;
2055  } else {
2056  i_lock_mode = LOCK_IS;
2057  }
2058 
2059  table_node = node->table_list;
2060 
2061  while (table_node) {
2062  err = lock_table(0, table_node->table,
2063  static_cast<lock_mode>(i_lock_mode), thr);
2064  if (err != DB_SUCCESS) {
2065  thr_get_trx(thr)->error_state = err;
2066 
2067  return(NULL);
2068  }
2069 
2070  table_node = static_cast<sym_node_t *>(que_node_get_next(table_node));
2071  }
2072  }
2073 
2074  /* If this is an explicit cursor, copy stored procedure
2075  variable values, so that the values cannot change between
2076  fetches (currently, we copy them also for non-explicit
2077  cursors) */
2078 
2079  if (node->explicit_cursor
2080  && UT_LIST_GET_FIRST(node->copy_variables)) {
2081 
2082  row_sel_copy_input_variable_vals(node);
2083  }
2084 
2085  node->state = SEL_NODE_FETCH;
2086  node->fetch_table = 0;
2087 
2088  if (node->is_aggregate) {
2089  /* Reset the aggregate total values */
2090  sel_reset_aggregate_vals(node);
2091  }
2092 
2093  err = DB_SUCCESS;
2094  }
2095 
2096  err = row_sel(node, thr);
2097 
2098  /* NOTE! if queries are parallelized, the following assignment may
2099  have problems; the assignment should be made only if thr is the
2100  only top-level thr in the graph: */
2101 
2102  thr->graph->last_sel_node = node;
2103 
2104  if (err != DB_SUCCESS) {
2105  thr_get_trx(thr)->error_state = err;
2106 
2107  return(NULL);
2108  }
2109 
2110  return(thr);
2111 }
2112 
2113 /**********************************************************************/
2116 UNIV_INTERN
2117 que_thr_t*
2119 /*=======*/
2120  que_thr_t* thr)
2121 {
2122  sel_node_t* sel_node;
2123  fetch_node_t* node;
2124 
2125  ut_ad(thr);
2126 
2127  node = static_cast<fetch_node_t *>(thr->run_node);
2128  sel_node = node->cursor_def;
2129 
2130  ut_ad(que_node_get_type(node) == QUE_NODE_FETCH);
2131 
2132  if (thr->prev_node != que_node_get_parent(node)) {
2133 
2134  if (sel_node->state != SEL_NODE_NO_MORE_ROWS) {
2135 
2136  if (node->into_list) {
2137  sel_assign_into_var_values(node->into_list,
2138  sel_node);
2139  } else {
2140  void* ret = (*node->func->func)(
2141  sel_node, node->func->arg);
2142 
2143  if (!ret) {
2144  sel_node->state
2146  }
2147  }
2148  }
2149 
2150  thr->run_node = que_node_get_parent(node);
2151 
2152  return(thr);
2153  }
2154 
2155  /* Make the fetch node the parent of the cursor definition for
2156  the time of the fetch, so that execution knows to return to this
2157  fetch node after a row has been selected or we know that there is
2158  no row left */
2159 
2160  sel_node->common.parent = node;
2161 
2162  if (sel_node->state == SEL_NODE_CLOSED) {
2163  fprintf(stderr,
2164  "InnoDB: Error: fetch called on a closed cursor\n");
2165 
2166  thr_get_trx(thr)->error_state = DB_ERROR;
2167 
2168  return(NULL);
2169  }
2170 
2171  thr->run_node = sel_node;
2172 
2173  return(thr);
2174 }
2175 
2176 /****************************************************************/
2179 UNIV_INTERN
2180 void*
2182 /*============*/
2183  void* row,
2184  void* user_arg)
2185 {
2186  sel_node_t *node = static_cast<sel_node_t *>(row);
2187  que_node_t* exp;
2188  ulint i = 0;
2189 
2190  UT_NOT_USED(user_arg);
2191 
2192  fprintf(stderr, "row_fetch_print: row %p\n", row);
2193 
2194  exp = node->select_list;
2195 
2196  while (exp) {
2197  dfield_t* dfield = que_node_get_val(exp);
2198  const dtype_t* type = dfield_get_type(dfield);
2199 
2200  fprintf(stderr, " column %lu:\n", (ulong)i);
2201 
2202  dtype_print(type);
2203  putc('\n', stderr);
2204 
2205  if (dfield_get_len(dfield) != UNIV_SQL_NULL) {
2206  ut_print_buf(stderr, dfield_get_data(dfield),
2207  dfield_get_len(dfield));
2208  putc('\n', stderr);
2209  } else {
2210  fputs(" <NULL>;\n", stderr);
2211  }
2212 
2213  exp = que_node_get_next(exp);
2214  i++;
2215  }
2216 
2217  return((void*)42);
2218 }
2219 
2220 /***********************************************************/
2223 UNIV_INTERN
2224 que_thr_t*
2226 /*============*/
2227  que_thr_t* thr)
2228 {
2229  row_printf_node_t* node;
2230  sel_node_t* sel_node;
2231  que_node_t* arg;
2232 
2233  ut_ad(thr);
2234 
2235  node = static_cast<row_printf_node_t *>(thr->run_node);
2236 
2237  sel_node = node->sel_node;
2238 
2239  ut_ad(que_node_get_type(node) == QUE_NODE_ROW_PRINTF);
2240 
2241  if (thr->prev_node == que_node_get_parent(node)) {
2242 
2243  /* Reset the cursor */
2244  sel_node->state = SEL_NODE_OPEN;
2245 
2246  /* Fetch next row to print */
2247 
2248  thr->run_node = sel_node;
2249 
2250  return(thr);
2251  }
2252 
2253  if (sel_node->state != SEL_NODE_FETCH) {
2254 
2255  ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS);
2256 
2257  /* No more rows to print */
2258 
2259  thr->run_node = que_node_get_parent(node);
2260 
2261  return(thr);
2262  }
2263 
2264  arg = sel_node->select_list;
2265 
2266  while (arg) {
2268 
2269  fputs(" ::: ", stderr);
2270 
2271  arg = que_node_get_next(arg);
2272  }
2273 
2274  putc('\n', stderr);
2275 
2276  /* Fetch next row to print */
2277 
2278  thr->run_node = sel_node;
2279 
2280  return(thr);
2281 }
2282 
2283 /****************************************************************/
2290 UNIV_INTERN
2291 void
2293 /*==================================*/
2294  dtuple_t* tuple,
2298  byte* buf,
2300  ulint buf_len,
2301  dict_index_t* index,
2302  const byte* key_ptr,
2303  ulint key_len,
2304  trx_t* trx)
2305 {
2306  byte* original_buf = buf;
2307  const byte* original_key_ptr = key_ptr;
2308  dict_field_t* field;
2309  dfield_t* dfield;
2310  ulint data_offset;
2311  ulint data_len;
2312  ulint data_field_len;
2313  ibool is_null;
2314  const byte* key_end;
2315  ulint n_fields = 0;
2316 
2317  /* For documentation of the key value storage format in MySQL, see
2318  ha_innobase::store_key_val_for_row() in ha_innodb.cc. */
2319 
2320  key_end = key_ptr + key_len;
2321 
2322  /* Permit us to access any field in the tuple (ULINT_MAX): */
2323 
2324  dtuple_set_n_fields(tuple, ULINT_MAX);
2325 
2326  dfield = dtuple_get_nth_field(tuple, 0);
2327  field = dict_index_get_nth_field(index, 0);
2328 
2329  if (UNIV_UNLIKELY(dfield_get_type(dfield)->mtype == DATA_SYS)) {
2330  /* A special case: we are looking for a position in the
2331  generated clustered index which InnoDB automatically added
2332  to a table with no primary key: the first and the only
2333  ordering column is ROW_ID which InnoDB stored to the key_ptr
2334  buffer. */
2335 
2336  ut_a(key_len == DATA_ROW_ID_LEN);
2337 
2338  dfield_set_data(dfield, key_ptr, DATA_ROW_ID_LEN);
2339 
2340  dtuple_set_n_fields(tuple, 1);
2341 
2342  return;
2343  }
2344 
2345  while (key_ptr < key_end) {
2346 
2347  ulint type = dfield_get_type(dfield)->mtype;
2348  ut_a(field->col->mtype == type);
2349 
2350  data_offset = 0;
2351  is_null = FALSE;
2352 
2353  if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) {
2354  /* The first byte in the field tells if this is
2355  an SQL NULL value */
2356 
2357  data_offset = 1;
2358 
2359  if (*key_ptr != 0) {
2360  dfield_set_null(dfield);
2361 
2362  is_null = TRUE;
2363  }
2364  }
2365 
2366  /* Calculate data length and data field total length */
2367 
2368  if (type == DATA_BLOB) {
2369  /* The key field is a column prefix of a BLOB or
2370  TEXT */
2371 
2372  ut_a(field->prefix_len > 0);
2373 
2374  /* MySQL stores the actual data length to the first 2
2375  bytes after the optional SQL NULL marker byte. The
2376  storage format is little-endian, that is, the most
2377  significant byte at a higher address. In UTF-8, MySQL
2378  seems to reserve field->prefix_len bytes for
2379  storing this field in the key value buffer, even
2380  though the actual value only takes data_len bytes
2381  from the start. */
2382 
2383  data_len = key_ptr[data_offset]
2384  + 256 * key_ptr[data_offset + 1];
2385  data_field_len = data_offset + 2 + field->prefix_len;
2386 
2387  data_offset += 2;
2388 
2389  /* Now that we know the length, we store the column
2390  value like it would be a fixed char field */
2391 
2392  } else if (field->prefix_len > 0) {
2393  /* Looks like MySQL pads unused end bytes in the
2394  prefix with space. Therefore, also in UTF-8, it is ok
2395  to compare with a prefix containing full prefix_len
2396  bytes, and no need to take at most prefix_len / 3
2397  UTF-8 characters from the start.
2398  If the prefix is used as the upper end of a LIKE
2399  'abc%' query, then MySQL pads the end with chars
2400  0xff. TODO: in that case does it any harm to compare
2401  with the full prefix_len bytes. How do characters
2402  0xff in UTF-8 behave? */
2403 
2404  data_len = field->prefix_len;
2405  data_field_len = data_offset + data_len;
2406  } else {
2407  data_len = dfield_get_type(dfield)->len;
2408  data_field_len = data_offset + data_len;
2409  }
2410 
2411  if (UNIV_UNLIKELY
2412  (dtype_get_mysql_type(dfield_get_type(dfield))
2413  == DATA_MYSQL_TRUE_VARCHAR)
2414  && UNIV_LIKELY(type != DATA_INT)) {
2415  /* In a MySQL key value format, a true VARCHAR is
2416  always preceded by 2 bytes of a length field.
2417  dfield_get_type(dfield)->len returns the maximum
2418  'payload' len in bytes. That does not include the
2419  2 bytes that tell the actual data length.
2420 
2421  We added the check != DATA_INT to make sure we do
2422  not treat MySQL ENUM or SET as a true VARCHAR! */
2423 
2424  data_len += 2;
2425  data_field_len += 2;
2426  }
2427 
2428  /* Storing may use at most data_len bytes of buf */
2429 
2430  if (UNIV_LIKELY(!is_null)) {
2432  dfield, buf,
2433  FALSE, /* MySQL key value format col */
2434  key_ptr + data_offset, data_len,
2435  dict_table_is_comp(index->table));
2436  buf += data_len;
2437  }
2438 
2439  key_ptr += data_field_len;
2440 
2441  if (UNIV_UNLIKELY(key_ptr > key_end)) {
2442  /* The last field in key was not a complete key field
2443  but a prefix of it.
2444 
2445  Print a warning about this! HA_READ_PREFIX_LAST does
2446  not currently work in InnoDB with partial-field key
2447  value prefixes. Since MySQL currently uses a padding
2448  trick to calculate LIKE 'abc%' type queries there
2449  should never be partial-field prefixes in searches. */
2450 
2451  ut_print_timestamp(stderr);
2452 
2453  fputs(" InnoDB: Warning: using a partial-field"
2454  " key prefix in search.\n"
2455  "InnoDB: ", stderr);
2456  dict_index_name_print(stderr, trx, index);
2457  fprintf(stderr, ". Last data field length %lu bytes,\n"
2458  "InnoDB: key ptr now exceeds"
2459  " key end by %lu bytes.\n"
2460  "InnoDB: Key value in the MySQL format:\n",
2461  (ulong) data_field_len,
2462  (ulong) (key_ptr - key_end));
2463  fflush(stderr);
2464  ut_print_buf(stderr, original_key_ptr, key_len);
2465  putc('\n', stderr);
2466 
2467  if (!is_null) {
2468  ulint len = dfield_get_len(dfield);
2469  dfield_set_len(dfield, len
2470  - (ulint) (key_ptr - key_end));
2471  }
2472  }
2473 
2474  n_fields++;
2475  field++;
2476  dfield++;
2477  }
2478 
2479  ut_a(buf <= original_buf + buf_len);
2480 
2481  /* We set the length of tuple to n_fields: we assume that the memory
2482  area allocated for it is big enough (usually bigger than n_fields). */
2483 
2484  dtuple_set_n_fields(tuple, n_fields);
2485 }
2486 
2487 /**************************************************************/
2489 static
2490 void
2491 row_sel_store_row_id_to_prebuilt(
2492 /*=============================*/
2493  row_prebuilt_t* prebuilt,
2494  const rec_t* index_rec,
2495  const dict_index_t* index,
2496  const ulint* offsets)
2498 {
2499  const byte* data;
2500  ulint len;
2501 
2502  ut_ad(rec_offs_validate(index_rec, index, offsets));
2503 
2504  data = rec_get_nth_field(
2505  index_rec, offsets,
2506  dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len);
2507 
2508  if (UNIV_UNLIKELY(len != DATA_ROW_ID_LEN)) {
2509  fprintf(stderr,
2510  "InnoDB: Error: Row id field is"
2511  " wrong length %lu in ", (ulong) len);
2512  dict_index_name_print(stderr, prebuilt->trx, index);
2513  fprintf(stderr, "\n"
2514  "InnoDB: Field number %lu, record:\n",
2515  (ulong) dict_index_get_sys_col_pos(index,
2516  DATA_ROW_ID));
2517  rec_print_new(stderr, index_rec, offsets);
2518  putc('\n', stderr);
2519  ut_error;
2520  }
2521 
2522  ut_memcpy(prebuilt->row_id, data, len);
2523 }
2524 
2525 /**************************************************************/
2528 static
2529 void
2530 row_sel_field_store_in_mysql_format(
2531 /*================================*/
2532  byte* dest,
2538  const mysql_row_templ_t* templ,
2543  const byte* data,
2544  ulint len)
2545 {
2546  byte* ptr;
2547 
2548  ut_ad(len != UNIV_SQL_NULL);
2549  UNIV_MEM_ASSERT_RW(data, len);
2550 
2551  switch (templ->type) {
2552  const byte* field_end;
2553  byte* pad;
2554  case DATA_INT:
2555  /* Convert integer data from Innobase to a little-endian
2556  format, sign bit restored to normal */
2557 
2558  ptr = dest + len;
2559 
2560  for (;;) {
2561  ptr--;
2562  *ptr = *data;
2563  if (ptr == dest) {
2564  break;
2565  }
2566  data++;
2567  }
2568 
2569  if (!templ->is_unsigned) {
2570  dest[len - 1] = (byte) (dest[len - 1] ^ 128);
2571  }
2572 
2573  ut_ad(templ->mysql_col_len == len);
2574  break;
2575 
2576  case DATA_VARCHAR:
2577  case DATA_VARMYSQL:
2578  case DATA_BINARY:
2579  field_end = dest + templ->mysql_col_len;
2580 
2581  if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
2582  /* This is a >= 5.0.3 type true VARCHAR. Store the
2583  length of the data to the first byte or the first
2584  two bytes of dest. */
2585 
2587  dest, len, templ->mysql_length_bytes);
2588  }
2589 
2590  /* Copy the actual data */
2591  ut_memcpy(dest, data, len);
2592 
2593  /* Pad with trailing spaces. We pad with spaces also the
2594  unused end of a >= 5.0.3 true VARCHAR column, just in case
2595  MySQL expects its contents to be deterministic. */
2596 
2597  pad = dest + len;
2598 
2599  ut_ad(templ->mbminlen <= templ->mbmaxlen);
2600 
2601  /* We treat some Unicode charset strings specially. */
2602  switch (templ->mbminlen) {
2603  case 4:
2604  /* InnoDB should never have stripped partial
2605  UTF-32 characters. */
2606  ut_a(!(len & 3));
2607  break;
2608  case 2:
2609  /* A space char is two bytes,
2610  0x0020 in UCS2 and UTF-16 */
2611 
2612  if (UNIV_UNLIKELY(len & 1)) {
2613  /* A 0x20 has been stripped from the column.
2614  Pad it back. */
2615 
2616  if (pad < field_end) {
2617  *pad++ = 0x20;
2618  }
2619  }
2620  }
2621 
2622  row_mysql_pad_col(templ->mbminlen, pad, field_end - pad);
2623  break;
2624 
2625  case DATA_BLOB:
2626  /* Store a pointer to the BLOB buffer to dest: the BLOB was
2627  already copied to the buffer in row_sel_store_mysql_rec */
2628 
2629  row_mysql_store_blob_ref(dest, templ->mysql_col_len, data,
2630  len);
2631  break;
2632 
2633  case DATA_MYSQL:
2634  memcpy(dest, data, len);
2635 
2636  ut_ad(templ->mysql_col_len >= len);
2637  ut_ad(templ->mbmaxlen >= templ->mbminlen);
2638 
2639  ut_ad(templ->mbmaxlen > templ->mbminlen
2640  || templ->mysql_col_len == len);
2641  /* The following assertion would fail for old tables
2642  containing UTF-8 ENUM columns due to Bug #9526. */
2643  ut_ad(!templ->mbmaxlen
2644  || !(templ->mysql_col_len % templ->mbmaxlen));
2645  ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len);
2646 
2647  if (templ->mbminlen == 1 && templ->mbmaxlen != 1) {
2648  /* Pad with spaces. This undoes the stripping
2649  done in row0mysql.c, function
2650  row_mysql_store_col_in_innobase_format(). */
2651 
2652  memset(dest + len, 0x20, templ->mysql_col_len - len);
2653  }
2654  break;
2655 
2656  default:
2657 #ifdef UNIV_DEBUG
2658  case DATA_SYS_CHILD:
2659  case DATA_SYS:
2660  /* These column types should never be shipped to MySQL. */
2661  ut_ad(0);
2662 
2663  case DATA_CHAR:
2664  case DATA_FIXBINARY:
2665  case DATA_FLOAT:
2666  case DATA_DOUBLE:
2667  case DATA_DECIMAL:
2668  /* Above are the valid column types for MySQL data. */
2669 #endif /* UNIV_DEBUG */
2670  ut_ad(templ->mysql_col_len == len);
2671  memcpy(dest, data, len);
2672  }
2673 }
2674 
2675 /**************************************************************/
2681 static
2682 #ifdef __GNUC__
2683  __attribute__((warn_unused_result))
2684 #endif
2685 ibool
2686 row_sel_store_mysql_rec(
2687 /*====================*/
2688  byte* mysql_rec,
2689  row_prebuilt_t* prebuilt,
2690  const rec_t* rec,
2694  ibool rec_clust,
2697  const ulint* offsets)
2699 {
2700  mem_heap_t* extern_field_heap = NULL;
2701  mem_heap_t* heap;
2702  ulint i;
2703 
2704  ut_ad(prebuilt->mysql_template);
2705  ut_ad(prebuilt->default_rec);
2706  ut_ad(rec_offs_validate(rec, NULL, offsets));
2707  ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
2708 
2709  if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) {
2710  mem_heap_free(prebuilt->blob_heap);
2711  prebuilt->blob_heap = NULL;
2712  }
2713 
2714  for (i = 0; i < prebuilt->n_template ; i++) {
2715 
2716  const mysql_row_templ_t*templ = prebuilt->mysql_template + i;
2717  const byte* data;
2718  ulint len;
2719  ulint field_no;
2720 
2721  field_no = rec_clust
2722  ? templ->clust_rec_field_no : templ->rec_field_no;
2723 
2724  if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, field_no))) {
2725 
2726  /* Copy an externally stored field to the temporary
2727  heap */
2728 
2729  ut_a(!prebuilt->trx->has_search_latch);
2730 
2731  if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
2732  if (prebuilt->blob_heap == NULL) {
2733  prebuilt->blob_heap = mem_heap_create(
2734  UNIV_PAGE_SIZE);
2735  }
2736 
2737  heap = prebuilt->blob_heap;
2738  } else {
2739  extern_field_heap
2740  = mem_heap_create(UNIV_PAGE_SIZE);
2741 
2742  heap = extern_field_heap;
2743  }
2744 
2745  /* NOTE: if we are retrieving a big BLOB, we may
2746  already run out of memory in the next call, which
2747  causes an assert */
2748 
2750  rec, offsets,
2751  dict_table_zip_size(prebuilt->table),
2752  field_no, &len, heap);
2753 
2754  if (UNIV_UNLIKELY(!data)) {
2755  /* The externally stored field
2756  was not written yet. This
2757  record should only be seen by
2758  recv_recovery_rollback_active()
2759  or any TRX_ISO_READ_UNCOMMITTED
2760  transactions. */
2761 
2762  if (extern_field_heap) {
2763  mem_heap_free(extern_field_heap);
2764  }
2765 
2766  return(FALSE);
2767  }
2768 
2769  if (UNIV_UNLIKELY(!data)) {
2770  /* The externally stored field
2771  was not written yet. This
2772  record should only be seen by
2773  recv_recovery_rollback_active()
2774  or any TRX_ISO_READ_UNCOMMITTED
2775  transactions. */
2776 
2777  if (extern_field_heap) {
2778  mem_heap_free(extern_field_heap);
2779  }
2780 
2781  return(FALSE);
2782  }
2783 
2784  ut_a(len != UNIV_SQL_NULL);
2785  } else {
2786  /* Field is stored in the row. */
2787 
2788  data = rec_get_nth_field(rec, offsets, field_no, &len);
2789 
2790  if (UNIV_UNLIKELY(templ->type == DATA_BLOB)
2791  && len != UNIV_SQL_NULL) {
2792 
2793  /* It is a BLOB field locally stored in the
2794  InnoDB record: we MUST copy its contents to
2795  prebuilt->blob_heap here because later code
2796  assumes all BLOB values have been copied to a
2797  safe place. */
2798 
2799  if (prebuilt->blob_heap == NULL) {
2800  prebuilt->blob_heap = mem_heap_create(
2801  UNIV_PAGE_SIZE);
2802  }
2803 
2804  data = static_cast<byte *>(memcpy(mem_heap_alloc(
2805  prebuilt->blob_heap, len),
2806  data, len));
2807  }
2808  }
2809 
2810  if (len != UNIV_SQL_NULL) {
2811  row_sel_field_store_in_mysql_format(
2812  mysql_rec + templ->mysql_col_offset,
2813  templ, data, len);
2814 
2815  /* Cleanup */
2816  if (extern_field_heap) {
2817  mem_heap_free(extern_field_heap);
2818  extern_field_heap = NULL;
2819  }
2820 
2821  if (templ->mysql_null_bit_mask) {
2822  /* It is a nullable column with a non-NULL
2823  value */
2824  mysql_rec[templ->mysql_null_byte_offset]
2825  &= ~(byte) templ->mysql_null_bit_mask;
2826  }
2827  } else {
2828  /* MySQL assumes that the field for an SQL
2829  NULL value is set to the default value. */
2830 
2831  UNIV_MEM_ASSERT_RW(prebuilt->default_rec
2832  + templ->mysql_col_offset,
2833  templ->mysql_col_len);
2834  mysql_rec[templ->mysql_null_byte_offset]
2835  |= (byte) templ->mysql_null_bit_mask;
2836  memcpy(mysql_rec + templ->mysql_col_offset,
2837  (const byte*) prebuilt->default_rec
2838  + templ->mysql_col_offset,
2839  templ->mysql_col_len);
2840  }
2841  }
2842 
2843  return(TRUE);
2844 }
2845 
2846 /*********************************************************************/
2849 static
2850 ulint
2851 row_sel_build_prev_vers_for_mysql(
2852 /*==============================*/
2853  read_view_t* read_view,
2854  dict_index_t* clust_index,
2855  row_prebuilt_t* prebuilt,
2856  const rec_t* rec,
2857  ulint** offsets,
2859  mem_heap_t** offset_heap,
2861  rec_t** old_vers,
2865  mtr_t* mtr)
2866 {
2867  ulint err;
2868 
2869  if (prebuilt->old_vers_heap) {
2870  mem_heap_empty(prebuilt->old_vers_heap);
2871  } else {
2872  prebuilt->old_vers_heap = mem_heap_create(200);
2873  }
2874 
2876  rec, mtr, clust_index, offsets, read_view, offset_heap,
2877  prebuilt->old_vers_heap, old_vers);
2878  return(err);
2879 }
2880 
2881 /*********************************************************************/
2886 static
2887 enum db_err
2888 row_sel_get_clust_rec_for_mysql(
2889 /*============================*/
2890  row_prebuilt_t* prebuilt,
2891  dict_index_t* sec_index,
2892  const rec_t* rec,
2896  que_thr_t* thr,
2897  const rec_t** out_rec,
2901  ulint** offsets,
2905  mem_heap_t** offset_heap,
2907  mtr_t* mtr)
2910 {
2911  dict_index_t* clust_index;
2912  const rec_t* clust_rec;
2913  rec_t* old_vers;
2914  enum db_err err;
2915  trx_t* trx;
2916 
2917  *out_rec = NULL;
2918  trx = thr_get_trx(thr);
2919 
2920  row_build_row_ref_in_tuple(prebuilt->clust_ref, rec,
2921  sec_index, *offsets, trx);
2922 
2923  clust_index = dict_table_get_first_index(sec_index->table);
2924 
2925  btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
2926  PAGE_CUR_LE, BTR_SEARCH_LEAF,
2927  prebuilt->clust_pcur, 0, mtr);
2928 
2929  clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
2930 
2931  prebuilt->clust_pcur->trx_if_known = trx;
2932 
2933  /* Note: only if the search ends up on a non-infimum record is the
2934  low_match value the real match to the search tuple */
2935 
2936  if (!page_rec_is_user_rec(clust_rec)
2937  || btr_pcur_get_low_match(prebuilt->clust_pcur)
2938  < dict_index_get_n_unique(clust_index)) {
2939 
2940  /* In a rare case it is possible that no clust rec is found
2941  for a delete-marked secondary index record: if in row0umod.c
2942  in row_undo_mod_remove_clust_low() we have already removed
2943  the clust rec, while purge is still cleaning and removing
2944  secondary index records associated with earlier versions of
2945  the clustered index record. In that case we know that the
2946  clustered index record did not exist in the read view of
2947  trx. */
2948 
2949  if (!rec_get_deleted_flag(rec,
2950  dict_table_is_comp(sec_index->table))
2951  || prebuilt->select_lock_type != LOCK_NONE) {
2952  ut_print_timestamp(stderr);
2953  fputs(" InnoDB: error clustered record"
2954  " for sec rec not found\n"
2955  "InnoDB: ", stderr);
2956  dict_index_name_print(stderr, trx, sec_index);
2957  fputs("\n"
2958  "InnoDB: sec index record ", stderr);
2959  rec_print(stderr, rec, sec_index);
2960  fputs("\n"
2961  "InnoDB: clust index record ", stderr);
2962  rec_print(stderr, clust_rec, clust_index);
2963  putc('\n', stderr);
2964  trx_print(stderr, trx, 600);
2965 
2966  fputs("\n"
2967  "InnoDB: Submit a detailed bug report"
2968  " to http://bugs.mysql.com\n", stderr);
2969  }
2970 
2971  clust_rec = NULL;
2972 
2973  err = DB_SUCCESS;
2974  goto func_exit;
2975  }
2976 
2977  *offsets = rec_get_offsets(clust_rec, clust_index, *offsets,
2978  ULINT_UNDEFINED, offset_heap);
2979 
2980  if (prebuilt->select_lock_type != LOCK_NONE) {
2981  /* Try to place a lock on the index record; we are searching
2982  the clust rec with a unique condition, hence
2983  we set a LOCK_REC_NOT_GAP type lock */
2984 
2986  0, btr_pcur_get_block(prebuilt->clust_pcur),
2987  clust_rec, clust_index, *offsets,
2988  static_cast<lock_mode>(prebuilt->select_lock_type),
2989  LOCK_REC_NOT_GAP, thr);
2990  switch (err) {
2991  case DB_SUCCESS:
2992  case DB_SUCCESS_LOCKED_REC:
2993  break;
2994  default:
2995  goto err_exit;
2996  }
2997  } else {
2998  /* This is a non-locking consistent read: if necessary, fetch
2999  a previous version of the record */
3000 
3001  old_vers = NULL;
3002 
3003  /* If the isolation level allows reading of uncommitted data,
3004  then we never look for an earlier version */
3005 
3006  if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
3008  clust_rec, clust_index, *offsets,
3009  trx->read_view)) {
3010 
3011  /* The following call returns 'offsets' associated with
3012  'old_vers' */
3013  err = static_cast<db_err>(row_sel_build_prev_vers_for_mysql(
3014  trx->read_view, clust_index, prebuilt,
3015  clust_rec, offsets, offset_heap, &old_vers,
3016  mtr));
3017 
3018  if (err != DB_SUCCESS || old_vers == NULL) {
3019 
3020  goto err_exit;
3021  }
3022 
3023  clust_rec = old_vers;
3024  }
3025 
3026  /* If we had to go to an earlier version of row or the
3027  secondary index record is delete marked, then it may be that
3028  the secondary index record corresponding to clust_rec
3029  (or old_vers) is not rec; in that case we must ignore
3030  such row because in our snapshot rec would not have existed.
3031  Remember that from rec we cannot see directly which transaction
3032  id corresponds to it: we have to go to the clustered index
3033  record. A query where we want to fetch all rows where
3034  the secondary index value is in some interval would return
3035  a wrong result if we would not drop rows which we come to
3036  visit through secondary index records that would not really
3037  exist in our snapshot. */
3038 
3039  if (clust_rec
3040  && (old_vers
3041  || trx->isolation_level <= TRX_ISO_READ_UNCOMMITTED
3043  sec_index->table)))
3044  && !row_sel_sec_rec_is_for_clust_rec(
3045  rec, sec_index, clust_rec, clust_index)) {
3046  clust_rec = NULL;
3047 #ifdef UNIV_SEARCH_DEBUG
3048  } else {
3049  ut_a(clust_rec == NULL
3050  || row_sel_sec_rec_is_for_clust_rec(
3051  rec, sec_index, clust_rec, clust_index));
3052 #endif
3053  }
3054 
3055  err = DB_SUCCESS;
3056  }
3057 
3058 func_exit:
3059  *out_rec = clust_rec;
3060 
3061  if (prebuilt->select_lock_type != LOCK_NONE) {
3062  /* We may use the cursor in update or in unlock_row():
3063  store its position */
3064 
3065  btr_pcur_store_position(prebuilt->clust_pcur, mtr);
3066  }
3067 
3068 err_exit:
3069  return(err);
3070 }
3071 
3072 /********************************************************************/
3078 static
3079 ibool
3080 sel_restore_position_for_mysql(
3081 /*===========================*/
3082  ibool* same_user_rec,
3086  ulint latch_mode,
3088  btr_pcur_t* pcur,
3090  ibool moves_up,
3092  mtr_t* mtr)
3094 {
3095  ibool success;
3096  ulint relative_position;
3097 
3098  relative_position = pcur->rel_pos;
3099 
3100  success = btr_pcur_restore_position(latch_mode, pcur, mtr);
3101 
3102  *same_user_rec = success;
3103 
3104  if (relative_position == BTR_PCUR_ON) {
3105  if (success) {
3106  return(FALSE);
3107  }
3108 
3109  if (moves_up) {
3110  btr_pcur_move_to_next(pcur, mtr);
3111  }
3112 
3113  return(TRUE);
3114  }
3115 
3116  if (relative_position == BTR_PCUR_AFTER
3117  || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE) {
3118 
3119  if (moves_up) {
3120  return(TRUE);
3121  }
3122 
3123  if (btr_pcur_is_on_user_rec(pcur)) {
3124  btr_pcur_move_to_prev(pcur, mtr);
3125  }
3126 
3127  return(TRUE);
3128  }
3129 
3130  ut_ad(relative_position == BTR_PCUR_BEFORE
3131  || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE);
3132 
3133  if (moves_up && btr_pcur_is_on_user_rec(pcur)) {
3134  btr_pcur_move_to_next(pcur, mtr);
3135  }
3136 
3137  return(TRUE);
3138 }
3139 
3140 /********************************************************************/
3142 UNIV_INLINE
3143 void
3144 row_sel_pop_cached_row_for_mysql(
3145 /*=============================*/
3146  byte* buf,
3148  row_prebuilt_t* prebuilt)
3149 {
3150  ulint i;
3151  const mysql_row_templ_t*templ;
3152  byte* cached_rec;
3153  ut_ad(prebuilt->n_fetch_cached > 0);
3154  ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);
3155 
3156  if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) {
3157  /* Copy cache record field by field, don't touch fields that
3158  are not covered by current key */
3159  cached_rec = prebuilt->fetch_cache[
3160  prebuilt->fetch_cache_first];
3161 
3162  for (i = 0; i < prebuilt->n_template; i++) {
3163  templ = prebuilt->mysql_template + i;
3164 #if 0 /* Some of the cached_rec may legitimately be uninitialized. */
3165  UNIV_MEM_ASSERT_RW(cached_rec
3166  + templ->mysql_col_offset,
3167  templ->mysql_col_len);
3168 #endif
3169  ut_memcpy(buf + templ->mysql_col_offset,
3170  cached_rec + templ->mysql_col_offset,
3171  templ->mysql_col_len);
3172  /* Copy NULL bit of the current field from cached_rec
3173  to buf */
3174  if (templ->mysql_null_bit_mask) {
3175  buf[templ->mysql_null_byte_offset]
3176  ^= (buf[templ->mysql_null_byte_offset]
3177  ^ cached_rec[templ->mysql_null_byte_offset])
3178  & (byte)templ->mysql_null_bit_mask;
3179  }
3180  }
3181  }
3182  else {
3183 #if 0 /* Some of the cached_rec may legitimately be uninitialized. */
3184  UNIV_MEM_ASSERT_RW(prebuilt->fetch_cache
3185  [prebuilt->fetch_cache_first],
3186  prebuilt->mysql_prefix_len);
3187 #endif
3188  ut_memcpy(buf,
3189  prebuilt->fetch_cache[prebuilt->fetch_cache_first],
3190  prebuilt->mysql_prefix_len);
3191  }
3192  prebuilt->n_fetch_cached--;
3193  prebuilt->fetch_cache_first++;
3194 
3195  if (prebuilt->n_fetch_cached == 0) {
3196  prebuilt->fetch_cache_first = 0;
3197  }
3198 }
3199 
3200 /********************************************************************/
3203 UNIV_INLINE
3204 #ifdef __GNUC__
3205 __attribute__((warn_unused_result))
3206 #endif
3207 ibool
3208 row_sel_push_cache_row_for_mysql(
3209 /*=============================*/
3210  row_prebuilt_t* prebuilt,
3211  const rec_t* rec,
3215  ibool rec_clust,
3218  const ulint* offsets)
3219 {
3220  byte* buf;
3221  ulint i;
3222 
3223  ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
3224  ut_ad(rec_offs_validate(rec, NULL, offsets));
3225  ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));
3226  ut_a(!prebuilt->templ_contains_blob);
3227 
3228  if (prebuilt->fetch_cache[0] == NULL) {
3229  /* Allocate memory for the fetch cache */
3230 
3231  for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
3232 
3233  /* A user has reported memory corruption in these
3234  buffers in Linux. Put magic numbers there to help
3235  to track a possible bug. */
3236 
3237  buf = static_cast<byte *>(mem_alloc(prebuilt->mysql_row_len + 8));
3238 
3239  prebuilt->fetch_cache[i] = buf + 4;
3240 
3241  mach_write_to_4(buf, ROW_PREBUILT_FETCH_MAGIC_N);
3242  mach_write_to_4(buf + 4 + prebuilt->mysql_row_len,
3243  ROW_PREBUILT_FETCH_MAGIC_N);
3244  }
3245  }
3246 
3247  ut_ad(prebuilt->fetch_cache_first == 0);
3248  UNIV_MEM_INVALID(prebuilt->fetch_cache[prebuilt->n_fetch_cached],
3249  prebuilt->mysql_row_len);
3250 
3251  if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
3252  prebuilt->fetch_cache[
3253  prebuilt->n_fetch_cached],
3254  prebuilt, rec, rec_clust, offsets))) {
3255  return(FALSE);
3256  }
3257 
3258  prebuilt->n_fetch_cached++;
3259  return(TRUE);
3260 }
3261 
3262 /*********************************************************************/
3268 static
3269 ulint
3270 row_sel_try_search_shortcut_for_mysql(
3271 /*==================================*/
3272  const rec_t** out_rec,
3273  row_prebuilt_t* prebuilt,
3274  ulint** offsets,
3275  mem_heap_t** heap,
3276  mtr_t* mtr)
3277 {
3278  dict_index_t* index = prebuilt->index;
3279  const dtuple_t* search_tuple = prebuilt->search_tuple;
3280  btr_pcur_t* pcur = prebuilt->pcur;
3281  trx_t* trx = prebuilt->trx;
3282  const rec_t* rec;
3283 
3284  ut_ad(dict_index_is_clust(index));
3285  ut_ad(!prebuilt->templ_contains_blob);
3286 
3287 #ifndef UNIV_SEARCH_DEBUG
3288  btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
3289  BTR_SEARCH_LEAF, pcur,
3290  RW_S_LATCH,
3291  mtr);
3292 #else /* UNIV_SEARCH_DEBUG */
3293  btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
3294  BTR_SEARCH_LEAF, pcur,
3295  0,
3296  mtr);
3297 #endif /* UNIV_SEARCH_DEBUG */
3298  rec = btr_pcur_get_rec(pcur);
3299 
3300  if (!page_rec_is_user_rec(rec)) {
3301 
3302  return(SEL_RETRY);
3303  }
3304 
3305  /* As the cursor is now placed on a user record after a search with
3306  the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
3307  fields in the user record matched to the search tuple */
3308 
3309  if (btr_pcur_get_up_match(pcur) < dtuple_get_n_fields(search_tuple)) {
3310 
3311  return(SEL_EXHAUSTED);
3312  }
3313 
3314  /* This is a non-locking consistent read: if necessary, fetch
3315  a previous version of the record */
3316 
3317  *offsets = rec_get_offsets(rec, index, *offsets,
3318  ULINT_UNDEFINED, heap);
3319 
3320  if (!lock_clust_rec_cons_read_sees(rec, index,
3321  *offsets, trx->read_view)) {
3322 
3323  return(SEL_RETRY);
3324  }
3325 
3326  if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) {
3327 
3328  return(SEL_EXHAUSTED);
3329  }
3330 
3331  *out_rec = rec;
3332 
3333  return(SEL_FOUND);
3334 }
3335 
3336 /********************************************************************/
3344 UNIV_INTERN
3345 ulint
3347 /*=================*/
3348  byte* buf,
3350  ulint mode,
3351  row_prebuilt_t* prebuilt,
3358  ulint match_mode,
3360  ulint direction)
3365 {
3366  dict_index_t* index = prebuilt->index;
3367  ibool comp = dict_table_is_comp(index->table);
3368  const dtuple_t* search_tuple = prebuilt->search_tuple;
3369  btr_pcur_t* pcur = prebuilt->pcur;
3370  trx_t* trx = prebuilt->trx;
3371  dict_index_t* clust_index;
3372  que_thr_t* thr;
3373  const rec_t* rec;
3374  const rec_t* result_rec;
3375  const rec_t* clust_rec;
3376  ulint err = DB_SUCCESS;
3377  ibool unique_search = FALSE;
3378  ibool unique_search_from_clust_index = FALSE;
3379  ibool mtr_has_extra_clust_latch = FALSE;
3380  ibool moves_up = FALSE;
3381  ibool set_also_gap_locks = TRUE;
3382  /* if the query is a plain locking SELECT, and the isolation level
3383  is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */
3384  ibool did_semi_consistent_read = FALSE;
3385  /* if the returned record was locked and we did a semi-consistent
3386  read (fetch the newest committed version), then this is set to
3387  TRUE */
3388 #ifdef UNIV_SEARCH_DEBUG
3389  ulint cnt = 0;
3390 #endif /* UNIV_SEARCH_DEBUG */
3391  ulint next_offs;
3392  ibool same_user_rec;
3393  mtr_t mtr;
3394  mem_heap_t* heap = NULL;
3395  ulint offsets_[REC_OFFS_NORMAL_SIZE];
3396  ulint* offsets = offsets_;
3397  ibool table_lock_waited = FALSE;
3398 
3399  rec_offs_init(offsets_);
3400 
3401  ut_ad(index && pcur && search_tuple);
3402  ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
3403 
3404  if (UNIV_UNLIKELY(prebuilt->table->ibd_file_missing)) {
3405  ut_print_timestamp(stderr);
3406  fprintf(stderr, " InnoDB: Error:\n"
3407  "InnoDB: MySQL is trying to use a table handle"
3408  " but the .ibd file for\n"
3409  "InnoDB: table %s does not exist.\n"
3410  "InnoDB: Have you deleted the .ibd file"
3411  " from the database directory under\n"
3412  "InnoDB: the MySQL datadir, or have you used"
3413  " DISCARD TABLESPACE?\n"
3414  "InnoDB: Look from\n"
3415  "InnoDB: " REFMAN "innodb-troubleshooting.html\n"
3416  "InnoDB: how you can resolve the problem.\n",
3417  prebuilt->table->name);
3418 
3419  return(DB_ERROR);
3420  }
3421 
3422  if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
3423 
3424  return(DB_MISSING_HISTORY);
3425  }
3426 
3427  if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) {
3428  fprintf(stderr,
3429  "InnoDB: Error: trying to free a corrupt\n"
3430  "InnoDB: table handle. Magic n %lu, table name ",
3431  (ulong) prebuilt->magic_n);
3432  ut_print_name(stderr, trx, TRUE, prebuilt->table->name);
3433  putc('\n', stderr);
3434 
3435  mem_analyze_corruption(prebuilt);
3436 
3437  ut_error;
3438  }
3439 
3440 #if 0
3441  fprintf(stderr, "Match mode %lu\n search tuple ",
3442  (ulong) match_mode);
3443  dtuple_print(search_tuple);
3444  fprintf(stderr, "N tables locked %lu\n",
3445  (ulong) trx->mysql_n_tables_locked);
3446 #endif
3447  /*-------------------------------------------------------------*/
3448  /* PHASE 0: Release a possible s-latch we are holding on the
3449  adaptive hash index latch if there is someone waiting behind */
3450 
3451  if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
3452  && trx->has_search_latch) {
3453 
3454  /* There is an x-latch request on the adaptive hash index:
3455  release the s-latch to reduce starvation and wait for
3456  BTR_SEA_TIMEOUT rounds before trying to keep it again over
3457  calls from MySQL */
3458 
3459  rw_lock_s_unlock(&btr_search_latch);
3460  trx->has_search_latch = FALSE;
3461 
3462  trx->search_latch_timeout = BTR_SEA_TIMEOUT;
3463  }
3464 
3465  /* Reset the new record lock info if srv_locks_unsafe_for_binlog
3466  is set or session is using a READ COMMITED isolation level. Then
3467  we are able to remove the record locks set here on an individual
3468  row. */
3469  prebuilt->new_rec_locks = 0;
3470 
3471  /*-------------------------------------------------------------*/
3472  /* PHASE 1: Try to pop the row from the prefetch cache */
3473 
3474  if (UNIV_UNLIKELY(direction == 0)) {
3475  trx->op_info = "starting index read";
3476 
3477  prebuilt->n_rows_fetched = 0;
3478  prebuilt->n_fetch_cached = 0;
3479  prebuilt->fetch_cache_first = 0;
3480 
3481  if (prebuilt->sel_graph == NULL) {
3482  /* Build a dummy select query graph */
3483  row_prebuild_sel_graph(prebuilt);
3484  }
3485  } else {
3486  trx->op_info = "fetching rows";
3487 
3488  if (prebuilt->n_rows_fetched == 0) {
3489  prebuilt->fetch_direction = direction;
3490  }
3491 
3492  if (UNIV_UNLIKELY(direction != prebuilt->fetch_direction)) {
3493  if (UNIV_UNLIKELY(prebuilt->n_fetch_cached > 0)) {
3494  ut_error;
3495  /* TODO: scrollable cursor: restore cursor to
3496  the place of the latest returned row,
3497  or better: prevent caching for a scroll
3498  cursor! */
3499  }
3500 
3501  prebuilt->n_rows_fetched = 0;
3502  prebuilt->n_fetch_cached = 0;
3503  prebuilt->fetch_cache_first = 0;
3504 
3505  } else if (UNIV_LIKELY(prebuilt->n_fetch_cached > 0)) {
3506  row_sel_pop_cached_row_for_mysql(buf, prebuilt);
3507 
3508  prebuilt->n_rows_fetched++;
3509 
3510  srv_n_rows_read++;
3511  err = DB_SUCCESS;
3512  goto func_exit;
3513  }
3514 
3515  if (prebuilt->fetch_cache_first > 0
3516  && prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) {
3517 
3518  /* The previous returned row was popped from the fetch
3519  cache, but the cache was not full at the time of the
3520  popping: no more rows can exist in the result set */
3521 
3522  err = DB_RECORD_NOT_FOUND;
3523  goto func_exit;
3524  }
3525 
3526  prebuilt->n_rows_fetched++;
3527 
3528  if (prebuilt->n_rows_fetched > 1000000000) {
3529  /* Prevent wrap-over */
3530  prebuilt->n_rows_fetched = 500000000;
3531  }
3532 
3533  mode = pcur->search_mode;
3534  }
3535 
3536  /* In a search where at most one record in the index may match, we
3537  can use a LOCK_REC_NOT_GAP type record lock when locking a
3538  non-delete-marked matching record.
3539 
3540  Note that in a unique secondary index there may be different
3541  delete-marked versions of a record where only the primary key
3542  values differ: thus in a secondary index we must use next-key
3543  locks when locking delete-marked records. */
3544 
3545  if (match_mode == ROW_SEL_EXACT
3546  && dict_index_is_unique(index)
3547  && dtuple_get_n_fields(search_tuple)
3548  == dict_index_get_n_unique(index)
3549  && (dict_index_is_clust(index)
3550  || !dtuple_contains_null(search_tuple))) {
3551 
3552  /* Note above that a UNIQUE secondary index can contain many
3553  rows with the same key value if one of the columns is the SQL
3554  null. A clustered index under MySQL can never contain null
3555  columns because we demand that all the columns in primary key
3556  are non-null. */
3557 
3558  unique_search = TRUE;
3559 
3560  /* Even if the condition is unique, MySQL seems to try to
3561  retrieve also a second row if a primary key contains more than
3562  1 column.*/
3563 
3564  if (UNIV_UNLIKELY(direction != 0)) {
3565 
3566  err = DB_RECORD_NOT_FOUND;
3567  goto func_exit;
3568  }
3569  }
3570 
3571  mtr_start(&mtr);
3572 
3573  /*-------------------------------------------------------------*/
3574  /* PHASE 2: Try fast adaptive hash index search if possible */
3575 
3576  /* Next test if this is the special case where we can use the fast
3577  adaptive hash index to try the search. Since we must release the
3578  search system latch when we retrieve an externally stored field, we
3579  cannot use the adaptive hash index in a search in the case the row
3580  may be long and there may be externally stored fields */
3581 
3582  if (UNIV_UNLIKELY(direction == 0)
3583  && unique_search
3584  && dict_index_is_clust(index)
3585  && !prebuilt->templ_contains_blob
3586  && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) {
3587 
3588  mode = PAGE_CUR_GE;
3589 
3590  unique_search_from_clust_index = TRUE;
3591 
3592  if (trx->mysql_n_tables_locked == 0
3593  && prebuilt->select_lock_type == LOCK_NONE
3594  && trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
3595  && trx->read_view) {
3596 
3597  /* This is a SELECT query done as a consistent read,
3598  and the read view has already been allocated:
3599  let us try a search shortcut through the hash
3600  index.
3601  NOTE that we must also test that
3602  mysql_n_tables_locked == 0, because this might
3603  also be INSERT INTO ... SELECT ... or
3604  CREATE TABLE ... SELECT ... . Our algorithm is
3605  NOT prepared to inserts interleaved with the SELECT,
3606  and if we try that, we can deadlock on the adaptive
3607  hash index semaphore! */
3608 
3609 #ifndef UNIV_SEARCH_DEBUG
3610  if (!trx->has_search_latch) {
3612  trx->has_search_latch = TRUE;
3613  }
3614 #endif
3615  switch (row_sel_try_search_shortcut_for_mysql(
3616  &rec, prebuilt, &offsets, &heap,
3617  &mtr)) {
3618  case SEL_FOUND:
3619 #ifdef UNIV_SEARCH_DEBUG
3620  ut_a(0 == cmp_dtuple_rec(search_tuple,
3621  rec, offsets));
3622 #endif
3623  /* At this point, rec is protected by
3624  a page latch that was acquired by
3625  row_sel_try_search_shortcut_for_mysql().
3626  The latch will not be released until
3627  mtr_commit(&mtr). */
3628  ut_ad(!rec_get_deleted_flag(rec, comp));
3629 
3630  if (!row_sel_store_mysql_rec(buf, prebuilt,
3631  rec, FALSE,
3632  offsets)) {
3633  /* Only fresh inserts may contain
3634  incomplete externally stored
3635  columns. Pretend that such
3636  records do not exist. Such
3637  records may only be accessed
3638  at the READ UNCOMMITTED
3639  isolation level or when
3640  rolling back a recovered
3641  transaction. Rollback happens
3642  at a lower level, not here. */
3643  ut_a(trx->isolation_level
3644  == TRX_ISO_READ_UNCOMMITTED);
3645 
3646  /* Proceed as in case SEL_RETRY. */
3647  break;
3648  }
3649 
3650  mtr_commit(&mtr);
3651 
3652  /* ut_print_name(stderr, index->name);
3653  fputs(" shortcut\n", stderr); */
3654 
3655  srv_n_rows_read++;
3656 
3657  err = DB_SUCCESS;
3658  goto release_search_latch_if_needed;
3659 
3660  case SEL_EXHAUSTED:
3661  mtr_commit(&mtr);
3662 
3663  /* ut_print_name(stderr, index->name);
3664  fputs(" record not found 2\n", stderr); */
3665 
3666  err = DB_RECORD_NOT_FOUND;
3667 release_search_latch_if_needed:
3668  if (trx->search_latch_timeout > 0
3669  && trx->has_search_latch) {
3670 
3671  trx->search_latch_timeout--;
3672 
3673  rw_lock_s_unlock(&btr_search_latch);
3674  trx->has_search_latch = FALSE;
3675  }
3676 
3677  /* NOTE that we do NOT store the cursor
3678  position */
3679  goto func_exit;
3680 
3681  case SEL_RETRY:
3682  break;
3683 
3684  default:
3685  ut_ad(0);
3686  }
3687 
3688  mtr_commit(&mtr);
3689  mtr_start(&mtr);
3690  }
3691  }
3692 
3693  /*-------------------------------------------------------------*/
3694  /* PHASE 3: Open or restore index cursor position */
3695 
3696  if (trx->has_search_latch) {
3697  rw_lock_s_unlock(&btr_search_latch);
3698  trx->has_search_latch = FALSE;
3699  }
3700 
3701  ut_ad(prebuilt->sql_stat_start || trx->conc_state == TRX_ACTIVE);
3702  ut_ad(trx->conc_state == TRX_NOT_STARTED
3703  || trx->conc_state == TRX_ACTIVE);
3704  ut_ad(prebuilt->sql_stat_start
3705  || prebuilt->select_lock_type != LOCK_NONE
3706  || trx->read_view);
3707 
3708  ut_ad(prebuilt->sql_stat_start || trx->conc_state == TRX_ACTIVE);
3709  ut_ad(trx->conc_state == TRX_NOT_STARTED
3710  || trx->conc_state == TRX_ACTIVE);
3711  ut_ad(prebuilt->sql_stat_start
3712  || prebuilt->select_lock_type != LOCK_NONE
3713  || trx->read_view);
3714 
3716 
3717  if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
3718  && prebuilt->select_lock_type != LOCK_NONE
3719  && trx->mysql_thd != NULL
3720  && thd_is_select(trx->mysql_thd)) {
3721  /* It is a plain locking SELECT and the isolation
3722  level is low: do not lock gaps */
3723 
3724  set_also_gap_locks = FALSE;
3725  }
3726 
3727  /* Note that if the search mode was GE or G, then the cursor
3728  naturally moves upward (in fetch next) in alphabetical order,
3729  otherwise downward */
3730 
3731  if (UNIV_UNLIKELY(direction == 0)) {
3732  if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) {
3733  moves_up = TRUE;
3734  }
3735  } else if (direction == ROW_SEL_NEXT) {
3736  moves_up = TRUE;
3737  }
3738 
3739  thr = que_fork_get_first_thr(prebuilt->sel_graph);
3740 
3742 
3743  clust_index = dict_table_get_first_index(index->table);
3744 
3745  /* Do some start-of-statement preparations */
3746 
3747  if (!prebuilt->sql_stat_start) {
3748  /* No need to set an intention lock or assign a read view */
3749 
3750  if (trx->read_view == NULL
3751  && prebuilt->select_lock_type == LOCK_NONE) {
3752 
3753  fputs("InnoDB: Error: MySQL is trying to"
3754  " perform a consistent read\n"
3755  "InnoDB: but the read view is not assigned!\n",
3756  stderr);
3757  trx_print(stderr, trx, 600);
3758  fputc('\n', stderr);
3759  ut_error;
3760  }
3761  } else if (prebuilt->select_lock_type == LOCK_NONE) {
3762  /* This is a consistent read */
3763  /* Assign a read view for the query */
3764 
3765  trx_assign_read_view(trx);
3766  prebuilt->sql_stat_start = FALSE;
3767  } else {
3768 wait_table_again:
3769  err = lock_table(0, index->table,
3770  prebuilt->select_lock_type == LOCK_S
3771  ? LOCK_IS : LOCK_IX, thr);
3772 
3773  if (err != DB_SUCCESS) {
3774 
3775  table_lock_waited = TRUE;
3776  goto lock_table_wait;
3777  }
3778  prebuilt->sql_stat_start = FALSE;
3779  }
3780 
3781  /* Open or restore index cursor position */
3782 
3783  if (UNIV_LIKELY(direction != 0)) {
3784  ibool need_to_process = sel_restore_position_for_mysql(
3785  &same_user_rec, BTR_SEARCH_LEAF,
3786  pcur, moves_up, &mtr);
3787 
3788  if (UNIV_UNLIKELY(need_to_process)) {
3789  if (UNIV_UNLIKELY(prebuilt->row_read_type
3790  == ROW_READ_DID_SEMI_CONSISTENT)) {
3791  /* We did a semi-consistent read,
3792  but the record was removed in
3793  the meantime. */
3794  prebuilt->row_read_type
3795  = ROW_READ_TRY_SEMI_CONSISTENT;
3796  }
3797  } else if (UNIV_LIKELY(prebuilt->row_read_type
3798  != ROW_READ_DID_SEMI_CONSISTENT)) {
3799 
3800  /* The cursor was positioned on the record
3801  that we returned previously. If we need
3802  to repeat a semi-consistent read as a
3803  pessimistic locking read, the record
3804  cannot be skipped. */
3805 
3806  goto next_rec;
3807  }
3808 
3809  } else if (dtuple_get_n_fields(search_tuple) > 0) {
3810 
3811  btr_pcur_open_with_no_init(index, search_tuple, mode,
3813  pcur, 0, &mtr);
3814 
3815  pcur->trx_if_known = trx;
3816 
3817  rec = btr_pcur_get_rec(pcur);
3818 
3819  if (!moves_up
3820  && !page_rec_is_supremum(rec)
3821  && set_also_gap_locks
3823  || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
3824  && prebuilt->select_lock_type != LOCK_NONE) {
3825 
3826  /* Try to place a gap lock on the next index record
3827  to prevent phantoms in ORDER BY ... DESC queries */
3828  const rec_t* next = page_rec_get_next_const(rec);
3829 
3830  offsets = rec_get_offsets(next, index, offsets,
3831  ULINT_UNDEFINED, &heap);
3832  err = sel_set_rec_lock(btr_pcur_get_block(pcur),
3833  next, index, offsets,
3834  prebuilt->select_lock_type,
3835  LOCK_GAP, thr);
3836 
3837  switch (err) {
3838  case DB_SUCCESS_LOCKED_REC:
3839  err = DB_SUCCESS;
3840  case DB_SUCCESS:
3841  break;
3842  default:
3843  goto lock_wait_or_error;
3844  }
3845  }
3846  } else {
3847  if (mode == PAGE_CUR_G) {
3849  TRUE, index, BTR_SEARCH_LEAF, pcur, FALSE,
3850  &mtr);
3851  } else if (mode == PAGE_CUR_L) {
3853  FALSE, index, BTR_SEARCH_LEAF, pcur, FALSE,
3854  &mtr);
3855  }
3856  }
3857 
3858 rec_loop:
3859  /*-------------------------------------------------------------*/
3860  /* PHASE 4: Look for matching records in a loop */
3861 
3862  rec = btr_pcur_get_rec(pcur);
3863  ut_ad(!!page_rec_is_comp(rec) == comp);
3864 #ifdef UNIV_SEARCH_DEBUG
3865  /*
3866  fputs("Using ", stderr);
3867  dict_index_name_print(stderr, index);
3868  fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
3869  page_get_page_no(page_align(rec)));
3870  rec_print(rec);
3871  */
3872 #endif /* UNIV_SEARCH_DEBUG */
3873 
3874  if (page_rec_is_infimum(rec)) {
3875 
3876  /* The infimum record on a page cannot be in the result set,
3877  and neither can a record lock be placed on it: we skip such
3878  a record. */
3879 
3880  goto next_rec;
3881  }
3882 
3883  if (page_rec_is_supremum(rec)) {
3884 
3885  if (set_also_gap_locks
3887  || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
3888  && prebuilt->select_lock_type != LOCK_NONE) {
3889 
3890  /* Try to place a lock on the index record */
3891 
3892  /* If innodb_locks_unsafe_for_binlog option is used
3893  or this session is using a READ COMMITTED isolation
3894  level we do not lock gaps. Supremum record is really
3895  a gap and therefore we do not set locks there. */
3896 
3897  offsets = rec_get_offsets(rec, index, offsets,
3898  ULINT_UNDEFINED, &heap);
3899  err = sel_set_rec_lock(btr_pcur_get_block(pcur),
3900  rec, index, offsets,
3901  prebuilt->select_lock_type,
3902  LOCK_ORDINARY, thr);
3903 
3904  switch (err) {
3905  case DB_SUCCESS_LOCKED_REC:
3906  err = DB_SUCCESS;
3907  case DB_SUCCESS:
3908  break;
3909  default:
3910  goto lock_wait_or_error;
3911  }
3912  }
3913  /* A page supremum record cannot be in the result set: skip
3914  it now that we have placed a possible lock on it */
3915 
3916  goto next_rec;
3917  }
3918 
3919  /*-------------------------------------------------------------*/
3920  /* Do sanity checks in case our cursor has bumped into page
3921  corruption */
3922 
3923  if (comp) {
3924  next_offs = rec_get_next_offs(rec, TRUE);
3925  if (UNIV_UNLIKELY(next_offs < PAGE_NEW_SUPREMUM)) {
3926 
3927  goto wrong_offs;
3928  }
3929  } else {
3930  next_offs = rec_get_next_offs(rec, FALSE);
3931  if (UNIV_UNLIKELY(next_offs < PAGE_OLD_SUPREMUM)) {
3932 
3933  goto wrong_offs;
3934  }
3935  }
3936 
3937  if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) {
3938 
3939 wrong_offs:
3940  if (srv_force_recovery == 0 || moves_up == FALSE) {
3941  ut_print_timestamp(stderr);
3942  buf_page_print(page_align(rec), 0);
3943  fprintf(stderr,
3944  "\nInnoDB: rec address %p,"
3945  " buf block fix count %lu\n",
3946  (void*) rec, (ulong)
3947  btr_cur_get_block(btr_pcur_get_btr_cur(pcur))
3948  ->page.buf_fix_count);
3949  fprintf(stderr,
3950  "InnoDB: Index corruption: rec offs %lu"
3951  " next offs %lu, page no %lu,\n"
3952  "InnoDB: ",
3953  (ulong) page_offset(rec),
3954  (ulong) next_offs,
3955  (ulong) page_get_page_no(page_align(rec)));
3956  dict_index_name_print(stderr, trx, index);
3957  fputs(". Run CHECK TABLE. You may need to\n"
3958  "InnoDB: restore from a backup, or"
3959  " dump + drop + reimport the table.\n",
3960  stderr);
3961 
3962  err = DB_CORRUPTION;
3963 
3964  goto lock_wait_or_error;
3965  } else {
3966  /* The user may be dumping a corrupt table. Jump
3967  over the corruption to recover as much as possible. */
3968 
3969  fprintf(stderr,
3970  "InnoDB: Index corruption: rec offs %lu"
3971  " next offs %lu, page no %lu,\n"
3972  "InnoDB: ",
3973  (ulong) page_offset(rec),
3974  (ulong) next_offs,
3975  (ulong) page_get_page_no(page_align(rec)));
3976  dict_index_name_print(stderr, trx, index);
3977  fputs(". We try to skip the rest of the page.\n",
3978  stderr);
3979 
3980  btr_pcur_move_to_last_on_page(pcur, &mtr);
3981 
3982  goto next_rec;
3983  }
3984  }
3985  /*-------------------------------------------------------------*/
3986 
3987  /* Calculate the 'offsets' associated with 'rec' */
3988 
3989  offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
3990 
3991  if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
3992  if (!rec_validate(rec, offsets)
3993  || !btr_index_rec_validate(rec, index, FALSE)) {
3994  fprintf(stderr,
3995  "InnoDB: Index corruption: rec offs %lu"
3996  " next offs %lu, page no %lu,\n"
3997  "InnoDB: ",
3998  (ulong) page_offset(rec),
3999  (ulong) next_offs,
4000  (ulong) page_get_page_no(page_align(rec)));
4001  dict_index_name_print(stderr, trx, index);
4002  fputs(". We try to skip the record.\n",
4003  stderr);
4004 
4005  goto next_rec;
4006  }
4007  }
4008 
4009  /* Note that we cannot trust the up_match value in the cursor at this
4010  place because we can arrive here after moving the cursor! Thus
4011  we have to recompare rec and search_tuple to determine if they
4012  match enough. */
4013 
4014  if (match_mode == ROW_SEL_EXACT) {
4015  /* Test if the index record matches completely to search_tuple
4016  in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
4017 
4018  /* fputs("Comparing rec and search tuple\n", stderr); */
4019 
4020  if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) {
4021 
4022  if (set_also_gap_locks
4024  || trx->isolation_level
4025  <= TRX_ISO_READ_COMMITTED)
4026  && prebuilt->select_lock_type != LOCK_NONE) {
4027 
4028  /* Try to place a gap lock on the index
4029  record only if innodb_locks_unsafe_for_binlog
4030  option is not set or this session is not
4031  using a READ COMMITTED isolation level. */
4032 
4033  err = sel_set_rec_lock(
4034  btr_pcur_get_block(pcur),
4035  rec, index, offsets,
4036  prebuilt->select_lock_type, LOCK_GAP,
4037  thr);
4038 
4039  switch (err) {
4040  case DB_SUCCESS_LOCKED_REC:
4041  case DB_SUCCESS:
4042  break;
4043  default:
4044  goto lock_wait_or_error;
4045  }
4046  }
4047 
4048  btr_pcur_store_position(pcur, &mtr);
4049 
4050  err = DB_RECORD_NOT_FOUND;
4051  /* ut_print_name(stderr, index->name);
4052  fputs(" record not found 3\n", stderr); */
4053 
4054  goto normal_return;
4055  }
4056 
4057  } else if (match_mode == ROW_SEL_EXACT_PREFIX) {
4058 
4059  if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) {
4060 
4061  if (set_also_gap_locks
4063  || trx->isolation_level
4064  <= TRX_ISO_READ_COMMITTED)
4065  && prebuilt->select_lock_type != LOCK_NONE) {
4066 
4067  /* Try to place a gap lock on the index
4068  record only if innodb_locks_unsafe_for_binlog
4069  option is not set or this session is not
4070  using a READ COMMITTED isolation level. */
4071 
4072  err = sel_set_rec_lock(
4073  btr_pcur_get_block(pcur),
4074  rec, index, offsets,
4075  prebuilt->select_lock_type, LOCK_GAP,
4076  thr);
4077 
4078  switch (err) {
4079  case DB_SUCCESS_LOCKED_REC:
4080  case DB_SUCCESS:
4081  break;
4082  default:
4083  goto lock_wait_or_error;
4084  }
4085  }
4086 
4087  btr_pcur_store_position(pcur, &mtr);
4088 
4089  err = DB_RECORD_NOT_FOUND;
4090  /* ut_print_name(stderr, index->name);
4091  fputs(" record not found 4\n", stderr); */
4092 
4093  goto normal_return;
4094  }
4095  }
4096 
4097  /* We are ready to look at a possible new index entry in the result
4098  set: the cursor is now placed on a user record */
4099 
4100  if (prebuilt->select_lock_type != LOCK_NONE) {
4101  /* Try to place a lock on the index record; note that delete
4102  marked records are a special case in a unique search. If there
4103  is a non-delete marked record, then it is enough to lock its
4104  existence with LOCK_REC_NOT_GAP. */
4105 
4106  /* If innodb_locks_unsafe_for_binlog option is used
4107  or this session is using a READ COMMITED isolation
4108  level we lock only the record, i.e., next-key locking is
4109  not used. */
4110 
4111  ulint lock_type;
4112 
4113  if (!set_also_gap_locks
4115  || trx->isolation_level <= TRX_ISO_READ_COMMITTED
4116  || (unique_search
4117  && !UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp)))) {
4118 
4119  goto no_gap_lock;
4120  } else {
4121  lock_type = LOCK_ORDINARY;
4122  }
4123 
4124  /* If we are doing a 'greater or equal than a primary key
4125  value' search from a clustered index, and we find a record
4126  that has that exact primary key value, then there is no need
4127  to lock the gap before the record, because no insert in the
4128  gap can be in our search range. That is, no phantom row can
4129  appear that way.
4130 
4131  An example: if col1 is the primary key, the search is WHERE
4132  col1 >= 100, and we find a record where col1 = 100, then no
4133  need to lock the gap before that record. */
4134 
4135  if (index == clust_index
4136  && mode == PAGE_CUR_GE
4137  && direction == 0
4138  && dtuple_get_n_fields_cmp(search_tuple)
4139  == dict_index_get_n_unique(index)
4140  && 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) {
4141 no_gap_lock:
4142  lock_type = LOCK_REC_NOT_GAP;
4143  }
4144 
4145  err = sel_set_rec_lock(btr_pcur_get_block(pcur),
4146  rec, index, offsets,
4147  prebuilt->select_lock_type,
4148  lock_type, thr);
4149 
4150  switch (err) {
4151  const rec_t* old_vers;
4152  case DB_SUCCESS_LOCKED_REC:
4154  || trx->isolation_level
4155  <= TRX_ISO_READ_COMMITTED) {
4156  /* Note that a record of
4157  prebuilt->index was locked. */
4158  prebuilt->new_rec_locks = 1;
4159  }
4160  err = DB_SUCCESS;
4161  case DB_SUCCESS:
4162  break;
4163  case DB_LOCK_WAIT:
4164  /* Never unlock rows that were part of a conflict. */
4165  prebuilt->new_rec_locks = 0;
4166 
4167  if (UNIV_LIKELY(prebuilt->row_read_type
4168  != ROW_READ_TRY_SEMI_CONSISTENT)
4169  || unique_search
4170  || index != clust_index) {
4171 
4172  goto lock_wait_or_error;
4173  }
4174 
4175  /* The following call returns 'offsets'
4176  associated with 'old_vers' */
4177  err = row_sel_build_committed_vers_for_mysql(
4178  clust_index, prebuilt, rec,
4179  &offsets, &heap, &old_vers, &mtr);
4180 
4181  switch (err) {
4182  case DB_SUCCESS_LOCKED_REC:
4183  err = DB_SUCCESS;
4184  case DB_SUCCESS:
4185  break;
4186  default:
4187  goto lock_wait_or_error;
4188  }
4189 
4190  mutex_enter(&kernel_mutex);
4191  if (trx->was_chosen_as_deadlock_victim) {
4192  mutex_exit(&kernel_mutex);
4193  err = DB_DEADLOCK;
4194 
4195  goto lock_wait_or_error;
4196  }
4197  if (UNIV_LIKELY(trx->wait_lock != NULL)) {
4199  trx->wait_lock);
4200  } else {
4201  mutex_exit(&kernel_mutex);
4202 
4203  /* The lock was granted while we were
4204  searching for the last committed version.
4205  Do a normal locking read. */
4206 
4207  offsets = rec_get_offsets(rec, index, offsets,
4208  ULINT_UNDEFINED,
4209  &heap);
4210  err = DB_SUCCESS;
4211  break;
4212  }
4213  mutex_exit(&kernel_mutex);
4214 
4215  if (old_vers == NULL) {
4216  /* The row was not yet committed */
4217 
4218  goto next_rec;
4219  }
4220 
4221  did_semi_consistent_read = TRUE;
4222  rec = old_vers;
4223  break;
4224  default:
4225 
4226  goto lock_wait_or_error;
4227  }
4228  } else {
4229  /* This is a non-locking consistent read: if necessary, fetch
4230  a previous version of the record */
4231 
4232  if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) {
4233 
4234  /* Do nothing: we let a non-locking SELECT read the
4235  latest version of the record */
4236 
4237  } else if (index == clust_index) {
4238 
4239  /* Fetch a previous version of the row if the current
4240  one is not visible in the snapshot; if we have a very
4241  high force recovery level set, we try to avoid crashes
4242  by skipping this lookup */
4243 
4244  if (UNIV_LIKELY(srv_force_recovery < 5)
4246  rec, index, offsets, trx->read_view)) {
4247 
4248  rec_t* old_vers;
4249  /* The following call returns 'offsets'
4250  associated with 'old_vers' */
4251  err = row_sel_build_prev_vers_for_mysql(
4252  trx->read_view, clust_index,
4253  prebuilt, rec, &offsets, &heap,
4254  &old_vers, &mtr);
4255 
4256  switch (err) {
4257  case DB_SUCCESS_LOCKED_REC:
4258  case DB_SUCCESS:
4259  break;
4260  default:
4261  goto lock_wait_or_error;
4262  }
4263 
4264  if (old_vers == NULL) {
4265  /* The row did not exist yet in
4266  the read view */
4267 
4268  goto next_rec;
4269  }
4270 
4271  rec = old_vers;
4272  }
4273  } else {
4274  /* We are looking into a non-clustered index,
4275  and to get the right version of the record we
4276  have to look also into the clustered index: this
4277  is necessary, because we can only get the undo
4278  information via the clustered index record. */
4279 
4280  ut_ad(!dict_index_is_clust(index));
4282  rec, trx->read_view)) {
4283  goto requires_clust_rec;
4284  }
4285  }
4286  }
4287 
4288  /* NOTE that at this point rec can be an old version of a clustered
4289  index record built for a consistent read. We cannot assume after this
4290  point that rec is on a buffer pool page. Functions like
4291  page_rec_is_comp() cannot be used! */
4292 
4293  if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp))) {
4294 
4295  /* The record is delete-marked: we can skip it */
4296 
4298  || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
4299  && prebuilt->select_lock_type != LOCK_NONE
4300  && !did_semi_consistent_read) {
4301 
4302  /* No need to keep a lock on a delete-marked record
4303  if we do not want to use next-key locking. */
4304 
4305  row_unlock_for_mysql(prebuilt, TRUE);
4306  }
4307 
4308  /* This is an optimization to skip setting the next key lock
4309  on the record that follows this delete-marked record. This
4310  optimization works because of the unique search criteria
4311  which precludes the presence of a range lock between this
4312  delete marked record and the record following it.
4313 
4314  For now this is applicable only to clustered indexes while
4315  doing a unique search. There is scope for further optimization
4316  applicable to unique secondary indexes. Current behaviour is
4317  to widen the scope of a lock on an already delete marked record
4318  if the same record is deleted twice by the same transaction */
4319  if (index == clust_index && unique_search) {
4320  err = DB_RECORD_NOT_FOUND;
4321 
4322  goto normal_return;
4323  }
4324 
4325  goto next_rec;
4326  }
4327 
4328  /* Get the clustered index record if needed, if we did not do the
4329  search using the clustered index. */
4330 
4331  if (index != clust_index && prebuilt->need_to_access_clustered) {
4332 
4333 requires_clust_rec:
4334  /* We use a 'goto' to the preceding label if a consistent
4335  read of a secondary index record requires us to look up old
4336  versions of the associated clustered index record. */
4337 
4338  ut_ad(rec_offs_validate(rec, index, offsets));
4339 
4340  /* It was a non-clustered index and we must fetch also the
4341  clustered index record */
4342 
4343  mtr_has_extra_clust_latch = TRUE;
4344 
4345  /* The following call returns 'offsets' associated with
4346  'clust_rec'. Note that 'clust_rec' can be an old version
4347  built for a consistent read. */
4348 
4349  err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec,
4350  thr, &clust_rec,
4351  &offsets, &heap, &mtr);
4352  switch (err) {
4353  case DB_SUCCESS:
4354  if (clust_rec == NULL) {
4355  /* The record did not exist in the read view */
4356  ut_ad(prebuilt->select_lock_type == LOCK_NONE);
4357 
4358  goto next_rec;
4359  }
4360  break;
4361  case DB_SUCCESS_LOCKED_REC:
4362  ut_a(clust_rec != NULL);
4364  || trx->isolation_level
4365  <= TRX_ISO_READ_COMMITTED) {
4366  /* Note that the clustered index record
4367  was locked. */
4368  prebuilt->new_rec_locks = 2;
4369  }
4370  err = DB_SUCCESS;
4371  break;
4372  default:
4373  goto lock_wait_or_error;
4374  }
4375 
4376  if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) {
4377 
4378  /* The record is delete marked: we can skip it */
4379 
4381  || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
4382  && prebuilt->select_lock_type != LOCK_NONE) {
4383 
4384  /* No need to keep a lock on a delete-marked
4385  record if we do not want to use next-key
4386  locking. */
4387 
4388  row_unlock_for_mysql(prebuilt, TRUE);
4389  }
4390 
4391  goto next_rec;
4392  }
4393 
4394  result_rec = clust_rec;
4395  ut_ad(rec_offs_validate(result_rec, clust_index, offsets));
4396  } else {
4397  result_rec = rec;
4398  }
4399 
4400  /* We found a qualifying record 'result_rec'. At this point,
4401  'offsets' are associated with 'result_rec'. */
4402 
4403  ut_ad(rec_offs_validate(result_rec,
4404  result_rec != rec ? clust_index : index,
4405  offsets));
4406  ut_ad(!rec_get_deleted_flag(result_rec, comp));
4407 
4408  /* At this point, the clustered index record is protected
4409  by a page latch that was acquired when pcur was positioned.
4410  The latch will not be released until mtr_commit(&mtr). */
4411 
4412  if ((match_mode == ROW_SEL_EXACT
4413  || prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD)
4414  && prebuilt->select_lock_type == LOCK_NONE
4415  && !prebuilt->templ_contains_blob
4416  && !prebuilt->clust_index_was_generated
4417  && prebuilt->template_type
4418  != ROW_MYSQL_DUMMY_TEMPLATE) {
4419 
4420  /* Inside an update, for example, we do not cache rows,
4421  since we may use the cursor position to do the actual
4422  update, that is why we require ...lock_type == LOCK_NONE.
4423  Since we keep space in prebuilt only for the BLOBs of
4424  a single row, we cannot cache rows in the case there
4425  are BLOBs in the fields to be fetched. In HANDLER we do
4426  not cache rows because there the cursor is a scrollable
4427  cursor. */
4428 
4429  if (!row_sel_push_cache_row_for_mysql(prebuilt, result_rec,
4430  result_rec != rec,
4431  offsets)) {
4432  /* Only fresh inserts may contain incomplete
4433  externally stored columns. Pretend that such
4434  records do not exist. Such records may only be
4435  accessed at the READ UNCOMMITTED isolation
4436  level or when rolling back a recovered
4437  transaction. Rollback happens at a lower
4438  level, not here. */
4439  ut_a(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED);
4440  } else if (prebuilt->n_fetch_cached
4441  == MYSQL_FETCH_CACHE_SIZE) {
4442 
4443  goto got_row;
4444  }
4445 
4446  goto next_rec;
4447  } else {
4448  if (UNIV_UNLIKELY
4449  (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE)) {
4450  /* CHECK TABLE: fetch the row */
4451 
4452  if (result_rec != rec
4453  && !prebuilt->need_to_access_clustered) {
4454  /* We used 'offsets' for the clust
4455  rec, recalculate them for 'rec' */
4456  offsets = rec_get_offsets(rec, index, offsets,
4457  ULINT_UNDEFINED,
4458  &heap);
4459  result_rec = rec;
4460  }
4461 
4462  memcpy(buf + 4, result_rec
4463  - rec_offs_extra_size(offsets),
4464  rec_offs_size(offsets));
4465  mach_write_to_4(buf,
4466  rec_offs_extra_size(offsets) + 4);
4467  } else {
4468  /* Returning a row to MySQL */
4469 
4470  if (!row_sel_store_mysql_rec(buf, prebuilt, result_rec,
4471  result_rec != rec,
4472  offsets)) {
4473  /* Only fresh inserts may contain
4474  incomplete externally stored
4475  columns. Pretend that such records do
4476  not exist. Such records may only be
4477  accessed at the READ UNCOMMITTED
4478  isolation level or when rolling back a
4479  recovered transaction. Rollback
4480  happens at a lower level, not here. */
4481  ut_a(trx->isolation_level
4482  == TRX_ISO_READ_UNCOMMITTED);
4483  goto next_rec;
4484  }
4485  }
4486 
4487  if (prebuilt->clust_index_was_generated) {
4488  if (result_rec != rec) {
4489  offsets = rec_get_offsets(
4490  rec, index, offsets, ULINT_UNDEFINED,
4491  &heap);
4492  }
4493  row_sel_store_row_id_to_prebuilt(prebuilt, rec,
4494  index, offsets);
4495  }
4496  }
4497 
4498  /* From this point on, 'offsets' are invalid. */
4499 
4500 got_row:
4501  /* We have an optimization to save CPU time: if this is a consistent
4502  read on a unique condition on the clustered index, then we do not
4503  store the pcur position, because any fetch next or prev will anyway
4504  return 'end of file'. Exceptions are locking reads and the MySQL
4505  HANDLER command where the user can move the cursor with PREV or NEXT
4506  even after a unique search. */
4507 
4508  if (!unique_search_from_clust_index
4509  || prebuilt->select_lock_type != LOCK_NONE) {
4510 
4511  /* Inside an update always store the cursor position */
4512 
4513  btr_pcur_store_position(pcur, &mtr);
4514  }
4515 
4516  err = DB_SUCCESS;
4517 
4518  goto normal_return;
4519 
4520 next_rec:
4521  /* Reset the old and new "did semi-consistent read" flags. */
4522  if (UNIV_UNLIKELY(prebuilt->row_read_type
4523  == ROW_READ_DID_SEMI_CONSISTENT)) {
4524  prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4525  }
4526  did_semi_consistent_read = FALSE;
4527  prebuilt->new_rec_locks = 0;
4528 
4529  /*-------------------------------------------------------------*/
4530  /* PHASE 5: Move the cursor to the next index record */
4531 
4532  if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) {
4533  /* We must commit mtr if we are moving to the next
4534  non-clustered index record, because we could break the
4535  latching order if we would access a different clustered
4536  index page right away without releasing the previous. */
4537 
4538  btr_pcur_store_position(pcur, &mtr);
4539 
4540  mtr_commit(&mtr);
4541  mtr_has_extra_clust_latch = FALSE;
4542 
4543  mtr_start(&mtr);
4544  if (sel_restore_position_for_mysql(&same_user_rec,
4546  pcur, moves_up, &mtr)) {
4547 #ifdef UNIV_SEARCH_DEBUG
4548  cnt++;
4549 #endif /* UNIV_SEARCH_DEBUG */
4550 
4551  goto rec_loop;
4552  }
4553  }
4554 
4555  if (moves_up) {
4556  if (UNIV_UNLIKELY(!btr_pcur_move_to_next(pcur, &mtr))) {
4557 not_moved:
4558  btr_pcur_store_position(pcur, &mtr);
4559 
4560  if (match_mode != 0) {
4561  err = DB_RECORD_NOT_FOUND;
4562  } else {
4563  err = DB_END_OF_INDEX;
4564  }
4565 
4566  goto normal_return;
4567  }
4568  } else {
4569  if (UNIV_UNLIKELY(!btr_pcur_move_to_prev(pcur, &mtr))) {
4570  goto not_moved;
4571  }
4572  }
4573 
4574 #ifdef UNIV_SEARCH_DEBUG
4575  cnt++;
4576 #endif /* UNIV_SEARCH_DEBUG */
4577 
4578  goto rec_loop;
4579 
4580 lock_wait_or_error:
4581  /* Reset the old and new "did semi-consistent read" flags. */
4582  if (UNIV_UNLIKELY(prebuilt->row_read_type
4583  == ROW_READ_DID_SEMI_CONSISTENT)) {
4584  prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4585  }
4586  did_semi_consistent_read = FALSE;
4587 
4588  /*-------------------------------------------------------------*/
4589 
4590  btr_pcur_store_position(pcur, &mtr);
4591 
4592 lock_table_wait:
4593  mtr_commit(&mtr);
4594  mtr_has_extra_clust_latch = FALSE;
4595 
4596  trx->error_state = err;
4597 
4598  /* The following is a patch for MySQL */
4599 
4601 
4602  thr->lock_state = QUE_THR_LOCK_ROW;
4603 
4604  if (row_mysql_handle_errors(&err, trx, thr, NULL)) {
4605  /* It was a lock wait, and it ended */
4606 
4607  thr->lock_state = QUE_THR_LOCK_NOLOCK;
4608  mtr_start(&mtr);
4609 
4610  /* Table lock waited, go try to obtain table lock
4611  again */
4612  if (table_lock_waited) {
4613  table_lock_waited = FALSE;
4614 
4615  goto wait_table_again;
4616  }
4617 
4618  sel_restore_position_for_mysql(&same_user_rec,
4619  BTR_SEARCH_LEAF, pcur,
4620  moves_up, &mtr);
4621 
4623  || trx->isolation_level <= TRX_ISO_READ_COMMITTED)
4624  && !same_user_rec) {
4625 
4626  /* Since we were not able to restore the cursor
4627  on the same user record, we cannot use
4628  row_unlock_for_mysql() to unlock any records, and
4629  we must thus reset the new rec lock info. Since
4630  in lock0lock.c we have blocked the inheriting of gap
4631  X-locks, we actually do not have any new record locks
4632  set in this case.
4633 
4634  Note that if we were able to restore on the 'same'
4635  user record, it is still possible that we were actually
4636  waiting on a delete-marked record, and meanwhile
4637  it was removed by purge and inserted again by some
4638  other user. But that is no problem, because in
4639  rec_loop we will again try to set a lock, and
4640  new_rec_lock_info in trx will be right at the end. */
4641 
4642  prebuilt->new_rec_locks = 0;
4643  }
4644 
4645  mode = pcur->search_mode;
4646 
4647  goto rec_loop;
4648  }
4649 
4650  thr->lock_state = QUE_THR_LOCK_NOLOCK;
4651 
4652 #ifdef UNIV_SEARCH_DEBUG
4653  /* fputs("Using ", stderr);
4654  dict_index_name_print(stderr, index);
4655  fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
4656 #endif /* UNIV_SEARCH_DEBUG */
4657  goto func_exit;
4658 
4659 normal_return:
4660  /*-------------------------------------------------------------*/
4662 
4663  mtr_commit(&mtr);
4664 
4665  if (prebuilt->n_fetch_cached > 0) {
4666  row_sel_pop_cached_row_for_mysql(buf, prebuilt);
4667 
4668  err = DB_SUCCESS;
4669  }
4670 
4671 #ifdef UNIV_SEARCH_DEBUG
4672  /* fputs("Using ", stderr);
4673  dict_index_name_print(stderr, index);
4674  fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */
4675 #endif /* UNIV_SEARCH_DEBUG */
4676  if (err == DB_SUCCESS) {
4677  srv_n_rows_read++;
4678  }
4679 
4680 func_exit:
4681  trx->op_info = "";
4682  if (UNIV_LIKELY_NULL(heap)) {
4683  mem_heap_free(heap);
4684  }
4685 
4686  /* Set or reset the "did semi-consistent read" flag on return.
4687  The flag did_semi_consistent_read is set if and only if
4688  the record being returned was fetched with a semi-consistent read. */
4689  ut_ad(prebuilt->row_read_type != ROW_READ_WITH_LOCKS
4690  || !did_semi_consistent_read);
4691 
4692  if (UNIV_UNLIKELY(prebuilt->row_read_type != ROW_READ_WITH_LOCKS)) {
4693  if (UNIV_UNLIKELY(did_semi_consistent_read)) {
4694  prebuilt->row_read_type = ROW_READ_DID_SEMI_CONSISTENT;
4695  } else {
4696  prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
4697  }
4698  }
4699  return(err);
4700 }
4701 
4702 /*******************************************************************/
4706 UNIV_INTERN
4707 ibool
4709 /*======================================*/
4710  trx_t* trx,
4711  const char* norm_name)
4713 {
4714  dict_table_t* table;
4715  ibool ret = FALSE;
4716 
4717  table = dict_table_get(norm_name, FALSE);
4718 
4719  if (table == NULL) {
4720 
4721  return(FALSE);
4722  }
4723 
4724  mutex_enter(&kernel_mutex);
4725 
4726  /* Start the transaction if it is not started yet */
4727 
4729 
4730  /* If there are locks on the table or some trx has invalidated the
4731  cache up to our trx id, then ret = FALSE.
4732  We do not check what type locks there are on the table, though only
4733  IX type locks actually would require ret = FALSE. */
4734 
4735  if (UT_LIST_GET_LEN(table->locks) == 0
4736  && trx->id >= table->query_cache_inv_trx_id) {
4737 
4738  ret = TRUE;
4739 
4740  /* If the isolation level is high, assign a read view for the
4741  transaction if it does not yet have one */
4742 
4743  if (trx->isolation_level >= TRX_ISO_REPEATABLE_READ
4744  && !trx->read_view) {
4745 
4747  trx->id, trx->global_read_view_heap);
4748  trx->global_read_view = trx->read_view;
4749  }
4750  }
4751 
4752  mutex_exit(&kernel_mutex);
4753 
4754  return(ret);
4755 }
4756 
4757 /*******************************************************************/
4761 static
4762 ib_uint64_t
4763 row_search_autoinc_read_column(
4764 /*===========================*/
4765  dict_index_t* index,
4766  const rec_t* rec,
4767  ulint col_no,
4768  ulint mtype,
4769  ibool unsigned_type)
4770 {
4771  ulint len;
4772  const byte* data;
4773  ib_uint64_t value;
4774  mem_heap_t* heap = NULL;
4775  ulint offsets_[REC_OFFS_NORMAL_SIZE];
4776  ulint* offsets = offsets_;
4777 
4778  rec_offs_init(offsets_);
4779 
4780  offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
4781 
4782  data = rec_get_nth_field(rec, offsets, col_no, &len);
4783 
4784  ut_a(len != UNIV_SQL_NULL);
4785 
4786  switch (mtype) {
4787  case DATA_INT:
4788  ut_a(len <= sizeof value);
4789  value = mach_read_int_type(data, len, unsigned_type);
4790  break;
4791 
4792  case DATA_FLOAT:
4793  ut_a(len == sizeof(float));
4794  value = (ib_uint64_t) mach_float_read(data);
4795  break;
4796 
4797  case DATA_DOUBLE:
4798  ut_a(len == sizeof(double));
4799  value = (ib_uint64_t) mach_double_read(data);
4800  break;
4801 
4802  default:
4803  ut_error;
4804  }
4805 
4806  if (UNIV_LIKELY_NULL(heap)) {
4807  mem_heap_free(heap);
4808  }
4809 
4810  if (!unsigned_type && (ib_int64_t) value < 0) {
4811  value = 0;
4812  }
4813 
4814  return(value);
4815 }
4816 
4817 /*******************************************************************/
4820 static
4821 const rec_t*
4822 row_search_autoinc_get_rec(
4823 /*=======================*/
4824  btr_pcur_t* pcur,
4825  mtr_t* mtr)
4826 {
4827  do {
4828  const rec_t* rec = btr_pcur_get_rec(pcur);
4829 
4830  if (page_rec_is_user_rec(rec)) {
4831  return(rec);
4832  }
4833  } while (btr_pcur_move_to_prev(pcur, mtr));
4834 
4835  return(NULL);
4836 }
4837 
4838 /*******************************************************************/
4842 UNIV_INTERN
4843 ulint
4845 /*===================*/
4846  dict_index_t* index,
4847  const char* col_name,
4848  ib_uint64_t* value)
4849 {
4850  ulint i;
4851  ulint n_cols;
4852  dict_field_t* dfield = NULL;
4853  ulint error = DB_SUCCESS;
4854 
4856 
4857  /* Search the index for the AUTOINC column name */
4858  for (i = 0; i < n_cols; ++i) {
4859  dfield = dict_index_get_nth_field(index, i);
4860 
4861  if (strcmp(col_name, dfield->name) == 0) {
4862  break;
4863  }
4864  }
4865 
4866  *value = 0;
4867 
4868  /* Must find the AUTOINC column name */
4869  if (i < n_cols && dfield) {
4870  mtr_t mtr;
4871  btr_pcur_t pcur;
4872 
4873  mtr_start(&mtr);
4874 
4875  /* Open at the high/right end (FALSE), and INIT
4876  cursor (TRUE) */
4878  FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
4879 
4880  if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
4881  const rec_t* rec;
4882 
4883  rec = row_search_autoinc_get_rec(&pcur, &mtr);
4884 
4885  if (rec != NULL) {
4886  ibool unsigned_type = (
4887  dfield->col->prtype & DATA_UNSIGNED);
4888 
4889  *value = row_search_autoinc_read_column(
4890  index, rec, i,
4891  dfield->col->mtype, unsigned_type);
4892  }
4893  }
4894 
4895  btr_pcur_close(&pcur);
4896 
4897  mtr_commit(&mtr);
4898  } else {
4899  error = DB_RECORD_NOT_FOUND;
4900  }
4901 
4902  return(error);
4903 }
UNIV_INTERN void row_mysql_store_blob_ref(byte *dest, ulint col_len, const void *data, ulint len)
Definition: row0mysql.cc:196
#define UT_LIST_GET_LEN(BASE)
Definition: ut0lst.h:217
sel_node_t * sel_node
Definition: row0sel.h:380
UNIV_INLINE void trx_start_if_not_started_low(trx_t *trx)
ulint val_buf_size
Definition: row0sel.h:196
UNIV_INTERN void que_thr_stop_for_mysql_no_error(que_thr_t *thr, trx_t *trx)
Definition: que0que.cc:1033
UNIV_INTERN ulint row_search_for_mysql(byte *buf, ulint mode, row_prebuilt_t *prebuilt, ulint match_mode, ulint direction)
Definition: row0sel.cc:3346
UNIV_INTERN ibool thd_is_select(const drizzled::Session *session)
Definition: ha_innodb.cc:973
ibool pcur_is_open
Definition: row0sel.h:210
UNIV_INLINE ulint btr_pcur_get_low_match(const btr_pcur_t *cursor)
ibool srv_locks_unsafe_for_binlog
Definition: srv0srv.cc:138
UNIV_INTERN byte * btr_rec_copy_externally_stored_field(const rec_t *rec, const ulint *offsets, ulint zip_size, ulint no, ulint *len, mem_heap_t *heap)
Definition: btr0cur.cc:5207
ulint first_prefetched
Definition: row0sel.h:241
plan_t * plans
Definition: row0sel.h:305
UNIV_INTERN void row_prebuild_sel_graph(row_prebuilt_t *prebuilt)
Definition: row0mysql.cc:1234
#define UT_LIST_GET_NEXT(NAME, N)
Definition: ut0lst.h:201
UNIV_INLINE void trx_start_if_not_started(trx_t *trx)
unsigned clust_index_was_generated
Definition: row0mysql.h:604
#define btr_search_latch
Definition: btr0sea.h:290
ulint lock_state
Definition: que0que.h:383
dict_table_t * table
Definition: row0mysql.h:592
UNIV_INLINE float mach_float_read(const byte *b) __attribute__((nonnull
UNIV_INLINE ibool btr_pcur_is_on_user_rec(const btr_pcur_t *cursor)
UNIV_INLINE ibool dtuple_contains_null(const dtuple_t *tuple)
UNIV_INTERN void sel_col_prefetch_buf_free(sel_buf_t *prefetch_buf)
Definition: row0sel.cc:505
ulint mode
Definition: row0sel.h:230
dict_index_t * index
Definition: row0mysql.h:593
ibool copy_val
Definition: pars0sym.h:187
trx_id_t id
Definition: trx0trx.h:548
UNIV_INLINE ulint que_node_get_val_buf_size(que_node_t *node)
UNIV_INTERN void * row_fetch_print(void *row, void *user_arg)
Definition: row0sel.cc:2181
sym_node_t * into_list
Definition: row0sel.h:294
enum sel_node_state state
Definition: row0sel.h:291
UNIV_INTERN read_view_t * trx_assign_read_view(trx_t *trx)
Definition: trx0trx.cc:1028
#define SYM_SEC_FIELD_NO
Definition: pars0sym.h:134
UNIV_INLINE page_t * page_align(const void *ptr) __attribute__((const ))
UNIV_INLINE void mach_write_to_4(byte *b, ulint n)
UNIV_INTERN void lock_cancel_waiting_and_release(lock_t *lock)
Definition: lock0lock.cc:4127
UNIV_INLINE ulint page_get_page_no(const page_t *page)
UNIV_INLINE void dfield_set_len(dfield_t *field, ulint len)
UNIV_INTERN int cmp_dtuple_rec(const dtuple_t *dtuple, const rec_t *rec, const ulint *offsets)
Definition: rem0cmp.cc:642
UNIV_INTERN ulint lock_table(ulint flags, dict_table_t *table, enum lock_mode mode, que_thr_t *thr)
Definition: lock0lock.cc:3866
UNIV_INLINE void que_node_set_val_buf_size(que_node_t *node, ulint size)
UNIV_INLINE plan_t * sel_node_get_nth_plan(sel_node_t *node, ulint i)
UNIV_INTERN ulint row_search_max_autoinc(dict_index_t *index, const char *col_name, ib_uint64_t *value)
Definition: row0sel.cc:4844
unsigned templ_contains_blob
Definition: row0mysql.h:633
UNIV_INLINE ulint page_get_n_recs(const page_t *page)
sym_node_t * table_list
Definition: row0sel.h:295
UNIV_INTERN void buf_page_print(const byte *read_buf, ulint zip_size)
Definition: buf0buf.cc:601
UNIV_INTERN ulint btr_copy_externally_stored_field_prefix(byte *buf, ulint len, ulint zip_size, const byte *data, ulint local_len)
Definition: btr0cur.cc:5098
UNIV_INLINE ulint dfield_is_ext(const dfield_t *field)
#define mem_free(PTR)
Definition: mem0mem.h:249
que_t * graph
Definition: que0que.h:356
ulint n_rows_prefetched
Definition: row0sel.h:238
UNIV_INLINE ulint rec_get_next_offs(const rec_t *rec, ulint comp)
#define SYM_CLUST_FIELD_NO
Definition: pars0sym.h:132
UNIV_INLINE void * ut_memcpy(void *dest, const void *sour, ulint n)
UNIV_INTERN void ut_print_buf(FILE *file, const void *buf, ulint len)
Definition: ut0ut.cc:444
UNIV_INTERN ulint dtype_get_at_most_n_mbchars(ulint prtype, ulint mbminmaxlen, ulint prefix_len, ulint data_len, const char *str)
Definition: data0type.cc:49
UNIV_INLINE ibool dict_table_is_comp(const dict_table_t *table)
UNIV_INLINE ulint rec_offs_nth_extern(const ulint *offsets, ulint n)
UNIV_INTERN ibool row_mysql_handle_errors(ulint *new_err, trx_t *trx, que_thr_t *thr, trx_savept_t *savept)
Definition: row0mysql.cc:533
UNIV_INTERN void rec_print_new(FILE *file, const rec_t *rec, const ulint *offsets)
Definition: rem0rec.cc:1722
UNIV_INLINE ulint rec_offs_extra_size(const ulint *offsets)
UNIV_INLINE ulint dict_col_get_clust_pos(const dict_col_t *col, const dict_index_t *clust_index)
unsigned sql_stat_start
Definition: row0mysql.h:596
UNIV_INTERN ibool btr_index_rec_validate(const rec_t *rec, const dict_index_t *index, ibool dump_on_error)
Definition: btr0btr.cc:3772
unsigned prtype
Definition: dict0mem.h:273
que_node_t * run_node
Definition: que0que.h:376
unsigned index_usable
Definition: row0mysql.h:611
ulint conc_state
Definition: trx0trx.h:480
UNIV_INLINE rec_t * page_rec_get_next(rec_t *rec)
UNIV_INLINE ulint btr_pcur_get_up_match(const btr_pcur_t *cursor)
#define ut_d(EXPR)
Definition: ut0dbg.h:129
UNIV_INTERN void rec_print(FILE *file, const rec_t *rec, const dict_index_t *index)
Definition: rem0rec.cc:1750
UNIV_INLINE ibool btr_pcur_is_before_first_on_page(const btr_pcur_t *cursor)
#define mem_heap_free(heap)
Definition: mem0mem.h:117
trx_t * trx_if_known
Definition: btr0pcur.h:492
UNIV_INTERN ulint lock_sec_rec_cons_read_sees(const rec_t *rec, const read_view_t *view)
Definition: lock0lock.cc:535
que_node_t ** tuple_exps
Definition: row0sel.h:223
#define LOCK_ORDINARY
Definition: lock0lock.h:779
ulint n_rows_fetched
Definition: row0sel.h:236
UNIV_INLINE ulint rw_lock_get_writer(const rw_lock_t *lock)
UNIV_INTERN byte * row_mysql_store_col_in_innobase_format(dfield_t *dfield, byte *buf, ibool row_format_col, const byte *mysql_data, ulint col_len, ulint comp)
Definition: row0mysql.cc:301
ibool unique_search
Definition: row0sel.h:234
UNIV_INTERN void dtuple_print(FILE *f, const dtuple_t *tuple)
Definition: data0data.cc:532
UNIV_INLINE void dfield_copy_data(dfield_t *field1, const dfield_t *field2)
ulint n_exact_match
Definition: row0sel.h:231
dict_index_t * index
Definition: row0sel.h:206
UNIV_INTERN void row_mysql_pad_col(ulint mbminlen, byte *pad, ulint len)
Definition: row0mysql.cc:255
ibool consistent_read
Definition: row0sel.h:312
UNIV_INLINE ulint dtuple_get_n_fields(const dtuple_t *tuple)
que_common_t common
Definition: row0sel.h:290
ibool set_x_locks
Definition: row0sel.h:298
UNIV_INTERN read_view_t * read_view_open_now(trx_id_t cr_trx_id, mem_heap_t *heap)
Definition: read0read.cc:250
UNIV_INLINE void btr_pcur_close(btr_pcur_t *cursor)
#define DICT_MAX_INDEX_COL_LEN
DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum indexed column length (or indexed pref...
Definition: dict0mem.h:316
que_fork_t * sel_graph
Definition: row0mysql.h:673
dtuple_t * clust_ref
Definition: row0mysql.h:681
UNIV_INLINE ulint dtype_get_mysql_type(const dtype_t *type)
UNIV_INTERN void sel_node_free_private(sel_node_t *node)
Definition: row0sel.cc:275
ibool no_prefetch
Definition: row0sel.h:243
db_err
Definition: db0err.h:31
UNIV_INTERN sel_node_t * sel_node_create(mem_heap_t *heap)
Definition: row0sel.cc:255
que_node_t * parent
Definition: que0types.h:49
UNIV_INLINE ulint rec_get_deleted_flag(const rec_t *rec, ulint comp)
unsigned need_to_access_clustered
Definition: row0mysql.h:628
sym_node_t * into_list
Definition: row0sel.h:345
UNIV_INLINE void eval_exp(que_node_t *exp_node)
ibool must_get_clust
Definition: row0sel.h:258
UNIV_INTERN void mtr_commit(mtr_t *mtr) __attribute__((nonnull))
Definition: mtr0mtr.cc:247
UNIV_INLINE void dfield_set_data(dfield_t *field, const void *data, ulint len)
UNIV_INLINE void row_build_row_ref_fast(dtuple_t *ref, const ulint *map, const rec_t *rec, const ulint *offsets)
UNIV_INLINE ulint dict_table_zip_size(const dict_table_t *table)
UNIV_INLINE ulint dfield_get_len(const dfield_t *field)
mem_heap_t * blob_heap
Definition: row0mysql.h:753
const char * op_info
Definition: trx0trx.h:477
UNIV_INLINE ulint page_rec_is_comp(const rec_t *rec)
UNIV_INTERN void dtuple_set_n_fields(dtuple_t *tuple, ulint n_fields)
Definition: data0data.cc:132
UNIV_INLINE void eval_node_copy_val(que_node_t *node1, que_node_t *node2)
UNIV_INLINE ulint rec_offs_comp(const ulint *offsets)
UNIV_INTERN enum db_err lock_sec_rec_read_check_and_lock(ulint flags, const buf_block_t *block, const rec_t *rec, dict_index_t *index, const ulint *offsets, enum lock_mode mode, ulint gap_mode, que_thr_t *thr)
Definition: lock0lock.cc:5361
dict_table_t * table
Definition: pars0sym.h:212
UNIV_INTERN void row_sel_convert_mysql_key_to_innobase(dtuple_t *tuple, byte *buf, ulint buf_len, dict_index_t *index, const byte *key_ptr, ulint key_len, trx_t *trx)
Definition: row0sel.cc:2292
UNIV_INTERN void trx_print(FILE *f, trx_t *trx, ulint max_query_len)
Definition: trx0trx.cc:1690
UNIV_INLINE int cmp_data_data(ulint mtype, ulint prtype, const byte *data1, ulint len1, const byte *data2, ulint len2)
unsigned prefix_len
Definition: dict0mem.h:322
UNIV_INLINE void eval_sym(sym_node_t *sym_node)
UNIV_INTERN ibool lock_clust_rec_cons_read_sees(const rec_t *rec, dict_index_t *index, const ulint *offsets, read_view_t *view)
Definition: lock0lock.cc:500
que_node_t * args
Definition: pars0pars.h:649
UNIV_INLINE void eval_node_copy_and_alloc_val(que_node_t *node, const byte *str, ulint len)
que_node_t * select_list
Definition: row0sel.h:293
sel_buf_t * prefetch_buf
Definition: pars0sym.h:217
const byte * default_rec
Definition: row0mysql.h:652
UNIV_INLINE ulint dict_index_is_clust(const dict_index_t *index) __attribute__((pure))
const char * name
Definition: dict0mem.h:321
btr_pcur_t clust_pcur
Definition: row0sel.h:272
UNIV_INLINE ulint dtuple_get_n_fields_cmp(const dtuple_t *tuple)
UNIV_INTERN int row_unlock_for_mysql(row_prebuilt_t *prebuilt, ibool has_latches_on_recs)
Definition: row0mysql.cc:1510
UNIV_INLINE ulint page_offset(const void *ptr) __attribute__((const ))
UNIV_INLINE double mach_double_read(const byte *b) __attribute__((nonnull
UNIV_INTERN void btr_pcur_store_position(btr_pcur_t *cursor, mtr_t *mtr)
Definition: btr0pcur.cc:89
dict_table_t * table
Definition: row0sel.h:204
ibool stored_cursor_rec_processed
Definition: row0sel.h:219
#define ut_a(EXPR)
Definition: ut0dbg.h:105
mysql_row_templ_t * mysql_template
Definition: row0mysql.h:639
lock_t * wait_lock
Definition: trx0trx.h:637
UNIV_INLINE ulint dict_index_get_n_ordering_defined_by_user(const dict_index_t *index)
UNIV_INTERN enum db_err lock_clust_rec_read_check_and_lock(ulint flags, const buf_block_t *block, const rec_t *rec, dict_index_t *index, const ulint *offsets, enum lock_mode mode, ulint gap_mode, que_thr_t *thr)
Definition: lock0lock.cc:5440
UNIV_INTERN void ut_print_name(FILE *f, struct trx_struct *trx, ibool table_id, const char *name)
Definition: ut0ut.cc:528
UNIV_INLINE const dict_col_t * dict_field_get_col(const dict_field_t *field)
btr_pcur_t pcur
Definition: row0sel.h:207
#define LOCK_GAP
Definition: lock0lock.h:784
ibool can_get_updated
Definition: row0sel.h:322
UNIV_INTERN ibool eval_cmp(func_node_t *cmp_node)
Definition: eval0eval.cc:119
UNIV_INLINE void * mem_heap_alloc(mem_heap_t *heap, ulint n)
#define BTR_SEA_TIMEOUT
Definition: btr0sea.h:315
pars_user_func_t * func
Definition: row0sel.h:348
#define mem_heap_create(N)
Definition: mem0mem.h:97
#define UT_NOT_USED(A)
Definition: ut0dbg.h:134
#define LOCK_REC_NOT_GAP
Definition: lock0lock.h:797
mem_heap_t * old_vers_heap
Definition: row0sel.h:275
UNIV_INLINE ibool page_rec_is_supremum(const rec_t *rec) __attribute__((const ))
dict_table_t * table
Definition: dict0mem.h:341
UNIV_INLINE que_thr_t * que_fork_get_first_thr(que_fork_t *fork)
UNIV_INTERN void row_build_row_ref_in_tuple(dtuple_t *ref, const rec_t *rec, const dict_index_t *index, ulint *offsets, trx_t *trx)
Definition: row0row.cc:543
sym_node_list_t columns
Definition: row0sel.h:244
UNIV_INTERN ulint row_vers_build_for_consistent_read(const rec_t *rec, mtr_t *mtr, dict_index_t *index, ulint **offsets, read_view_t *view, mem_heap_t **offset_heap, mem_heap_t *in_heap, rec_t **old_vers)
Definition: row0vers.cc:484
#define UT_LIST_GET_FIRST(BASE)
Definition: ut0lst.h:224
byte * data
Definition: row0sel.h:191
ibool is_aggregate
Definition: row0sel.h:316
btr_pcur_t * pcur
Definition: row0mysql.h:669
UNIV_INTERN void que_thr_stop_for_mysql(que_thr_t *thr)
Definition: que0que.cc:955
UNIV_INLINE void mem_heap_empty(mem_heap_t *heap)
#define rw_lock_s_lock(M)
Definition: sync0rw.h:155
UNIV_INTERN que_thr_t * row_printf_step(que_thr_t *thr)
Definition: row0sel.cc:2225
ibool aggregate_already_fetched
Definition: row0sel.h:318
dtuple_t * search_tuple
Definition: row0mysql.h:675
unsigned template_type
Definition: row0mysql.h:619
byte row_id[DATA_ROW_ID_LEN]
Definition: row0mysql.h:676
#define ut_ad(EXPR)
Definition: ut0dbg.h:127
UNIV_INTERN ibool row_search_check_if_query_cache_permitted(trx_t *trx, const char *norm_name)
Definition: row0sel.cc:4708
ulint search_mode
Definition: btr0pcur.h:491
UNIV_INLINE ibool eval_node_get_ibool_val(que_node_t *node)
unsigned ibd_file_missing
Definition: dict0mem.h:490
sym_node_t * alias
Definition: pars0sym.h:178
UNIV_INTERN void que_thr_move_to_run_state_for_mysql(que_thr_t *thr, trx_t *trx)
Definition: que0que.cc:1001
#define ut_error
Definition: ut0dbg.h:115
UNIV_INLINE que_node_t * que_node_get_parent(que_node_t *node)
#define BTR_EXTERN_FIELD_REF_SIZE
Definition: btr0types.h:170
ulint * clust_map
Definition: row0sel.h:266
UNIV_INLINE ulint dict_index_get_n_unique(const dict_index_t *index)
ulint fetch_table
Definition: row0sel.h:303
UNIV_INLINE const rec_t * page_rec_get_next_const(const rec_t *rec)
UNIV_INLINE ulint que_node_get_type(que_node_t *node)
ibool keep_other_fields_on_keyread
Definition: row0mysql.h:745
read_view_t * read_view
Definition: trx0trx.h:664
UNIV_INTERN ibool rec_validate(const rec_t *rec, const ulint *offsets)
Definition: rem0rec.cc:1564
ulint n_tables
Definition: row0sel.h:302
UNIV_INLINE ibool page_rec_is_user_rec(const rec_t *rec) __attribute__((const ))
dtuple_t * tuple
Definition: row0sel.h:229
UNIV_INLINE void dfield_set_null(dfield_t *field)
mem_heap_t * old_vers_heap
Definition: row0mysql.h:755
UNIV_INLINE ulint dict_index_get_sys_col_pos(const dict_index_t *index, ulint type)
UNIV_INTERN ulint row_vers_build_for_semi_consistent_read(const rec_t *rec, mtr_t *mtr, dict_index_t *index, ulint **offsets, mem_heap_t **offset_heap, mem_heap_t *in_heap, const rec_t **old_vers)
Definition: row0vers.cc:616
UNIV_INTERN que_thr_t * fetch_step(que_thr_t *thr)
Definition: row0sel.cc:2118
UNIV_INLINE ulint btr_pcur_get_rel_pos(const btr_pcur_t *cursor)
UNIV_INTERN void ut_print_timestamp(FILE *file)
Definition: ut0ut.cc:247
sel_node_t * last_sel_node
Definition: que0que.h:428
UNIV_INLINE ulint dict_index_is_unique(const dict_index_t *index) __attribute__((pure))
ulint row_lock_mode
Definition: row0sel.h:301
UNIV_INLINE trx_t * thr_get_trx(que_thr_t *thr)
unsigned n_template
Definition: row0mysql.h:623
UNIV_INLINE buf_block_t * btr_cur_get_block(btr_cur_t *cursor)
que_node_t * prev_node
Definition: que0que.h:379
ibool cursor_at_end
Definition: row0sel.h:212
btr_pcur_t * clust_pcur
Definition: row0mysql.h:671
dtuple_t * clust_ref
Definition: row0sel.h:269
drizzled::Session * mysql_thd
Definition: trx0trx.h:559
UNIV_INTERN os_thread_id_t os_thread_get_curr_id(void)
Definition: os0thread.cc:93
ulint field_nos[2]
Definition: pars0sym.h:191
sel_node_t * cursor_def
Definition: row0sel.h:344
trx_id_t query_cache_inv_trx_id
Definition: dict0mem.h:545
UNIV_INTERN ibool cmp_dtuple_is_prefix_of_rec(const dtuple_t *dtuple, const rec_t *rec, const ulint *offsets)
Definition: rem0cmp.cc:662
UNIV_INTERN void mem_analyze_corruption(void *ptr)
Definition: mem0dbg.cc:822
byte * fetch_cache[MYSQL_FETCH_CACHE_SIZE]
Definition: row0mysql.h:735
UNIV_INLINE void mtr_start(mtr_t *mtr) __attribute__((nonnull))
UNIV_INLINE void btr_pcur_move_to_last_on_page(btr_pcur_t *cursor, mtr_t *mtr)
UNIV_INLINE void eval_node_set_int_val(que_node_t *node, lint val)
UNIV_INTERN ibool buf_LRU_buf_pool_running_out(void)
Definition: buf0lru.cc:792
dict_col_t * col
Definition: dict0mem.h:320
UNIV_INTERN void dtype_print(const dtype_t *type)
Definition: data0type.cc:195
UNIV_INLINE ibool page_rec_is_infimum(const rec_t *rec) __attribute__((const ))
UNIV_INTERN dict_table_t * dict_table_get(const char *table_name, ibool inc_mysql_count)
Definition: dict0dict.cc:746
UNIV_INLINE ulint rec_offs_size(const ulint *offsets)
UNIV_INTERN void dfield_print_also_hex(const dfield_t *dfield)
Definition: data0data.cc:346
UNIV_INLINE dfield_t * que_node_get_val(que_node_t *node)
ibool asc
Definition: row0sel.h:209
UNIV_INTERN byte * row_mysql_store_true_var_len(byte *dest, ulint len, ulint lenlen)
Definition: row0mysql.cc:143
const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE]
Definition: btr0cur.cc:135
unsigned mbminmaxlen
Definition: dict0mem.h:292
UNIV_INTERN void dict_index_name_print(FILE *file, trx_t *trx, const dict_index_t *index)
Definition: dict0dict.cc:4775
UNIV_INLINE que_node_t * que_node_get_next(que_node_t *node)
UNIV_INLINE void btr_pcur_open_at_index_side(ibool from_left, dict_index_t *index, ulint latch_mode, btr_pcur_t *pcur, ibool do_init, mtr_t *mtr)
ulint error_state
Definition: trx0trx.h:601
ulint latch_mode
Definition: btr0pcur.h:460
sym_node_t * indirection
Definition: pars0sym.h:173
UNIV_INLINE ullint mach_read_int_type(const byte *src, ulint len, ibool unsigned_type)
sym_node_t * explicit_cursor
Definition: row0sel.h:333
UNIV_INLINE ibool btr_pcur_move_to_next(btr_pcur_t *cursor, mtr_t *mtr)
UNIV_INTERN ibool btr_pcur_move_to_prev(btr_pcur_t *cursor, mtr_t *mtr)
Definition: btr0pcur.cc:525
read_view_t * read_view
Definition: row0sel.h:309
pars_user_func_cb_t func
Definition: pars0pars.h:618
unsigned mtype
Definition: dict0mem.h:272
unsigned big_rows
Definition: dict0mem.h:570
UNIV_INLINE ibool rec_offs_validate(const rec_t *rec, const dict_index_t *index, const ulint *offsets)
UNIV_INTERN que_thr_t * row_sel_step(que_thr_t *thr)
Definition: row0sel.cc:2015