Drizzled Public API Documentation

sel_arg.h
1 /* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
3  *
4  * Copyright (C) 2008-2009 Sun Microsystems, Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation; version 2 of the License.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18  */
19 
20 #pragma once
21 
22 namespace drizzled {
23 namespace optimizer {
24 
25 /*
26  A construction block of the SEL_ARG-graph.
27 
28  The following description only covers graphs of SEL_ARG objects with
29  sel_arg->type==KEY_RANGE:
30 
31  One SEL_ARG object represents an "elementary interval" in form
32 
33  min_value <=? table.keypartX <=? max_value
34 
35  The interval is a non-empty interval of any kind: with[out] minimum/maximum
36  bound, [half]open/closed, single-point interval, etc.
37 
38  1. SEL_ARG GRAPH STRUCTURE
39 
40  SEL_ARG objects are linked together in a graph. The meaning of the graph
41  is better demostrated by an example:
42 
43  tree->keys[i]
44  |
45  | $ $
46  | part=1 $ part=2 $ part=3
47  | $ $
48  | +-------+ $ +-------+ $ +--------+
49  | | kp1<1 |--$-->| kp2=5 |--$-->| kp3=10 |
50  | +-------+ $ +-------+ $ +--------+
51  | | $ $ |
52  | | $ $ +--------+
53  | | $ $ | kp3=12 |
54  | | $ $ +--------+
55  | +-------+ $ $
56  \->| kp1=2 |--$--------------$-+
57  +-------+ $ $ | +--------+
58  | $ $ ==>| kp3=11 |
59  +-------+ $ $ | +--------+
60  | kp1=3 |--$--------------$-+ |
61  +-------+ $ $ +--------+
62  | $ $ | kp3=14 |
63  ... $ $ +--------+
64 
65  The entire graph is partitioned into "interval lists".
66 
67  An interval list is a sequence of ordered disjoint intervals over the same
68  key part. SEL_ARG are linked via "next" and "prev" pointers. Additionally,
69  all intervals in the list form an RB-tree, linked via left/right/parent
70  pointers. The RB-tree root SEL_ARG object will be further called "root of the
71  interval list".
72 
73  In the example pic, there are 4 interval lists:
74  "kp<1 OR kp1=2 OR kp1=3", "kp2=5", "kp3=10 OR kp3=12", "kp3=11 OR kp3=13".
75  The vertical lines represent SEL_ARG::next/prev pointers.
76 
77  In an interval list, each member X may have SEL_ARG::next_key_part pointer
78  pointing to the root of another interval list Y. The pointed interval list
79  must cover a key part with greater number (i.e. Y->part > X->part).
80 
81  In the example pic, the next_key_part pointers are represented by
82  horisontal lines.
83 
84  2. SEL_ARG GRAPH SEMANTICS
85 
86  It represents a condition in a special form (we don't have a name for it ATM)
87  The SEL_ARG::next/prev is "OR", and next_key_part is "AND".
88 
89  For example, the picture represents the condition in form:
90  (kp1 < 1 AND kp2=5 AND (kp3=10 OR kp3=12)) OR
91  (kp1=2 AND (kp3=11 OR kp3=14)) OR
92  (kp1=3 AND (kp3=11 OR kp3=14))
93 
94 
95  3. SEL_ARG GRAPH USE
96 
97  Use get_mm_tree() to construct SEL_ARG graph from WHERE condition.
98  Then walk the SEL_ARG graph and get a list of dijsoint ordered key
99  intervals (i.e. intervals in form
100 
101  (constA1, .., const1_K) < (keypart1,.., keypartK) < (constB1, .., constB_K)
102 
103  Those intervals can be used to access the index. The uses are in:
104  - check_quick_select() - Walk the SEL_ARG graph and find an estimate of
105  how many table records are contained within all
106  intervals.
107  - get_quick_select() - Walk the SEL_ARG, materialize the key intervals,
108  and create QuickRangeSelect object that will
109  read records within these intervals.
110 
111  4. SPACE COMPLEXITY NOTES
112 
113  SEL_ARG graph is a representation of an ordered disjoint sequence of
114  intervals over the ordered set of index tuple values.
115 
116  For multi-part keys, one can construct a WHERE expression such that its
117  list of intervals will be of combinatorial size. Here is an example:
118 
119  (keypart1 IN (1,2, ..., n1)) AND
120  (keypart2 IN (1,2, ..., n2)) AND
121  (keypart3 IN (1,2, ..., n3))
122 
123  For this WHERE clause the list of intervals will have n1*n2*n3 intervals
124  of form
125 
126  (keypart1, keypart2, keypart3) = (k1, k2, k3), where 1 <= k{i} <= n{i}
127 
128  SEL_ARG graph structure aims to reduce the amount of required space by
129  "sharing" the elementary intervals when possible (the pic at the
130  beginning of this comment has examples of such sharing). The sharing may
131  prevent combinatorial blowup:
132 
133  There are WHERE clauses that have combinatorial-size interval lists but
134  will be represented by a compact SEL_ARG graph.
135  Example:
136  (keypartN IN (1,2, ..., n1)) AND
137  ...
138  (keypart2 IN (1,2, ..., n2)) AND
139  (keypart1 IN (1,2, ..., n3))
140 
141  but not in all cases:
142 
143  - There are WHERE clauses that do have a compact SEL_ARG-graph
144  representation but get_mm_tree() and its callees will construct a
145  graph of combinatorial size.
146  Example:
147  (keypart1 IN (1,2, ..., n1)) AND
148  (keypart2 IN (1,2, ..., n2)) AND
149  ...
150  (keypartN IN (1,2, ..., n3))
151 
152  - There are WHERE clauses for which the minimal possible SEL_ARG graph
153  representation will have combinatorial size.
154  Example:
155  By induction: Let's take any interval on some keypart in the middle:
156 
157  kp15=c0
158 
159  Then let's AND it with this interval 'structure' from preceding and
160  following keyparts:
161 
162  (kp14=c1 AND kp16=c3) OR keypart14=c2) (*)
163 
164  We will obtain this SEL_ARG graph:
165 
166  kp14 $ kp15 $ kp16
167  $ $
168  +---------+ $ +---------+ $ +---------+
169  | kp14=c1 |--$-->| kp15=c0 |--$-->| kp16=c3 |
170  +---------+ $ +---------+ $ +---------+
171  | $ $
172  +---------+ $ +---------+ $
173  | kp14=c2 |--$-->| kp15=c0 | $
174  +---------+ $ +---------+ $
175  $ $
176 
177  Note that we had to duplicate "kp15=c0" and there was no way to avoid
178  that.
179  The induction step: AND the obtained expression with another "wrapping"
180  expression like (*).
181  When the process ends because of the limit on max. number of keyparts
182  we'll have:
183 
184  WHERE clause length is O(3*#max_keyparts)
185  SEL_ARG graph size is O(2^(#max_keyparts/2))
186 
187  (it is also possible to construct a case where instead of 2 in 2^n we
188  have a bigger constant, e.g. 4, and get a graph with 4^(31/2)= 2^31
189  nodes)
190 
191  We avoid consuming too much memory by setting a limit on the number of
192  SEL_ARG object we can construct during one range analysis invocation.
193 */
194 
196 {
197 public:
198  uint8_t min_flag,max_flag,maybe_flag;
199  uint8_t part; // Which key part
200  uint8_t maybe_null;
201  /*
202  Number of children of this element in the RB-tree, plus 1 for this
203  element itself.
204  */
205  uint16_t elements;
206  /*
207  Valid only for elements which are RB-tree roots: Number of times this
208  RB-tree is referred to (it is referred by SEL_ARG::next_key_part or by
209  SEL_TREE::keys[i] or by a temporary SEL_ARG* variable)
210  */
211  ulong use_count;
212 
213  Field *field;
214  unsigned char *min_value,*max_value; // Pointer to range
215 
216  /*
217  eq_tree() requires that left == right == 0 if the type is MAYBE_KEY.
218  */
219  SEL_ARG *left,*right; /* R-B tree children */
220  SEL_ARG *next,*prev; /* Links for bi-directional interval list */
221  SEL_ARG *parent; /* R-B tree parent */
222  SEL_ARG *next_key_part;
223  enum leaf_color { BLACK,RED } color;
224  enum Type { IMPOSSIBLE, MAYBE, MAYBE_KEY, KEY_RANGE } type;
225 
226  enum
227  {
228  MAX_SEL_ARGS = 16000
229  };
230 
231  SEL_ARG() {}
232 
233  SEL_ARG(SEL_ARG &);
234 
235  SEL_ARG(Field *,const unsigned char *, const unsigned char *);
236 
237  SEL_ARG(Field *field,
238  uint8_t part,
239  unsigned char *min_value,
240  unsigned char *max_value,
241  uint8_t min_flag,
242  uint8_t max_flag,
243  uint8_t maybe_flag);
244 
245  SEL_ARG(enum Type type_arg)
246  :
247  min_flag(0),
248  elements(1),
249  use_count(1),
250  left(0),
251  right(0),
252  next_key_part(0),
253  color(BLACK),
254  type(type_arg)
255  {}
256 
257  int size() const
258  {
259  return elements;
260  }
261 
262  inline bool is_same(SEL_ARG *arg)
263  {
264  if (type != arg->type || part != arg->part)
265  return 0;
266  if (type != KEY_RANGE)
267  return 1;
268  return (cmp_min_to_min(arg) == 0 && cmp_max_to_max(arg) == 0);
269  }
270 
271  inline void merge_flags(SEL_ARG *arg)
272  {
273  maybe_flag|= arg->maybe_flag;
274  }
275 
276  inline void maybe_smaller()
277  {
278  maybe_flag= 1;
279  }
280 
281  /* Return true iff it's a single-point null interval */
282  inline bool is_null_interval()
283  {
284  return (maybe_null && max_value[0] == 1);
285  }
286 
287  inline int cmp_min_to_min(SEL_ARG *arg)
288  {
289  return sel_cmp(field,min_value, arg->min_value, min_flag, arg->min_flag);
290  }
291 
292  inline int cmp_min_to_max(SEL_ARG *arg)
293  {
294  return sel_cmp(field,min_value, arg->max_value, min_flag, arg->max_flag);
295  }
296 
297  inline int cmp_max_to_max(SEL_ARG *arg)
298  {
299  return sel_cmp(field,max_value, arg->max_value, max_flag, arg->max_flag);
300  }
301 
302  inline int cmp_max_to_min(SEL_ARG *arg)
303  {
304  return sel_cmp(field,max_value, arg->min_value, max_flag, arg->min_flag);
305  }
306 
307  SEL_ARG *clone_and(SEL_ARG *arg);
308 
309  SEL_ARG *clone_first(SEL_ARG *arg);
310 
311  SEL_ARG *clone_last(SEL_ARG *arg);
312 
313  SEL_ARG *clone(RangeParameter *param, SEL_ARG *new_parent, SEL_ARG **next);
314 
315  bool copy_min(SEL_ARG *arg);
316 
317  bool copy_max(SEL_ARG *arg);
318 
319  void copy_min_to_min(SEL_ARG *arg);
320 
321  void copy_min_to_max(SEL_ARG *arg);
322 
323  void copy_max_to_min(SEL_ARG *arg);
324 
325  /* returns a number of keypart values (0 or 1) appended to the key buffer */
326  int store_min(uint32_t length, unsigned char **min_key, uint32_t min_key_flag);
327 
328  /* returns a number of keypart values (0 or 1) appended to the key buffer */
329  int store_max(uint32_t length, unsigned char **max_key, uint32_t max_key_flag);
330 
331  /* returns a number of keypart values appended to the key buffer */
332  int store_min_key(KEY_PART *key, unsigned char **range_key, uint32_t *range_key_flag);
333 
334  /* returns a number of keypart values appended to the key buffer */
335  int store_max_key(KEY_PART *key, unsigned char **range_key, uint32_t *range_key_flag);
336 
337  SEL_ARG *insert(SEL_ARG *key);
338  SEL_ARG *tree_delete(SEL_ARG *key);
339  SEL_ARG *find_range(SEL_ARG *key);
340  SEL_ARG *rb_insert(SEL_ARG *leaf);
341 
342  friend SEL_ARG *rb_delete_fixup(SEL_ARG *root,SEL_ARG *key, SEL_ARG *par);
343 
344  SEL_ARG *first();
345 
346  SEL_ARG *last();
347 
348  void make_root();
349 
350  inline bool simple_key()
351  {
352  return (! next_key_part && elements == 1);
353  }
354 
355  void increment_use_count(long count)
356  {
357  if (next_key_part)
358  {
359  next_key_part->use_count+= count;
360  count*= (next_key_part->use_count - count);
361  for (SEL_ARG *pos= next_key_part->first(); pos; pos= pos->next)
362  if (pos->next_key_part)
363  pos->increment_use_count(count);
364  }
365  }
366 
367  void free_tree()
368  {
369  for (SEL_ARG *pos= first(); pos; pos= pos->next)
370  if (pos->next_key_part)
371  {
372  pos->next_key_part->use_count--;
373  pos->next_key_part->free_tree();
374  }
375  }
376 
377  inline SEL_ARG **parent_ptr()
378  {
379  return parent->left == this ? &parent->left : &parent->right;
380  }
381 
382 
383  /*
384  Check if this SEL_ARG object represents a single-point interval
385 
386  SYNOPSIS
387  is_singlepoint()
388 
389  DESCRIPTION
390  Check if this SEL_ARG object (not tree) represents a single-point
391  interval, i.e. if it represents a "keypart = const" or
392  "keypart IS NULL".
393 
394  RETURN
395  true This SEL_ARG object represents a singlepoint interval
396  false Otherwise
397  */
398 
399  bool is_singlepoint()
400  {
401  /*
402  Check for NEAR_MIN ("strictly less") and NO_MIN_RANGE (-inf < field)
403  flags, and the same for right edge.
404  */
405  if (min_flag || max_flag)
406  return false;
407  unsigned char *min_val= min_value;
408  unsigned char *max_val= max_value;
409 
410  if (maybe_null)
411  {
412  /* First byte is a NULL value indicator */
413  if (*min_val != *max_val)
414  return false;
415 
416  if (*min_val)
417  return true; /* This "x IS NULL" */
418  min_val++;
419  max_val++;
420  }
421  return ! field->key_cmp(min_val, max_val);
422  }
423 
424  SEL_ARG *clone_tree(RangeParameter *param);
425 
426 private:
427 
428  /*
429  Check if a compare is ok, when one takes ranges in account
430  Returns -2 or 2 if the ranges where 'joined' like < 2 and >= 2
431  */
432  int sel_cmp(Field *in_field,
433  unsigned char *a,
434  unsigned char *b,
435  uint8_t a_flag,
436  uint8_t b_flag)
437  {
438  int cmp= 0;
439  /* First check if there was a compare to a min or max element */
440  if (a_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
441  {
442  if ((a_flag & (NO_MIN_RANGE | NO_MAX_RANGE)) ==
443  (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE)))
444  return 0;
445  return (a_flag & NO_MIN_RANGE) ? -1 : 1;
446  }
447  if (b_flag & (NO_MIN_RANGE | NO_MAX_RANGE))
448  return (b_flag & NO_MIN_RANGE) ? 1 : -1;
449 
450  if (in_field->real_maybe_null()) // If null is part of key
451  {
452  if (*a != *b)
453  {
454  return *a ? -1 : 1;
455  }
456  if (*a)
457  goto end; // NULL where equal
458  a++; b++; // Skip NULL marker
459  }
460  cmp= in_field->key_cmp(a , b);
461  if (cmp) return cmp < 0 ? -1 : 1; // The values differed
462 
463  // Check if the compared equal arguments was defined with open/closed range
464 end:
465  if (a_flag & (NEAR_MIN | NEAR_MAX))
466  {
467  if ((a_flag & (NEAR_MIN | NEAR_MAX)) == (b_flag & (NEAR_MIN | NEAR_MAX)))
468  return 0;
469  if (! (b_flag & (NEAR_MIN | NEAR_MAX)))
470  return (a_flag & NEAR_MIN) ? 2 : -2;
471  return (a_flag & NEAR_MIN) ? 1 : -1;
472  }
473  if (b_flag & (NEAR_MIN | NEAR_MAX))
474  return (b_flag & NEAR_MIN) ? -2 : 2;
475  return 0; // The elements where equal
476  }
477 
478 
479 };
480 
481 SEL_ARG *rb_delete_fixup(SEL_ARG *root,
482  SEL_ARG *key,
483  SEL_ARG *par);
484 
485 extern SEL_ARG null_element;
486 
487 } /* namespace optimizer */
488 
489 } /* namespace drizzled */
490 
TODO: Rename this file - func.h is stupid.