Drizzled Public API Documentation

temporal_format.cc
Go to the documentation of this file.
1 /* - mode: c; c-basic-offset: 2; indent-tabs-mode: nil; -*-
2  * vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
3  *
4  * Copyright (C) 2008 Sun Microsystems, Inc.
5  *
6  * Authors:
7  *
8  * Jay Pipes <jay.pipes@sun.com>
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
31 #include <config.h>
32 
33 #include <boost/foreach.hpp>
35 #include <drizzled/temporal.h>
36 
37 #include <string.h>
38 #include PCRE_HEADER
39 
40 #include <string>
41 #include <vector>
42 
43 using namespace std;
44 
45 namespace drizzled {
46 
47 TemporalFormat::TemporalFormat(const char *pattern) :
48  _pattern(pattern)
49 , _error_offset(0)
50 , _error(NULL)
51 , _year_part_index(0)
52 , _month_part_index(0)
53 , _day_part_index(0)
54 , _hour_part_index(0)
55 , _minute_part_index(0)
56 , _second_part_index(0)
57 , _usecond_part_index(0)
58 , _nsecond_part_index(0)
59 {
60  /* Compile our regular expression */
61  _re= pcre_compile(pattern
62  , 0 /* Default options */
63  , &_error
64  , &_error_offset
65  , NULL /* Use default character table */
66  );
67 }
68 
69 TemporalFormat::~TemporalFormat()
70 {
71  pcre_free(_re);
72 }
73 
74 bool TemporalFormat::matches(const char *data, size_t data_len, Temporal *to)
75 {
76  if (! is_valid())
77  return false;
78 
79  int32_t match_vector[OUT_VECTOR_SIZE];
81  /* Make sure we've got no junk in the match_vector. */
82  memset(match_vector, 0, sizeof(match_vector));
83 
84  /* Simply check the subject against the compiled regular expression */
85  int32_t result= pcre_exec(_re
86  , NULL /* No extra data */
87  , data
88  , data_len
89  , 0 /* Start at offset 0 of subject...*/
90  , 0 /* Default options */
91  , match_vector
92  , OUT_VECTOR_SIZE
93  );
94  if (result < 0)
95  {
96  switch (result)
97  {
98  case PCRE_ERROR_NOMATCH:
99  return false; /* No match, just return false */
100  default:
101  return false;
102  }
103  return false;
104  }
105 
106  int32_t expected_match_count= (_year_part_index > 1 ? 1 : 0)
107  + (_month_part_index > 1 ? 1 : 0)
108  + (_day_part_index > 1 ? 1 : 0)
109  + (_hour_part_index > 1 ? 1 : 0)
110  + (_minute_part_index > 1 ? 1 : 0)
111  + (_second_part_index > 1 ? 1 : 0)
112  + (_usecond_part_index > 1 ? 1 : 0)
113  + (_nsecond_part_index > 1 ? 1 : 0)
114  + 1; /* Add one for the entire match... */
115  if (result != expected_match_count)
116  return false;
117 
118  /* C++ string class easy to use substr() method is very useful here */
119  string copy_data(data, data_len);
120  /*
121  * OK, we have the expected substring matches, so grab
122  * the various temporal parts from the subject string
123  *
124  * @note
125  *
126  * TemporalFormatMatch is a friend class to Temporal, so
127  * we can access the temporal instance's protected data.
128  */
129  if (_year_part_index > 1)
130  {
131  size_t year_start= match_vector[_year_part_index];
132  size_t year_len= match_vector[_year_part_index + 1] - match_vector[_year_part_index];
133  to->_years= atoi(copy_data.substr(year_start, year_len).c_str());
134  if (year_len == 2)
135  to->_years+= (to->_years >= DRIZZLE_YY_PART_YEAR ? 1900 : 2000);
136  }
137  if (_month_part_index > 1)
138  {
139  size_t month_start= match_vector[_month_part_index];
140  size_t month_len= match_vector[_month_part_index + 1] - match_vector[_month_part_index];
141  to->_months= atoi(copy_data.substr(month_start, month_len).c_str());
142  }
143  if (_day_part_index > 1)
144  {
145  size_t day_start= match_vector[_day_part_index];
146  size_t day_len= match_vector[_day_part_index + 1] - match_vector[_day_part_index];
147  to->_days= atoi(copy_data.substr(day_start, day_len).c_str());
148  }
149  if (_hour_part_index > 1)
150  {
151  size_t hour_start= match_vector[_hour_part_index];
152  size_t hour_len= match_vector[_hour_part_index + 1] - match_vector[_hour_part_index];
153  to->_hours= atoi(copy_data.substr(hour_start, hour_len).c_str());
154  }
155  if (_minute_part_index > 1)
156  {
157  size_t minute_start= match_vector[_minute_part_index];
158  size_t minute_len= match_vector[_minute_part_index + 1] - match_vector[_minute_part_index];
159  to->_minutes= atoi(copy_data.substr(minute_start, minute_len).c_str());
160  }
161  if (_second_part_index > 1)
162  {
163  size_t second_start= match_vector[_second_part_index];
164  size_t second_len= match_vector[_second_part_index + 1] - match_vector[_second_part_index];
165  to->_seconds= atoi(copy_data.substr(second_start, second_len).c_str());
166  }
167  if (_usecond_part_index > 1)
168  {
169  size_t usecond_start= match_vector[_usecond_part_index];
170  size_t usecond_len= match_vector[_usecond_part_index + 1] - match_vector[_usecond_part_index];
171  /*
172  * For microseconds, which are millionth of 1 second,
173  * we must ensure that we produce a correct result,
174  * even if < 6 places were specified. For instance, if we get .1,
175  * we must produce 100000. .11 should produce 110000, etc.
176  */
177  uint32_t multiplier= 1;
178  int32_t x= usecond_len;
179  while (x < 6)
180  {
181  multiplier*= 10;
182  ++x;
183  }
184  to->_useconds= atoi(copy_data.substr(usecond_start, usecond_len).c_str()) * multiplier;
185  }
186  if (_nsecond_part_index > 1)
187  {
188  size_t nsecond_start= match_vector[_nsecond_part_index];
189  size_t nsecond_len= match_vector[_nsecond_part_index + 1] - match_vector[_nsecond_part_index];
190  /*
191  * For nanoseconds, which are 1 billionth of a second,
192  * we must ensure that we produce a correct result,
193  * even if < 9 places were specified. For instance, if we get .1,
194  * we must produce 100000000. .11 should produce 110000000, etc.
195  */
196  uint32_t multiplier= 1;
197  int32_t x= nsecond_len;
198  while (x < 9)
199  {
200  multiplier*= 10;
201  ++x;
202  }
203  to->_nseconds= atoi(copy_data.substr(nsecond_start, nsecond_len).c_str()) * multiplier;
204  }
205  return true;
206 }
207 
208 
/* Number of entries in the __format_args table below. */
#define COUNT_KNOWN_FORMATS 19

/**
 * Describes one known temporal format: a regular expression plus, for
 * each temporal part, the 1-based number of the capture group holding
 * that part.  A value of 0 means the format does not contain the part.
 */
struct temporal_format_args
{
  const char *pattern;
  int32_t year_part_index;
  int32_t month_part_index;
  int32_t day_part_index;
  int32_t hour_part_index;
  int32_t minute_part_index;
  int32_t second_part_index;
  int32_t usecond_part_index;
  int32_t nsecond_part_index;
};

/**
 * Table of all known temporal formats.
 *
 * The separator between the date and the time portion is written as
 * (?:T|\s+) or \s+ rather than as a character class: inside [...] the
 * characters '|' and '+' are literals, so a class such as [T|\s+] would
 * also accept "2008-01-01|10:00:00" and "2008-01-01+10:00:00".  The
 * group is non-capturing so the part indexes are unaffected.
 */
static struct temporal_format_args __format_args[COUNT_KNOWN_FORMATS]=
{
  {"^(\\d{4})(\\d{2})(\\d{2})(\\d{2})(\\d{2})(\\d{2})\\.(\\d{1,6})$", 1, 2, 3, 4, 5, 6, 7, 0} /* YYYYMMDDHHmmSS.uuuuuu */
, {"^(\\d{4})(\\d{2})(\\d{2})(\\d{2})(\\d{2})(\\d{2})$", 1, 2, 3, 4, 5, 6, 0, 0} /* YYYYMMDDHHmmSS */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})(?:T|\\s+)(\\d{2}):(\\d{2}):(\\d{2})\\.(\\d{1,6})$", 1, 2, 3, 4, 5, 6, 7, 0} /* YYYY[/-.]MM[/-.]DD[T]HH:mm:SS.uuuuuu */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})(?:T|\\s+)(\\d{2}):(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 6, 0, 0} /* YYYY[/-.][M]M[/-.][D]D[T]HH:mm:SS */
, {"^(\\d{2})[-/.](\\d{1,2})[-/.](\\d{1,2})\\s+(\\d{2}):(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 6, 0, 0} /* YY[/-.][M]M[/-.][D]D HH:mm:SS */
, {"^(\\d{2})[-/.](\\d{1,2})[-/.](\\d{1,2})\\s+(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 0, 0, 0} /* YY[/-.][M]M[/-.][D]D HH:mm */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})\\s+(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 0, 0, 0} /* YYYY[/-.][M]M[/-.][D]D HH:mm */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YYYY-[M]M-[D]D, YYYY.[M]M.[D]D, YYYY/[M]M/[D]D */
, {"^(\\d{4})(\\d{2})(\\d{2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YYYYMMDD */
, {"^(\\d{2})[-/.]*(\\d{2})[-/.]*(\\d{4})$", 3, 1, 2, 0, 0, 0, 0, 0} /* MM[-/.]DD[-/.]YYYY (US common format)*/
, {"^(\\d{2})[-/.]*(\\d{2})[-/.]*(\\d{2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YY[-/.]MM[-/.]DD */
, {"^(\\d{2})[-/.]*(\\d{1,2})[-/.]*(\\d{1,2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YY[-/.][M]M[-/.][D]D */
, {"^(\\d{4})[-/.]*(\\d{1,2})[-/.]*(\\d{1,2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YYYY[-/.][M]M[-/.][D]D */
, {"^(\\d{2}):*(\\d{2}):*(\\d{2})\\.(\\d{1,6})$", 0, 0, 0, 1, 2, 3, 4, 0} /* HHmmSS.uuuuuu, HH:mm:SS.uuuuuu */
, {"^(\\d{1,2}):*(\\d{2}):*(\\d{2})$", 0, 0, 0, 1, 2, 3, 0, 0} /* [H]HmmSS, [H]H:mm:SS */
, {"^(\\d{1,2}):(\\d{1,2}):(\\d{1,2})$", 0, 0, 0, 1, 2, 3, 0, 0} /* [H]H:[m]m:[S]S */
, {"^(\\d{1,2}):*(\\d{2})$", 0, 0, 0, 0, 1, 2, 0, 0} /* [m]mSS, [m]m:SS */
, {"^(\\d{1,2})$", 0, 0, 0, 0, 0, 1, 0, 0} /* SS, S */
, {"^(\\d{1,2})\\.(\\d{1,6})$", 0, 0, 0, 0, 0, 1, 2, 0} /* [S]S.uuuuuu */
};
258 
259 vector<TemporalFormat *> known_datetime_formats;
260 vector<TemporalFormat *> known_date_formats;
261 vector<TemporalFormat *> known_time_formats;
262 vector<TemporalFormat *> all_temporal_formats;
263 
270 {
271  /* Compile all the regular expressions for the datetime formats */
272  TemporalFormat *tmp;
273  struct temporal_format_args current_format_args;
274 
275  for (int32_t x= 0; x < COUNT_KNOWN_FORMATS; ++x)
276  {
277  current_format_args= __format_args[x];
278  tmp= new TemporalFormat(current_format_args.pattern);
279  tmp->set_year_part_index(current_format_args.year_part_index);
280  tmp->set_month_part_index(current_format_args.month_part_index);
281  tmp->set_day_part_index(current_format_args.day_part_index);
282  tmp->set_hour_part_index(current_format_args.hour_part_index);
283  tmp->set_minute_part_index(current_format_args.minute_part_index);
284  tmp->set_second_part_index(current_format_args.second_part_index);
285  tmp->set_usecond_part_index(current_format_args.usecond_part_index);
286  tmp->set_nsecond_part_index(current_format_args.nsecond_part_index);
287 
288  /*
289  * We store the pointer in all_temporal_formats because we
290  * delete pointers from that vector and only that vector
291  */
292  all_temporal_formats.push_back(tmp);
293 
294  if (current_format_args.year_part_index > 0) /* A date must have a year */
295  {
296  known_datetime_formats.push_back(tmp);
297  if (current_format_args.second_part_index == 0) /* A time must have seconds. */
298  known_date_formats.push_back(tmp);
299  }
300 
301  if (current_format_args.second_part_index > 0) /* A time must have seconds, but may not have minutes or hours */
302  known_time_formats.push_back(tmp);
303  }
304  return true;
305 }
306 
309 {
310  BOOST_FOREACH(TemporalFormat* it, all_temporal_formats)
311  delete it;
312  known_date_formats.clear();
313  known_datetime_formats.clear();
314  known_time_formats.clear();
315  all_temporal_formats.clear();
316 }
317 
318 } /* end namespace drizzled */
bool matches(const char *data, size_t data_len, Temporal *to)
void set_second_part_index(int32_t index)
TODO: Rename this file - func.h is stupid.
void set_hour_part_index(int32_t index)
void set_minute_part_index(int32_t index)
void set_nsecond_part_index(int32_t index)
void set_year_part_index(int32_t index)
static struct temporal_format_args __format_args[COUNT_KNOWN_FORMATS]
void set_month_part_index(int32_t index)
bool init_temporal_formats()
void set_day_part_index(int32_t index)
void set_usecond_part_index(int32_t index)
void deinit_temporal_formats()