SphinxBase  5prealpha
lm_trie.c
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /* ====================================================================
3  * Copyright (c) 2015 Carnegie Mellon University. All rights
4  * reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright
11  * notice, this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright
14  * notice, this list of conditions and the following disclaimer in
15  * the documentation and/or other materials provided with the
16  * distribution.
17  *
18  * This work was supported in part by funding from the Defense Advanced
19  * Research Projects Agency and the National Science Foundation of the
20  * United States of America, and the CMU Sphinx Speech Consortium.
21  *
22  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
23  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
24  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
26  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
28  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
29  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
30  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
31  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
32  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33  *
34  * ====================================================================
35  *
36  */
37 
38 #include <string.h>
39 #include <stdio.h>
40 
41 #include <sphinxbase/prim_type.h>
42 #include <sphinxbase/ckd_alloc.h>
43 #include <sphinxbase/err.h>
44 #include <sphinxbase/priority_queue.h>
45 
46 #include "lm_trie.h"
47 #include "lm_trie_quant.h"
48 
49 static uint32
50 base_size(uint32 entries, uint32 max_vocab, uint8 remaining_bits)
51 {
52  uint8 total_bits = bitarr_required_bits(max_vocab) + remaining_bits;
53  // Extra entry for next pointer at the end.
54  // +7 then / 8 to round up bits and convert to bytes
55  // +sizeof(uint64) so that ReadInt57 etc don't go segfault.
56  // Note that this waste is O(order), not O(number of ngrams).
57  return ((1 + entries) * total_bits + 7) / 8 + sizeof(uint64);
58 }
59 
60 uint32
61 middle_size(uint8 quant_bits, uint32 entries, uint32 max_vocab,
62  uint32 max_ptr)
63 {
64  return base_size(entries, max_vocab,
65  quant_bits + bitarr_required_bits(max_ptr));
66 }
67 
68 uint32
69 longest_size(uint8 quant_bits, uint32 entries, uint32 max_vocab)
70 {
71  return base_size(entries, max_vocab, quant_bits);
72 }
73 
74 static void
75 base_init(base_t * base, void *base_mem, uint32 max_vocab,
76  uint8 remaining_bits)
77 {
78  base->word_bits = bitarr_required_bits(max_vocab);
79  base->word_mask = (1U << base->word_bits) - 1U;
80  if (base->word_bits > 25)
81  E_ERROR
82  ("Sorry, word indices more than %d are not implemented. Edit util/bit_packing.hh and fix the bit packing functions\n",
83  (1U << 25));
84  base->total_bits = base->word_bits + remaining_bits;
85 
86  base->base = (uint8 *) base_mem;
87  base->insert_index = 0;
88  base->max_vocab = max_vocab;
89 }
90 
91 void
92 middle_init(middle_t * middle, void *base_mem, uint8 quant_bits,
93  uint32 entries, uint32 max_vocab, uint32 max_next,
94  void *next_source)
95 {
96  middle->quant_bits = quant_bits;
97  bitarr_mask_from_max(&middle->next_mask, max_next);
98  middle->next_source = next_source;
99  if (entries + 1 >= (1U << 25) || (max_next >= (1U << 25)))
100  E_ERROR
101  ("Sorry, this does not support more than %d n-grams of a particular order. Edit util/bit_packing.hh and fix the bit packing functions\n",
102  (1U << 25));
103  base_init(&middle->base, base_mem, max_vocab,
104  quant_bits + middle->next_mask.bits);
105 }
106 
107 void
108 longest_init(longest_t * longest, void *base_mem, uint8 quant_bits,
109  uint32 max_vocab)
110 {
111  base_init(&longest->base, base_mem, max_vocab, quant_bits);
112 }
113 
114 static bitarr_address_t
115 middle_insert(middle_t * middle, uint32 word, int order, int max_order)
116 {
117  uint32 at_pointer;
118  uint32 next;
119  bitarr_address_t address;
120  assert(word <= middle->base.word_mask);
121  address.base = middle->base.base;
122  address.offset = middle->base.insert_index * middle->base.total_bits;
123  bitarr_write_int25(address, middle->base.word_bits, word);
124  address.offset += middle->base.word_bits;
125  at_pointer = address.offset;
126  address.offset += middle->quant_bits;
127  if (order == max_order - 1) {
128  next = ((longest_t *) middle->next_source)->base.insert_index;
129  }
130  else {
131  next = ((middle_t *) middle->next_source)->base.insert_index;
132  }
133 
134  bitarr_write_int25(address, middle->next_mask.bits, next);
135  middle->base.insert_index++;
136  address.offset = at_pointer;
137  return address;
138 }
139 
140 static bitarr_address_t
141 longest_insert(longest_t * longest, uint32 index)
142 {
143  bitarr_address_t address;
144  assert(index <= longest->base.word_mask);
145  address.base = longest->base.base;
146  address.offset = longest->base.insert_index * longest->base.total_bits;
147  bitarr_write_int25(address, longest->base.word_bits, index);
148  address.offset += longest->base.word_bits;
149  longest->base.insert_index++;
150  return address;
151 }
152 
153 static void
154 middle_finish_loading(middle_t * middle, uint32 next_end)
155 {
156  bitarr_address_t address;
157  address.base = middle->base.base;
158  address.offset =
159  (middle->base.insert_index + 1) * middle->base.total_bits -
160  middle->next_mask.bits;
161  bitarr_write_int25(address, middle->next_mask.bits, next_end);
162 }
163 
164 static uint32
165 unigram_next(lm_trie_t * trie, int order)
166 {
167  return order ==
168  2 ? trie->longest->base.insert_index : trie->middle_begin->base.
169  insert_index;
170 }
171 
172 static void
173 recursive_insert(lm_trie_t * trie, ngram_raw_t ** raw_ngrams,
174  uint32 * counts, int order)
175 {
176  uint32 unigram_idx = 0;
177  uint32 *words;
178  float *probs;
179  const uint32 unigram_count = (uint32) counts[0];
180  priority_queue_t *ngrams =
181  priority_queue_create(order, &ngram_ord_comparator);
182  ngram_raw_ord_t *ngram;
183  uint32 *raw_ngrams_ptr;
184  int i;
185 
186  words = (uint32 *) ckd_calloc(order, sizeof(*words)); //for blanks catching
187  probs = (float *) ckd_calloc(order - 1, sizeof(*probs)); //for blanks prob generating
188  ngram = (ngram_raw_ord_t *) ckd_calloc(1, sizeof(*ngram));
189  ngram->order = 1;
190  ngram->instance.words = &unigram_idx;
191  priority_queue_add(ngrams, ngram);
192  raw_ngrams_ptr =
193  (uint32 *) ckd_calloc(order - 1, sizeof(*raw_ngrams_ptr));
194  for (i = 2; i <= order; ++i) {
195  ngram_raw_ord_t *tmp_ngram;
196 
197  if (counts[i - 1] <= 0)
198  continue;
199  tmp_ngram =
200  (ngram_raw_ord_t *) ckd_calloc(1, sizeof(*tmp_ngram));
201  tmp_ngram->order = i;
202  raw_ngrams_ptr[i - 2] = 0;
203  tmp_ngram->instance = raw_ngrams[i - 2][0];
204  priority_queue_add(ngrams, tmp_ngram);
205  }
206 
207  for (;;) {
208  ngram_raw_ord_t *top =
209  (ngram_raw_ord_t *) priority_queue_poll(ngrams);
210  if (top->order == 1) {
211  trie->unigrams[unigram_idx].next = unigram_next(trie, order);
212  words[0] = unigram_idx;
213  probs[0] = trie->unigrams[unigram_idx].prob;
214  if (++unigram_idx == unigram_count + 1) {
215  ckd_free(top);
216  break;
217  }
218  priority_queue_add(ngrams, top);
219  }
220  else {
221  for (i = 0; i < top->order - 1; i++) {
222  if (words[i] != top->instance.words[i]) {
223  //need to insert dummy suffixes to make ngram of higher order reachable
224  int j;
225  assert(i > 0); //unigrams are not pruned without removing ngrams that contains them
226  for (j = i; j < top->order - 1; j++) {
227  middle_t *middle = &trie->middle_begin[j - 1];
228  bitarr_address_t address =
229  middle_insert(middle, top->instance.words[j],
230  j + 1, order);
231  //calculate prob for blank
232  float calc_prob =
233  probs[j - 1] +
234  trie->unigrams[top->instance.words[j]].bo;
235  probs[j] = calc_prob;
236  lm_trie_quant_mwrite(trie->quant, address, j - 1,
237  calc_prob, 0.0f);
238  }
239  }
240  }
241  memcpy(words, top->instance.words,
242  top->order * sizeof(*words));
243  if (top->order == order) {
244  float *weights = top->instance.weights;
245  bitarr_address_t address =
246  longest_insert(trie->longest,
247  top->instance.words[top->order - 1]);
248  lm_trie_quant_lwrite(trie->quant, address, weights[0]);
249  }
250  else {
251  float *weights = top->instance.weights;
252  middle_t *middle = &trie->middle_begin[top->order - 2];
253  bitarr_address_t address =
254  middle_insert(middle,
255  top->instance.words[top->order - 1],
256  top->order, order);
257  //write prob and backoff
258  probs[top->order - 1] = weights[0];
259  lm_trie_quant_mwrite(trie->quant, address, top->order - 2,
260  weights[0], weights[1]);
261  }
262  raw_ngrams_ptr[top->order - 2]++;
263  if (raw_ngrams_ptr[top->order - 2] < counts[top->order - 1]) {
264  top->instance =
265  raw_ngrams[top->order -
266  2][raw_ngrams_ptr[top->order - 2]];
267  priority_queue_add(ngrams, top);
268  }
269  else {
270  ckd_free(top);
271  }
272  }
273  }
274  assert(priority_queue_size(ngrams) == 0);
275  priority_queue_free(ngrams, NULL);
276  ckd_free(raw_ngrams_ptr);
277  ckd_free(words);
278  ckd_free(probs);
279 }
280 
281 static lm_trie_t *
282 lm_trie_init(uint32 unigram_count)
283 {
284  lm_trie_t *trie;
285 
286  trie = (lm_trie_t *) ckd_calloc(1, sizeof(*trie));
287  memset(trie->prev_hist, -1, sizeof(trie->prev_hist)); //prepare request history
288  memset(trie->backoff, 0, sizeof(trie->backoff));
289  trie->unigrams =
290  (unigram_t *) ckd_calloc((unigram_count + 1),
291  sizeof(*trie->unigrams));
292  trie->ngram_mem = NULL;
293  return trie;
294 }
295 
296 lm_trie_t *
297 lm_trie_create(uint32 unigram_count, lm_trie_quant_type_t quant_type,
298  int order)
299 {
300  lm_trie_t *trie = lm_trie_init(unigram_count);
301  trie->quant =
302  (order > 1) ? lm_trie_quant_create(quant_type, order) : 0;
303  return trie;
304 }
305 
306 lm_trie_t *
307 lm_trie_read_bin(uint32 * counts, int order, FILE * fp)
308 {
309  lm_trie_t *trie = lm_trie_init(counts[0]);
310  trie->quant = (order > 1) ? lm_trie_quant_read_bin(fp, order) : NULL;
311  fread(trie->unigrams, sizeof(*trie->unigrams), (counts[0] + 1), fp);
312  if (order > 1) {
313  lm_trie_alloc_ngram(trie, counts, order);
314  fread(trie->ngram_mem, 1, trie->ngram_mem_size, fp);
315  }
316  return trie;
317 }
318 
319 void
320 lm_trie_write_bin(lm_trie_t * trie, uint32 unigram_count, FILE * fp)
321 {
322 
323  if (trie->quant)
324  lm_trie_quant_write_bin(trie->quant, fp);
325  fwrite(trie->unigrams, sizeof(*trie->unigrams), (unigram_count + 1),
326  fp);
327  if (trie->ngram_mem)
328  fwrite(trie->ngram_mem, 1, trie->ngram_mem_size, fp);
329 }
330 
331 void
332 lm_trie_free(lm_trie_t * trie)
333 {
334  if (trie->ngram_mem) {
335  ckd_free(trie->ngram_mem);
336  ckd_free(trie->middle_begin);
337  ckd_free(trie->longest);
338  }
339  if (trie->quant)
340  lm_trie_quant_free(trie->quant);
341  ckd_free(trie->unigrams);
342  ckd_free(trie);
343 }
344 
345 void
346 lm_trie_alloc_ngram(lm_trie_t * trie, uint32 * counts, int order)
347 {
348  int i;
349  uint8 *mem_ptr;
350  uint8 **middle_starts;
351 
352  trie->ngram_mem_size = 0;
353  for (i = 1; i < order - 1; i++) {
354  trie->ngram_mem_size +=
355  middle_size(lm_trie_quant_msize(trie->quant), counts[i],
356  counts[0], counts[i + 1]);
357  }
358  trie->ngram_mem_size +=
359  longest_size(lm_trie_quant_lsize(trie->quant), counts[order - 1],
360  counts[0]);
361  trie->ngram_mem =
362  (uint8 *) ckd_calloc(trie->ngram_mem_size,
363  sizeof(*trie->ngram_mem));
364  mem_ptr = trie->ngram_mem;
365  trie->middle_begin =
366  (middle_t *) ckd_calloc(order - 2, sizeof(*trie->middle_begin));
367  trie->middle_end = trie->middle_begin + (order - 2);
368  middle_starts =
369  (uint8 **) ckd_calloc(order - 2, sizeof(*middle_starts));
370  for (i = 2; i < order; i++) {
371  middle_starts[i - 2] = mem_ptr;
372  mem_ptr +=
373  middle_size(lm_trie_quant_msize(trie->quant), counts[i - 1],
374  counts[0], counts[i]);
375  }
376  trie->longest = (longest_t *) ckd_calloc(1, sizeof(*trie->longest));
377  // Crazy backwards thing so we initialize using pointers to ones that have already been initialized
378  for (i = order - 1; i >= 2; --i) {
379  middle_t *middle_ptr = &trie->middle_begin[i - 2];
380  middle_init(middle_ptr, middle_starts[i - 2],
381  lm_trie_quant_msize(trie->quant), counts[i - 1],
382  counts[0], counts[i],
383  (i ==
384  order -
385  1) ? (void *) trie->longest : (void *) &trie->
386  middle_begin[i - 1]);
387  }
388  ckd_free(middle_starts);
389  longest_init(trie->longest, mem_ptr, lm_trie_quant_lsize(trie->quant),
390  counts[0]);
391 }
392 
393 void
394 lm_trie_build(lm_trie_t * trie, ngram_raw_t ** raw_ngrams, uint32 * counts,
395  int order)
396 {
397  int i;
398  if (lm_trie_quant_to_train(trie->quant)) {
399  E_INFO("Training quantizer\n");
400  for (i = 2; i < order; i++) {
401  lm_trie_quant_train(trie->quant, i, counts[i - 1],
402  raw_ngrams[i - 2]);
403  }
404  lm_trie_quant_train_prob(trie->quant, order, counts[order - 1],
405  raw_ngrams[order - 2]);
406  }
407  E_INFO("Building LM trie\n");
408  recursive_insert(trie, raw_ngrams, counts, order);
409  /* Set ending offsets so the last entry will be sized properly */
410  // Last entry for unigrams was already set.
411  if (trie->middle_begin != trie->middle_end) {
412  middle_t *middle_ptr;
413  for (middle_ptr = trie->middle_begin;
414  middle_ptr != trie->middle_end - 1; ++middle_ptr) {
415  middle_t *next_middle_ptr = middle_ptr + 1;
416  middle_finish_loading(middle_ptr,
417  next_middle_ptr->base.insert_index);
418  }
419  middle_ptr = trie->middle_end - 1;
420  middle_finish_loading(middle_ptr,
421  trie->longest->base.insert_index);
422  }
423 }
424 
425 unigram_t *
426 unigram_find(unigram_t * u, uint32 word, node_range_t * next)
427 {
428  unigram_t *ptr = &u[word];
429  next->begin = ptr->next;
430  next->end = (ptr + 1)->next;
431  return ptr;
432 }
433 
434 static size_t
435 calc_pivot(uint32 off, uint32 range, uint32 width)
436 {
437  return (size_t) ((off * width) / (range + 1));
438 }
439 
440 static uint8
441 uniform_find(void *base, uint8 total_bits, uint8 key_bits, uint32 key_mask,
442  uint32 before_it, uint32 before_v,
443  uint32 after_it, uint32 after_v, uint32 key, uint32 * out)
444 {
445  bitarr_address_t address;
446  address.base = base;
447  while (after_it - before_it > 1) {
448  uint32 mid;
449  uint32 pivot =
450  before_it + (1 +
451  calc_pivot(key - before_v, after_v - before_v,
452  after_it - before_it - 1));
453  //access by pivot
454  address.offset = pivot * (uint32) total_bits;
455  mid = bitarr_read_int25(address, key_bits, key_mask);
456  if (mid < key) {
457  before_it = pivot;
458  before_v = mid;
459  }
460  else if (mid > key) {
461  after_it = pivot;
462  after_v = mid;
463  }
464  else {
465  *out = pivot;
466  return TRUE;
467  }
468  }
469  return FALSE;
470 }
471 
472 static bitarr_address_t
473 middle_find(middle_t * middle, uint32 word, node_range_t * range)
474 {
475  uint32 at_pointer;
476  bitarr_address_t address;
477 
478  //finding BitPacked with uniform find
479  if (!uniform_find
480  ((void *) middle->base.base, middle->base.total_bits,
481  middle->base.word_bits, middle->base.word_mask, range->begin - 1,
482  0, range->end, middle->base.max_vocab, word, &at_pointer)) {
483  address.base = NULL;
484  address.offset = 0;
485  return address;
486  }
487 
488  address.base = middle->base.base;
489  at_pointer *= middle->base.total_bits;
490  at_pointer += middle->base.word_bits;
491  address.offset = at_pointer + middle->quant_bits;
492  range->begin =
493  bitarr_read_int25(address, middle->next_mask.bits,
494  middle->next_mask.mask);
495  address.offset += middle->base.total_bits;
496  range->end =
497  bitarr_read_int25(address, middle->next_mask.bits,
498  middle->next_mask.mask);
499  address.offset = at_pointer;
500 
501  return address;
502 }
503 
504 static bitarr_address_t
505 longest_find(longest_t * longest, uint32 word, node_range_t * range)
506 {
507  uint32 at_pointer;
508  bitarr_address_t address;
509 
510  //finding BitPacked with uniform find
511  if (!uniform_find
512  ((void *) longest->base.base, longest->base.total_bits,
513  longest->base.word_bits, longest->base.word_mask,
514  range->begin - 1, 0, range->end, longest->base.max_vocab, word,
515  &at_pointer)) {
516  address.base = NULL;
517  address.offset = 0;
518  return address;
519  }
520  address.base = longest->base.base;
521  address.offset =
522  at_pointer * longest->base.total_bits + longest->base.word_bits;
523  return address;
524 }
525 
526 static float
527 get_available_prob(lm_trie_t * trie, int32 wid, int32 * hist,
528  int max_order, int32 n_hist, int32 * n_used)
529 {
530  float prob;
531  node_range_t node;
532  bitarr_address_t address;
533  int order_minus_2;
534  uint8 independent_left;
535  int32 *hist_iter, *hist_end;
536 
537  *n_used = 1;
538  prob = unigram_find(trie->unigrams, wid, &node)->prob;
539  if (n_hist == 0) {
540  return prob;
541  }
542 
543  //find ngrams of higher order if any
544  order_minus_2 = 0;
545  independent_left = (node.begin == node.end);
546  hist_iter = hist;
547  hist_end = hist + n_hist;
548  for (;; order_minus_2++, hist_iter++) {
549  if (hist_iter == hist_end)
550  return prob;
551  if (independent_left)
552  return prob;
553  if (order_minus_2 == max_order - 2)
554  break;
555 
556  address =
557  middle_find(&trie->middle_begin[order_minus_2], *hist_iter,
558  &node);
559  independent_left = (address.base == NULL)
560  || (node.begin == node.end);
561 
562  //didn't find entry
563  if (address.base == NULL)
564  return prob;
565  prob = lm_trie_quant_mpread(trie->quant, address, order_minus_2);
566  *n_used = order_minus_2 + 2;
567  }
568 
569  address = longest_find(trie->longest, *hist_iter, &node);
570  if (address.base != NULL) {
571  prob = lm_trie_quant_lpread(trie->quant, address);
572  *n_used = max_order;
573  }
574  return prob;
575 }
576 
577 static float
578 get_available_backoff(lm_trie_t * trie, int32 start, int32 * hist,
579  int32 n_hist)
580 {
581  float backoff = 0.0f;
582  int order_minus_2;
583  int32 *hist_iter;
584  node_range_t node;
585  unigram_t *first_hist = unigram_find(trie->unigrams, hist[0], &node);
586  if (start <= 1) {
587  backoff += first_hist->bo;
588  start = 2;
589  }
590  order_minus_2 = start - 2;
591  for (hist_iter = hist + start - 1; hist_iter < hist + n_hist;
592  hist_iter++, order_minus_2++) {
593  bitarr_address_t address =
594  middle_find(&trie->middle_begin[order_minus_2], *hist_iter,
595  &node);
596  if (address.base == NULL)
597  break;
598  backoff +=
599  lm_trie_quant_mboread(trie->quant, address, order_minus_2);
600  }
601  return backoff;
602 }
603 
604 static float
605 lm_trie_nobo_score(lm_trie_t * trie, int32 wid, int32 * hist,
606  int max_order, int32 n_hist, int32 * n_used)
607 {
608  float prob =
609  get_available_prob(trie, wid, hist, max_order, n_hist, n_used);
610  if (n_hist < *n_used)
611  return prob;
612  return prob + get_available_backoff(trie, *n_used, hist, n_hist);
613 }
614 
615 static float
616 lm_trie_hist_score(lm_trie_t * trie, int32 wid, int32 * hist, int32 n_hist,
617  int32 * n_used)
618 {
619  float prob;
620  int i, j;
621  node_range_t node;
622  bitarr_address_t address;
623 
624  *n_used = 1;
625  prob = unigram_find(trie->unigrams, wid, &node)->prob;
626  if (n_hist == 0)
627  return prob;
628  for (i = 0; i < n_hist - 1; i++) {
629  address = middle_find(&trie->middle_begin[i], hist[i], &node);
630  if (address.base == NULL) {
631  for (j = i; j < n_hist; j++) {
632  prob += trie->backoff[j];
633  }
634  return prob;
635  }
636  else {
637  (*n_used)++;
638  prob = lm_trie_quant_mpread(trie->quant, address, i);
639  }
640  }
641  address = longest_find(trie->longest, hist[n_hist - 1], &node);
642  if (address.base == NULL) {
643  return prob + trie->backoff[n_hist - 1];
644  }
645  else {
646  (*n_used)++;
647  return lm_trie_quant_lpread(trie->quant, address);
648  }
649 }
650 
651 static uint8
652 history_matches(int32 * hist, int32 * prev_hist, int32 n_hist)
653 {
654  int i;
655  for (i = 0; i < n_hist; i++) {
656  if (hist[i] != prev_hist[i]) {
657  return FALSE;
658  }
659  }
660  return TRUE;
661 }
662 
663 static void
664 update_backoff(lm_trie_t * trie, int32 * hist, int32 n_hist)
665 {
666  int i;
667  node_range_t node;
668  bitarr_address_t address;
669 
670  memset(trie->backoff, 0, sizeof(trie->backoff));
671  trie->backoff[0] = unigram_find(trie->unigrams, hist[0], &node)->bo;
672  for (i = 1; i < n_hist; i++) {
673  address = middle_find(&trie->middle_begin[i - 1], hist[i], &node);
674  if (address.base == NULL) {
675  break;
676  }
677  trie->backoff[i] =
678  lm_trie_quant_mboread(trie->quant, address, i - 1);
679  }
680  memcpy(trie->prev_hist, hist, n_hist * sizeof(*hist));
681 }
682 
683 float
684 lm_trie_score(lm_trie_t * trie, int order, int32 wid, int32 * hist,
685  int32 n_hist, int32 * n_used)
686 {
687  if (n_hist < order - 1) {
688  return lm_trie_nobo_score(trie, wid, hist, order, n_hist, n_used);
689  }
690  else {
691  assert(n_hist == order - 1);
692  if (!history_matches(hist, (int32 *) trie->prev_hist, n_hist)) {
693  update_backoff(trie, hist, n_hist);
694  }
695  return lm_trie_hist_score(trie, wid, hist, n_hist, n_used);
696  }
697 }
#define E_INFO(...)
Print logging information to standard error stream.
Definition: err.h:114
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
SPHINXBASE_EXPORT void bitarr_mask_from_max(bitarr_mask_t *bit_mask, uint32 max_value)
Fills mask for certain int range according to provided max value.
Definition: bitarr.c:172
#define E_ERROR(...)
Print error message to error log.
Definition: err.h:104
SPHINXBASE_EXPORT uint8 bitarr_required_bits(uint32 max_value)
Computes amount of bits required ti store integers upto value provided.
Definition: bitarr.c:178
Sphinx&#39;s memory allocation/deallocation routines.
Basic type definitions used in Sphinx.
SPHINXBASE_EXPORT void bitarr_write_int25(bitarr_address_t address, uint8 length, uint32 value)
Write specified value into bit array.
Definition: bitarr.c:128
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
Definition: lm_trie.h:58
Structure that stores address of certain value in bit array.
Definition: bitarr.h:75
SPHINXBASE_EXPORT uint32 bitarr_read_int25(bitarr_address_t address, uint8 length, uint32 mask)
Read uint32 value from bit array.
Definition: bitarr.c:116
Implementation of logging routines.
Definition: dtoa.c:178