SphinxBase  5prealpha
yin.c
Go to the documentation of this file.
1 /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 /*
3  * Copyright (c) 2008 Beyond Access, Inc. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in
14  * the documentation and/or other materials provided with the
15  * distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. ``AS IS'' AND ANY
18  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BEYOND ACCESS, INC. NOR
21  * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
22  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
24  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
25  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  */
29 
35 /* This implements part of the YIN algorithm:
36  *
37  * "YIN, a fundamental frequency estimator for speech and music".
38  * Alain de Cheveigné and Hideki Kawahara. Journal of the Acoustical
39  * Society of America, 111 (4), April 2002.
40  */
41 
42 #include "sphinxbase/prim_type.h"
43 #include "sphinxbase/ckd_alloc.h"
44 #include "sphinxbase/fixpoint.h"
45 
46 #include "sphinxbase/yin.h"
47 
48 #include <stdio.h>
49 #include <string.h>
50 
51 struct yin_s {
52  uint16 frame_size;
53 #ifndef FIXED_POINT
55  float search_range;
56 #else
57  uint16 search_threshold;
58  uint16 search_range;
59 #endif
60  uint16 nfr;
62  unsigned char wsize;
63  unsigned char wstart;
64  unsigned char wcur;
65  unsigned char endut;
67 #ifndef FIXED_POINT
68  float **diff_window;
69 #else
70  fixed32 **diff_window;
71 #endif
72  uint16 *period_window;
73  int16 *frame;
74 };
75 
79 #ifndef FIXED_POINT
80 static void
81 cmn_diff(int16 const *signal, float *out_diff, int ndiff)
82 {
83  double cum;
84  int t, j;
85 
86  cum = 0.0f;
87  out_diff[0] = 1.0f;
88 
89  for (t = 1; t < ndiff; ++t) {
90  float dd;
91  dd = 0.0f;
92  for (j = 0; j < ndiff; ++j) {
93  int diff = signal[j] - signal[t + j];
94  dd += (diff * diff);
95  }
96  cum += dd;
97  out_diff[t] = (float)(dd * t / cum);
98  }
99 }
100 #else
101 static void
102 cmn_diff(int16 const *signal, int32 *out_diff, int ndiff)
103 {
104  uint32 cum, cshift;
105  int32 t, tscale;
106 
107  out_diff[0] = 32768;
108  cum = 0;
109  cshift = 0;
110 
111  /* Determine how many bits we can scale t up by below. */
112  for (tscale = 0; tscale < 32; ++tscale)
113  if (ndiff & (1<<(31-tscale)))
114  break;
115  --tscale; /* Avoid teh overflowz. */
116  /* printf("tscale is %d (ndiff - 1) << tscale is %d\n",
117  tscale, (ndiff-1) << tscale); */
118 
119  /* Somewhat elaborate block floating point implementation.
120  * The fp implementation of this is really a lot simpler. */
121  for (t = 1; t < ndiff; ++t) {
122  uint32 dd, dshift, norm;
123  int j;
124 
125  dd = 0;
126  dshift = 0;
127  for (j = 0; j < ndiff; ++j) {
128  int diff = signal[j] - signal[t + j];
129  /* Guard against overflows. */
130  if (dd > (1UL<<tscale)) {
131  dd >>= 1;
132  ++dshift;
133  }
134  dd += (diff * diff) >> dshift;
135  }
136  /* Make sure the diffs and cum are shifted to the same
137  * scaling factor (usually dshift will be zero) */
138  if (dshift > cshift) {
139  cum += dd << (dshift-cshift);
140  }
141  else {
142  cum += dd >> (cshift-dshift);
143  }
144 
145  /* Guard against overflows and also ensure that (t<<tscale) > cum. */
146  while (cum > (1UL<<tscale)) {
147  cum >>= 1;
148  ++cshift;
149  }
150  /* Avoid divide-by-zero! */
151  if (cum == 0) cum = 1;
152  /* Calculate the normalizer in high precision. */
153  norm = (t << tscale) / cum;
154  /* Do a long multiply and shift down to Q15. */
155  out_diff[t] = (int32)(((long long)dd * norm)
156  >> (tscale - 15 + cshift - dshift));
157  /* printf("dd %d cshift %d dshift %d scaledt %d cum %d norm %d cmn %d\n",
158  dd, cshift, dshift, (t<<tscale), cum, norm, out_diff[t]); */
159  }
160 }
161 #endif
162 
163 yin_t *
164 yin_init(int frame_size, float search_threshold,
165  float search_range, int smooth_window)
166 {
167  yin_t *pe;
168 
169  pe = ckd_calloc(1, sizeof(*pe));
170  pe->frame_size = frame_size;
171 #ifndef FIXED_POINT
174 #else
175  pe->search_threshold = (uint16)(search_threshold * 32768);
176  pe->search_range = (uint16)(search_range * 32768);
177 #endif
178  pe->wsize = smooth_window * 2 + 1;
179  pe->diff_window = ckd_calloc_2d(pe->wsize,
180  pe->frame_size / 2,
181  sizeof(**pe->diff_window));
182  pe->period_window = ckd_calloc(pe->wsize,
183  sizeof(*pe->period_window));
184  pe->frame = ckd_calloc(pe->frame_size, sizeof(*pe->frame));
185  return pe;
186 }
187 
188 void
190 {
192  ckd_free(pe->period_window);
193  ckd_free(pe);
194 }
195 
196 void
198 {
199  /* Reset the circular window pointers. */
200  pe->wstart = pe->endut = 0;
201  pe->nfr = 0;
202 }
203 
204 void
206 {
207  pe->endut = 1;
208 }
209 
210 int
211 #ifndef FIXED_POINT
212 thresholded_search(float *diff_window, float threshold, int start, int end)
213 #else
214 thresholded_search(int32 *diff_window, fixed32 threshold, int start, int end)
215 #endif
216 {
217  int i, argmin;
218 #ifndef FIXED_POINT
219  float min;
220 #else
221  int min;
222 #endif
223 
224  min = diff_window[start];
225  argmin = start;
226  for (i = start + 1; i < end; ++i) {
227 #ifndef FIXED_POINT
228  float diff = diff_window[i];
229 #else
230  int diff = diff_window[i];
231 #endif
232 
233  if (diff < threshold) {
234  min = diff;
235  argmin = i;
236  break;
237  }
238  if (diff < min) {
239  min = diff;
240  argmin = i;
241  }
242  }
243  return argmin;
244 }
245 
246 void
247 yin_store(yin_t *pe, int16 const *frame)
248 {
249  memcpy(pe->frame, frame, pe->frame_size * sizeof(*pe->frame));
250 }
251 
252 void
253 yin_write(yin_t *pe, int16 const *frame)
254 {
255  int outptr, difflen;
256 
257  /* Rotate the window one frame forward. */
258  ++pe->wstart;
259  /* Fill in the frame before wstart. */
260  outptr = pe->wstart - 1;
261  /* Wrap around the window pointer. */
262  if (pe->wstart == pe->wsize)
263  pe->wstart = 0;
264 
265  /* Now calculate normalized difference function. */
266  difflen = pe->frame_size / 2;
267  cmn_diff(frame, pe->diff_window[outptr], difflen);
268 
269  /* Find the first point under threshold. If not found, then
270  * use the absolute minimum. */
271  pe->period_window[outptr]
272  = thresholded_search(pe->diff_window[outptr],
273  pe->search_threshold, 0, difflen);
274 
275  /* Increment total number of frames. */
276  ++pe->nfr;
277 }
278 
279 void
281 {
282  yin_write(pe, pe->frame);
283 }
284 
285 int
286 yin_read(yin_t *pe, uint16 *out_period, float *out_bestdiff)
287 {
288  int wstart, wlen, half_wsize, i;
289  int best, search_width, low_period, high_period;
290 #ifndef FIXED_POINT
291  float best_diff;
292 #else
293  int best_diff;
294 #endif
295 
296  half_wsize = (pe->wsize-1)/2;
297  /* Without any smoothing, just return the current value (don't
298  * need to do anything to the current poitner either). */
299  if (half_wsize == 0) {
300  if (pe->endut)
301  return 0;
302  *out_period = pe->period_window[0];
303 #ifndef FIXED_POINT
304  *out_bestdiff = pe->diff_window[0][pe->period_window[0]];
305 #else
306  *out_bestdiff = pe->diff_window[0][pe->period_window[0]] / 32768.0f;
307 #endif
308  return 1;
309  }
310 
311  /* We can't do anything unless we have at least (wsize-1)/2 + 1
312  * frames, unless we're at the end of the utterance. */
313  if (pe->endut == 0 && pe->nfr < half_wsize + 1) {
314  /* Don't increment the current pointer either. */
315  return 0;
316  }
317 
318  /* Establish the smoothing window. */
319  /* End of utterance. */
320  if (pe->endut) {
321  /* We are done (no more data) when pe->wcur = pe->wstart. */
322  if (pe->wcur == pe->wstart)
323  return 0;
324  /* I.e. pe->wcur (circular minus) half_wsize. */
325  wstart = (pe->wcur + pe->wsize - half_wsize) % pe->wsize;
326  /* Number of frames from wstart up to pe->wstart. */
327  wlen = pe->wstart - wstart;
328  if (wlen < 0) wlen += pe->wsize;
329  /*printf("ENDUT! ");*/
330  }
331  /* Beginning of utterance. */
332  else if (pe->nfr < pe->wsize) {
333  wstart = 0;
334  wlen = pe->nfr;
335  }
336  /* Normal case, it is what it is. */
337  else {
338  wstart = pe->wstart;
339  wlen = pe->wsize;
340  }
341 
342  /* Now (finally) look for the best local estimate. */
343  /* printf("Searching for local estimate in %d frames around %d\n",
344  wlen, pe->nfr + 1 - wlen); */
345  best = pe->period_window[pe->wcur];
346  best_diff = pe->diff_window[pe->wcur][best];
347  for (i = 0; i < wlen; ++i) {
348  int j = wstart + i;
349 #ifndef FIXED_POINT
350  float diff;
351 #else
352  int diff;
353 #endif
354 
355  j %= pe->wsize;
356  diff = pe->diff_window[j][pe->period_window[j]];
357  /* printf("%.2f,%.2f ", 1.0 - (double)diff/32768,
358  pe->period_window[j] ? 8000.0/pe->period_window[j] : 8000.0); */
359  if (diff < best_diff) {
360  best_diff = diff;
361  best = pe->period_window[j];
362  }
363  }
364  /* printf("best: %.2f, %.2f\n", 1.0 - (double)best_diff/32768,
365  best ? 8000.0/best : 8000.0); */
366  /* If it's the same as the current one then return it. */
367  if (best == pe->period_window[pe->wcur]) {
368  /* Increment the current pointer. */
369  if (++pe->wcur == pe->wsize)
370  pe->wcur = 0;
371  *out_period = best;
372 #ifndef FIXED_POINT
373  *out_bestdiff = best_diff;
374 #else
375  *out_bestdiff = best_diff / 32768.0f;
376 #endif
377  return 1;
378  }
379  /* Otherwise, redo the search inside a narrower range. */
380 #ifndef FIXED_POINT
381  search_width = (int)(best * pe->search_range);
382 #else
383  search_width = best * pe->search_range / 32768;
384 #endif
385  /* printf("Search width = %d * %.2f = %d\n",
386  best, (double)pe->search_range/32768, search_width); */
387  if (search_width == 0) search_width = 1;
388  low_period = best - search_width;
389  high_period = best + search_width;
390  if (low_period < 0) low_period = 0;
391  if (high_period > pe->frame_size / 2) high_period = pe->frame_size / 2;
392  /* printf("Searching from %d to %d\n", low_period, high_period); */
393  best = thresholded_search(pe->diff_window[pe->wcur],
394  pe->search_threshold,
395  low_period, high_period);
396  best_diff = pe->diff_window[pe->wcur][best];
397 
398  if (out_period)
399  *out_period = (best > 65535) ? 65535 : best;
400  if (out_bestdiff) {
401 #ifndef FIXED_POINT
402  *out_bestdiff = (best_diff > 1.0f) ? 1.0f : best_diff;
403 #else
404  *out_bestdiff = (best_diff > 32768) ? 1.0f : best_diff / 32768.0f;
405 #endif
406  }
407 
408  /* Increment the current pointer. */
409  if (++pe->wcur == pe->wsize)
410  pe->wcur = 0;
411  return 1;
412 }
uint16 * period_window
Window of best period estimates.
Definition: yin.c:72
#define ckd_calloc_2d(d1, d2, sz)
Macro for ckd_calloc_2d
Definition: ckd_alloc.h:270
unsigned char wstart
First frame in window.
Definition: yin.c:63
#define ckd_calloc(n, sz)
Macros to simplify the use of above functions.
Definition: ckd_alloc.h:248
Sphinx&#39;s memory allocation/deallocation routines.
void yin_start(yin_t *pe)
Start processing an utterance.
Definition: yin.c:197
unsigned char endut
Hoch Hech! Are we at the utterance end?
Definition: yin.c:65
uint16 nfr
Number of frames read so far.
Definition: yin.c:60
float search_threshold
Size of analysis frame.
Definition: yin.c:54
Basic type definitions used in Sphinx.
unsigned char wsize
Size of smoothing window.
Definition: yin.c:62
SPHINXBASE_EXPORT void ckd_free(void *ptr)
Test and free a 1-D array.
Definition: ckd_alloc.c:244
unsigned char wcur
Current frame of analysis.
Definition: yin.c:64
yin_t * yin_init(int frame_size, float search_threshold, float search_range, int smooth_window)
Initialize moving-window pitch estimation.
Definition: yin.c:164
float ** diff_window
Window of difference function outputs.
Definition: yin.c:68
void yin_store(yin_t *pe, int16 const *frame)
Store a frame of data to the pitch estimator.
Definition: yin.c:247
int yin_read(yin_t *pe, uint16 *out_period, float *out_bestdiff)
Read a raw estimated pitch value from the pitch estimator.
Definition: yin.c:286
Implementation of pitch estimation.
void yin_free(yin_t *pe)
Free a moving-window pitch estimator.
Definition: yin.c:189
void yin_end(yin_t *pe)
Mark the end of an utterance.
Definition: yin.c:205
SPHINXBASE_EXPORT void ckd_free_2d(void *ptr)
Free a 2-D array (ptr) previously allocated by ckd_calloc_2d.
Definition: ckd_alloc.c:255
int16 * frame
Storage for frame.
Definition: yin.c:73
float search_range
Range around best local estimate to search.
Definition: yin.c:55
Definition: yin.c:51
void yin_write_stored(yin_t *pe)
Feed stored frame of data to the pitch estimator.
Definition: yin.c:280
void yin_write(yin_t *pe, int16 const *frame)
Feed a frame of data to the pitch estimator.
Definition: yin.c:253