Edinburgh Speech Tools  2.1-release
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
sigpr_example.cc
1 /************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /************************************************************************/
33 /* */
34 /* Author: Paul Taylor (pault@cstr.ed.ac.uk) */
35 /* Date: Fri May 9 1997 */
36 /* ------------------------------------------------------------------- */
37 /* Examples of Generation of Acoustic Feature Vectors from Waveforms */
38 /* */
39 /************************************************************************/
40 
41 #include <cstdlib>
42 #include "EST_sigpr.h"
43 #include "EST_cmd_line.h"
44 #include "EST_inline_utils.h"
45 #include "EST_sigpr.h"
46 
47 /**@name Signal processing example code
48  *
49  * @toc
50  */
51 ///@{
52 
53 EST_StrList empty; // file-scope list; not referenced anywhere else in this listing
54 
55 void print_map(EST_TrackMap &t); // forward declaration; no definition or call is visible in this listing
56 void print_track_map(EST_Track &t); // forward declaration; no definition or call is visible in this listing
57 
58 int main(void) // Example walk-through: each marked snippet below is quoted, in order, by the sigpr-example page at the end of this file.
59 
60 {
61  EST_StrList base_list; // decl
62  EST_StrList delta_list; // decl
63  EST_StrList acc_list; // decl
64  EST_Option op, al; // decl
65  init_lib_ops(al, op);
66  EST_Wave sig; // decl
67  EST_Track fv, part; // decl
68  float shift; // decl
69  int i;
70 
71 
72  cout << "position 1\n";
73 
74  /* Producing a single type of feature vector for an utterance */
75 
76  ///@ code
77 
78  int lpc_order = 16;
79  sig.load(DATA "/kdt_001.wav");
80 
81  ///@ endcode
82 
83  /* Now allocate enough space in the track to hold the analysis. */
84  ///@ code
85  int num_frames;
86  num_frames = (int)ceil(sig.end() / 0.01);
87  fv.resize(num_frames, lpc_order + 1);
88  ///@ endcode
89 
90  /* The positions of the frames, corresponding to the middle of their
91  analysis window, also need to be set. For fixed frame analysis, this
92  can be done with the fill_time() function: */
93 
94  ///@ code
95  fv.fill_time(0.01);
96  ///@ endcode
97 
98  /* The simplest way to do the actual analysis is as follows, which
99  will fill the track with the values from the LP analysis using the
100  default processing controls.
101  */
102 
103  ///@ code
104  sig2coef(sig, fv, "lpc");
105  ///@ endcode
106 
107  /* In this style of analysis, default values are used to control the
108  windowing mechanisms which split the whole signal into frames.
109 
110  Extending one time period before and one time period after the
111  current time mark:
112  */
113  ///@ code
114  sig2coef(sig, fv, "lpc", 2.0);
115  ///@ endcode
116 
117  /* Extending 1.5 time periods before and after the
118  current time mark, etc;
119  */
120  ///@ code
121  sig2coef(sig, fv, "lpc", 3.0);
122  ///@ endcode
123 
124  /* The type of windowing function may be changed also as this
125  can be passed in as an optional argument. First we
126  create a window function (This is explained more in \ref Windowing ).
127  */
128  ///@ code
129  EST_WindowFunc *wf = EST_Window::creator("hamming");
130  ///@ endcode
131  /* and then pass it in as the last argument
132  */
133  ///@ code
134  sig2coef(sig, fv, "lpc", 3.0, wf);
135  ///@ endcode
136  ///@}
137 
138  /* Pitch-Synchronous vs fixed frame analysis.
139 
140  There are many ways to fill the time array for fixed frame analysis.
141 
142  manually:
143 
144  */
145  ///@{
146 
147  ///@ code
148  int num_frames = 300; // NOTE(review): num_frames is already declared earlier in this same scope - confirm this example compiles as shown
149  fv.resize(num_frames, lpc_order + 1);
150  shift = 0.01; // time interval in seconds
151 
152  for (i = 0; i < num_frames; ++i)
153  fv.t(i) = shift * (float) i;
154  ///@ endcode
155  /* or by use of the member function EST_Track::fill_time()
156  */
157 
158  ///@ code
159  fv.fill_time(0.01);
160  ///@ endcode
161 
162  /* Pitch synchronous values can simply be read from pitchmark
163  files:
164  */
165  ///@ code
166  fv.load(DATA "/kdt_001.pm");
167  make_track(fv, "lpc", lpc_order + 1);
168  ///@ endcode
169 
170  /* Regardless of how the time points were obtained, the analysis
171  function call is just the same:
172  */
173  ///@ code
174  sig2coef(sig, fv, "lpc");
175  ///@ endcode
176  ///@}
177 
178  cout << "position 3\n";
179 
180  /* Naming Channels */
181  ///@{
182  ///@ code
183 
184  int cep_order = 16;
185  EST_StrList map;
186 
187  map.append("$lpc-0+" Stringtoi(lpc_order)); // NOTE(review): no operator between the string literal and the Stringtoi() call here or in the similar lines below - confirm the intended expression
188  map.append("$cepc-0+" Stringtoi(cep_order));
189  map.append("power");
190 
191  fv.resize(EST_CURRENT, map);
192  ///@ endcode
193 
194  /* An alternative is to use add_channels_to_map()
195  which takes a list of coefficient types and makes a map.
196  The order of each type of processing is extracted from
197  op.
198  */
199 
200  ///@ code
201 
202  EST_StrList coef_types;
203 
204  coef_types.append("lpc");
205  coef_types.append("cep");
206  coef_types.append("power");
207 
208  map.clear();
209 
210  add_channels_to_map(map, coef_types, op);
211  fv.resize(EST_CURRENT, map);
212 
213  ///@ endcode
214 
215  /* After allocating the right number of frames and channels
216  in fv, we extract a sub_track, which has all the frames
217  (i.e. between 0 and EST_ALL) and all the lpc channels
218  */
219  ///@ code
220  fv.sub_track(part, 0, EST_ALL, 0, "lpc_0", "lpc_N");
221  ///@ endcode
222  /* now call the signal processing function on this part: */
223  ///@ code
224  sig2coef(sig, part, "lpc");
225  ///@ endcode
226 
227  /* We repeat the procedure for the cepstral coefficients, but this
228  time take the next 8 channels (17-24 inclusive) and calculate the coefficients:
229  */
230  ///@ code
231  fv.sub_track(part, 0, EST_ALL, "cep_0", "cep_N");
232 
233  sig2coef(sig, part, "cep");
234  ///@ endcode
235  /* Extract the last channel for power and call the power function:
236  */
237  ///@ code
238  fv.sub_track(part, 0, EST_ALL, "power", 1);
239  power(sig, part, 0.01);
240 
241  ///@ endcode
242 
243  /* While the above technique is adequate for our needs and is
244  a useful demonstration of sub_track extraction, the
245  sigpr_base function is normally easier to use as it does
246  all the sub track extraction itself. To perform the lpc, cepstrum
247  and power analysis, we put these names into a StrList and
248  call sigpr_base.
249  */
250  ///@ code
251  base_list.clear(); // empty the list, just in case
252  base_list.append("lpc");
253  base_list.append("cep");
254  base_list.append("power");
255 
256  sigpr_base(sig, fv, op, base_list);
257  ///@ endcode
258  /* This will call sigpr_track as many times as is necessary.
259  */
260  ///@}
261 
262  /* Producing delta and acceleration coefficients */
263  ///@{
264  ///@ code
265 
266  map.append("$cep_d-0+" Stringtoi(cep_order)); // add deltas
267  map.append("$cep_a-0+" Stringtoi(cep_order)); // add accs
268 
269  fv.resize(EST_CURRENT, map); // resize the track.
270  ///@ endcode
271  /* Given an EST_Track of coefficients fv, the delta
272  function is used to produce the delta equivalents del.
273  The following uses the track allocated above and
274  generates a set of cepstral coefficients and then makes their
275  delta and acc:
276 
277  */
278  ///@ code
279 
280  EST_Track del, acc;
281 
282  fv.sub_track(part, 0, EST_ALL, 0, "cep_0", "cep_N"); // make subtrack of coefs
283  sig2coef(sig, part, "cep"); // fill with cepstra
284 
285  // make subtrack of deltas
286  fv.sub_track(del, 0, EST_ALL, 0, "cep_d_0", "cep_d_N");
287  delta(part, del); // calculate deltas of part, and place answer in del
288 
289  // make subtrack of accs
290  fv.sub_track(acc, 0, EST_ALL, 0, "cep_a_0", "cep_a_N");
291  delta(del, acc); // calculate deltas of del, and place answer in acc
292  ///@ endcode
293  /* It is possible to directly calculate the delta coefficients of
294  a type of coefficient, even if we don't have the base type.
295  \ref sigpr_delta will process the waveform, make a temporary
296  track of the required type "lpc" and calculate the delta of this.
297 
298  The following makes a set of delta reflection coefficients:
299 
300  */
301  ///@ code
302  map.append("$ref_d-0+" Stringtoi(lpc_order)); // add to map
303  fv.resize(EST_CURRENT, map); // resize the track.
304 
305  sigpr_delta(sig, fv, op, "ref");
306  ///@ endcode
307  /* an equivalent function exists for acceleration coefficients:
308  */
309  ///@ code
310  map.append("$lsf_a-0+" Stringtoi(lpc_order)); // add acc lsf
311  fv.resize(EST_CURRENT, map); // resize the track.
312 
313  sigpr_acc(sig, fv, op, "ref");
314 
315  ///@ endcode
316  ///@}
317 
318  /* Windowing
319 
320  The \ref EST_Window class provides a variety of means to
321  divide speech into frames using windowing mechanisms.
322 
323  A window function can be created from a window name using the
324  \ref EST_Window::creator function:
325  */
326  ///@{
327  ///@ code
328 
329  EST_WindowFunc *hamm = EST_Window::creator("hamming");
330  EST_WindowFunc *rect = EST_Window::creator("rectangular");
331  ///@ endcode
332  /* This function can then be used to create an EST_TBuffer of
333  window values. In the following example the values from a
334  256 point hamming window are stored in the buffer win_vals:
335  */
336  ///@ code
337  EST_FVector frame;
338  EST_FVector win_vals;
339 
340  hamm(256, win_vals);
341  ///@ endcode
342 
343  /* The make_window function also creates a window:
344  */
345  ///@ code
346  EST_Window::make_window(win_vals, 256, "hamming",-1);
347  ///@ endcode
348 
349  /* this can then be used to make a frame of speech from the main EST_Wave
350  sig. The following example extracts speech starting at sample 1000:
351  */
352  ///@ code
353  for (i = 0; i < 256; ++i)
354  frame[i] = (float)sig.a(i + 1000) * win_vals[i];
355  ///@ endcode
356 
357  /* Alternatively, exactly the same operation can be performed in a
358  single step by passing the window function to the
359  \ref EST_Window::window_signal function which takes an
360  \ref EST_Wave and performs windowing on a section of it,
361  storing the output in the \ref EST_FVector `frame`.
362  */
363  ///@ code
364  EST_Window::window_signal(sig, hamm, 1000, 256, frame, 1);
365  ///@ endcode
366  /* The window function need not be explicitly created, the window
367  signal can work on just the name of the window type:
368  */
369 
370  ///@ code
371  EST_Window::window_signal(sig, "hamming", 1000, 256, frame, 1);
372  ///@ endcode
373 
374  ///@}
375  /* Frame based signal processing
376  The signal processing library provides an extensive set of functions
377  which operate on a single frame of coefficients.
378  The following example shows one method of splitting the signal
379  into frames and calling a signal processing algorithm.
380 
381  First set up the track for 16 order LP analysis:
382 
383  */
384  ///@{
385  ///@ code
386 
387  map.clear();
388  map.append("$lpc-0+16");
389 
390  fv.resize(EST_CURRENT, map);
391 
392  ///@ endcode
393  /* In this example, we take the analysis frame length to be 256 samples
394  long, and the shift in samples is just the shift in seconds times the
395  sampling frequency.
396  */
397  ///@ code
398  int s_length = 256;
399  int s_shift = int(shift * float(sig.sample_rate()));
400  EST_FVector coefs;
401  ///@ endcode
402 
403  /* Now we set up a loop which calculates the frames one at a time.
404  */
405  ///@ code
406  for (int k1 = 0; k1 < fv.num_frames(); ++k1)
407  {
408  int start = (k1 * s_shift) - (s_length/2);
409  EST_Window::window_signal(sig, "hamming", start, s_length, frame, 1);
410 
411  fv.frame(coefs, k1); // Extract a single frame
412  sig2lpc(frame, coefs); // Pass this to actual algorithm
413  }
414  ///@ endcode
415 
416  /* A slightly different tack can be taken for pitch-synchronous analysis.
417  Setting up fv with the pitchmarks and channels:
418  */
419  ///@ code
420  fv.load(DATA "/kd1_001.pm"); // NOTE(review): the earlier pitchmark load in this function uses "/kdt_001.pm" - confirm this file name
421  fv.resize(EST_CURRENT, map);
422  ///@ endcode
423  /* Set up as before, but this time calculate the window starts and
424  lengths from the time points. In this example, the length is a
425  `factor` (twice) the local frame shift.
426  Note that the only difference between this function and the fixed
427  frame one is in the calculation of the start and end points.
428 
429  The windowing, frame extraction and call to \ref sig2lpc are exactly
430  the same.
431  */
432  ///@ code
433  float factor = 2.0;
434 
435  for (int k2 = 0; k2 < fv.num_frames(); ++k2)
436  {
437  s_length = irint(get_frame_size(fv, k2, sig.sample_rate())* factor);
438  int start = (irint(fv.t(k2) * sig.sample_rate()) - (s_length/2));
439 
440  EST_Window::window_signal(sig, wf, start, s_length, frame, 1);
441 
442  fv.frame(coefs, k2);
443  sig2lpc(frame, coefs);
444  }
445  ///@ endcode
446  ///@}
447 
448  /* Filtering */
449  ///@{
450  ///@ code
451 
452  EST_FVector filter;
453  int freq = 400; // NOTE(review): freq and filter_order are not used below; the design call passes 400 and 99 directly
454  int filter_order = 99;
455 
456  filter = design_lowpass_FIR_filter(sig.sample_rate(), 400, 99);
457  ///@ endcode
458  /* And now use this filter on the signal:
459  */
460  ///@ code
461  FIRfilter(sig, filter);
462  ///@ endcode
463  /* For one-off filtering operations, the filter design can be
464  done in the filter function itself. The \ref FIRlowpass_filter
465  function takes the signal, cut-off frequency and order as
466  arguments and designs the filter on the fly. Because of the
467  overhead of filter design, this function is expensive and
468  should only be used for one-off operations.
469  */
470  ///@ code
471  FIRlowpass_filter(sig, 400, 99);
472  ///@ endcode
473  /* The equivalent operations exist for high-pass filtering:
474  */
475  ///@ code
476  filter = design_highpass_FIR_filter(sig.sample_rate(), 50, 99);
477  FIRfilter(sig, filter);
478  FIRhighpass_filter(sig, 50, 99);
479  ///@ endcode
480  /* Filters of arbitrary frequency response can also be designed using
481  the \ref design_FIR_filter function.
482  */
483  ///@ code
484  EST_FVector response(16);
485  response[0] = 1;
486  response[1] = 1;
487  response[2] = 1;
488  response[3] = 1;
489  response[4] = 0;
490  response[5] = 0;
491  response[6] = 0;
492  response[7] = 0;
493  response[8] = 1;
494  response[9] = 1;
495  response[10] = 1;
496  response[11] = 1;
497  response[12] = 0;
498  response[13] = 0;
499  response[14] = 0;
500  response[15] = 0;
501 
502  filter = design_FIR_filter(response, 15);
503 
504  FIRfilter(sig, response); // NOTE(review): this passes the desired response vector, not the `filter` designed on the line above - confirm which was intended
505  ///@ endcode
506  /* The normal filtering functions can cause a time delay in the
507  filtered waveform. To attempt to eliminate this, a set of
508  double-filter functions is provided which guarantees
509  zero phase differences between the original and filtered waveform.
510  */
511  ///@ code
512  FIRlowpass_double_filter(sig, 400);
513  FIRhighpass_double_filter(sig, 40);
514  ///@ endcode
515 
516  /* Sometimes it is undesirable to have the input signal overwritten.
517  For these cases, a set of parallel functions exist which take
518  an input waveform for reading and an output waveform for writing to.
519  */
520  ///@ code
521  EST_Wave sig_out;
522 
523  FIRfilter(sig, sig_out, response);
524  FIRlowpass_filter(sig, sig_out, 400);
525  FIRhighpass_filter(sig, sig_out, 40);
526  ///@ endcode
527  ///@}
528 
529 }
530 
531 ///@}
532 
533 
534 /**@page sigpr-example Example of Signal Processing code
535  @brief Signal processing examples
536  @dontinclude sigpr_example.cc
537 
538 @tableofcontents
539 @section producing-feature-vector-for-utt Producing a single type of feature vector for an utterance
540 
541 A number of types of signal processing can be performed by the
542 \ref sig2coef function. The following code demonstrates a simple
543 case of calculating the linear prediction (LP) coefficients for
544 a waveform.
545 
546 First set the order of the lpc analysis to 16 (this entails 17 actual
547 coefficients) and then load in the waveform to be analysed.
548 
549  @skipline //@ code
550  @until //@ endcode
551 
552 Now allocate enough space in the track to hold the analysis.
553 The following command resizes `fv` to have enough frames for
554 analysis frames at 0.01 intervals up to the end of the waveform,
555 (sig.end()), and enough channels to store `lpc_order + 1` coefficients.
556 The channels are named so as to take lpc coefficients.
557 
558  @skipline //@ code
559  @until //@ endcode
560 
561 The positions of the frames, corresponding to the middle of their
562 analysis window also need to be set. For fixed frame analysis, this
563 can be done with the EST_Track::fill_time() function:
564 
565  @skipline //@ code
566  @until //@ endcode
567 
568 The simplest way to do the actual analysis is as follows, which
569 will fill the track with the values from the LP analysis using the
570 default processing controls.
571 
572  @skipline //@ code
573  @until //@ endcode
574 
575 In this style of analysis, default values are used to control the
576 windowing mechanisms which split the whole signal into frames.
577 
578 Specifically, each frame is defined to start a certain distance
579 before the time interval, and extending the same distance after.
580 This distance is calculated as a function of the local window
581 spacing and can be adjusted as follows:
582 
583 Extending one time period before and one time period after the
584 current time mark:
585 
586  @skipline //@ code
587  @until //@ endcode
588 
589 Extending 1.5 time periods before and after the
590 current time mark, etc;
591 
592  @skipline //@ code
593  @until //@ endcode
594 
595 The type of windowing function may be changed also as this
596 can be passed in as an optional argument. First we create a window
597 function (This is explained more in \ref Windowing).
598 
599  @skipline //@ code
600  @until //@ endcode
601  and then pass it in as the last argument
602 
603  @skipline //@ code
604  @until //@ endcode
605 
606 
607  @section pitchvsfixframe Pitch-Synchronous vs fixed frame analysis.
608 
609  Most of the core signal processing functions operate on individual
610  frames of speech and are oblivious as to how these frames were
611  extracted from the original speech. This allows us to take the frames
612  from anywhere in the signal: specifically, this facilitates two
613  common forms of analysis:
614 
615  - **fixed frame**: The time points are spaced at even intervals
616  throughout the signal.
617  - **pitch-synchronous**: The time points represent *pitchmarks*
618  and correspond to a specific position in each pitch period,
619  e.g. the instant of glottal closure.
620 
621  It is a simple matter to fill the time array, but normally
622  pitchmarks are read from a file or taken from another signal
623  processing algorithm (see \ref "Pitchmark functions").
624 
625  There are many ways to fill the time array for fixed frame analysis.
626 
627  manually:
628 
629  @skipline //@ code
630  @until //@ endcode
631 
632  or by use of the member function \ref EST_Track::fill_time
633 
634  @skipline //@ code
635  @until //@ endcode
636 
637  Pitch synchronous values can simply be read from pitchmark
638  files:
639 
640  @skipline //@ code
641  @until //@ endcode
642 
643  Regardless of how the time points were obtained, the analysis
644  function call is just the same:
645 
646  @skipline //@ code
647  @until //@ endcode
648 
649  @section sigpr-example-naming-channels Naming Channels
650 
651  Multiple types of feature vector can be stored in the same Track.
652  Imagine that we want lpc, cepstrum and power
653  coefficients in that order in a track. This can be achieved by using
654  the \ref sig2coef function multiple times, or by the wrap
655  around \ref sigpr_base function.
656 
657  It is vitally important here to ensure that before passing the
658  track to the signal processing functions that it has the correct
659  number of channels and that these are appropriately named. This is
660  most easily done using the track map facility, explained
661  in \ref est_trac_naming_channels.
662 
663  For each call, we only use the part of the track that is relevant.
664  The EST_Track::sub_track member function is used to get
665  this. In the following example, we are assuming here that
666  `fv` has sufficient space for 17 lpc coefficients, 8 cepstrum
667  coefficients and power and that they are stored in that order.
668 
669  @skipline //@ code
670  @until //@ endcode
671 
672  An alternative is to use \ref add_channels_to_map()
673  which takes a list of coefficient types and makes a map.
674  The order of each type of processing is extracted from op.
675 
676  @skipline //@ code
677  @until //@ endcode
678 
679  After allocating the right number of frames and channels
680  in `fv`, we extract a sub_track, which has all the frames
681  (i.e. between 0 and EST_ALL) and all the lpc channels.
682 
683  @skipline //@ code
684  @until //@ endcode
685 
686  now call the signal processing function on this part:
687 
688  @skipline //@ code
689  @until //@ endcode
690 
691  We repeat the procedure for the cepstral coefficients, but this
692  time take the next 8 channels (17-24 inclusive) and calculate the coefficients:
693 
694  @skipline //@ code
695  @until //@ endcode
696 
697  Extract the last channel for power and call the power function:
698 
699  @skipline //@ code
700  @until //@ endcode
701 
702  While the above technique is adequate for our needs and is
703  a useful demonstration of sub_track extraction, the
704  \ref sigpr_base function is normally easier to use as it does
705  all the sub track extraction itself. To perform the lpc, cepstrum
706  and power analysis, we put these names into an EST_StrList and
707  call \ref sigpr_base.
708 
709  @skipline //@ code
710  @until //@ endcode
711 
712  This will call \ref sigpr_track as many times as is necessary.
713 
714  @section sigpr-deltaacc Producing delta and acceleration coefficients
715 
716  Delta coefficients represent the numerical differentiation of a
717  track, and acceleration coefficients represent the second
718  order numerical differentiation.
719 
720  By convention, delta coefficients have a "_d" suffix and acceleration
721  coefficients "_a". If the coefficient is multi-dimensional, the
722  numbers go after the "_d" or "_a".
723 
724  @skipline //@ code
725  @until //@ endcode
726 
727  Given an EST_Track of coefficients `fv`, the \ref EST_Track::delta
728  function is used to produce the delta equivalents `del`.
729  The following uses the track allocated above and
730  generates a set of cepstral coefficients and then makes their
731  delta and acc:
732 
733  @skipline //@ code
734  @until //@ endcode
735 
736  It is possible to directly calculate the delta coefficients of
737  a type of coefficient, even if we don't have the base type.
738  \ref sigpr_delta will process the waveform, make a temporary
739  track of the required type "lpc" and calculate the delta of this.
740 
741  The following makes a set of delta reflection coefficients:
742 
743  @skipline //@ code
744  @until //@ endcode
745 
746  an equivalent function exists for acceleration coefficients:
747 
748  @skipline //@ code
749  @until //@ endcode
750 
751  @section sigpr-windowing Windowing
752 
753  The \ref EST_Window class provides a variety of means to
754  divide speech into frames using windowing mechanisms.
755 
756  A window function can be created from a window name using the
757  EST_Window::creator function:
758 
759  @skipline //@ code
760  @until //@ endcode
761 
762  This function can then be used to create an EST_TBuffer of
763  window values. In the following example the values from a
764  256 point hamming window are stored in the buffer `win_vals`:
765 
766  @skipline //@ code
767  @until //@ endcode
768 
769  The make_window function also creates a window:
770 
771  @skipline //@ code
772  @until //@ endcode
773 
774  this can then be used to make a frame of speech from the main EST_Wave
775  `sig`. The following example extracts speech starting at sample 1000:
776 
777  @skipline //@ code
778  @until //@ endcode
779 
780  Alternatively, exactly the same operation can be performed in a
781  single step by passing the window function to the
782  EST_Window::window_signal function which takes an
783  EST_Wave and performs windowing on a section of it,
784  storing the output in the EST_FVector `frame`.
785 
786  @skipline //@ code
787  @until //@ endcode
788 
789  The window function need not be explicitly created, the window
790  signal can work on just the name of the window type:
791 
792  @skipline //@ code
793  @until //@ endcode
794 
795  @section sigpr-example-frames Frame based signal processing
796 
797  The signal processing library provides an extensive set of functions
798  which operate on a single frame of coefficients.
799  The following example shows one method of splitting the signal
800  into frames and calling a signal processing algorithm.
801 
802  First set up the track for 16 order LP analysis:
803 
804  @skipline //@ code
805  @until //@ endcode
806 
807  In this example, we take the analysis frame length to be 256 samples
808  long, and the shift in samples is just the shift in seconds times the
809  sampling frequency.
810 
811  @skipline //@ code
812  @until //@ endcode
813 
814  Now we set up a loop which calculates the frames one at a time.
815  `start` is the start position in samples of each frame.
816  The EST_Window::window_signal function is called which
817  makes an EST_FVector frame of the speech via a hamming window.
818 
819  Using the EST_Track::frame function, the EST_FVector
820  `coefs` is set to frame `k` in the track. It is important
821  to understand that this operation involves setting an internal
822  smart pointer in `coefs` to the memory of frame `k`. This
823  allows the signal processing function \ref sig2lpc to operate
824  on an input and output EST_FVector, without any copying to or
825  from the main track. After the \ref sig2lpc call, the kth frame
826  of `fv` is now filled with the LP coefficients.
827 
828  @skipline //@ code
829  @until //@ endcode
830 
831  A slightly different tack can be taken for pitch-synchronous analysis.
832  Setting up fv with the pitchmarks and channels:
833 
834  @skipline //@ code
835  @until //@ endcode
836 
837  Set up as before, but this time calculate the window starts and
838  lengths from the time points. In this example, the length is a
839  `factor` (twice) the local frame shift.
840  Note that the only difference between this function and the fixed
841  frame one is in the calculation of the start and end points.
842 
843  The windowing, frame extraction and call to \ref sig2lpc are exactly
844  the same.
845 
846  @skipline //@ code
847  @until //@ endcode
848 
849  @section sigpr-filtering Filtering
850 
851  In the EST library we so far have two main types of filter,
852  **finite impulse response (FIR)** filters and **linear prediction (LP)**
853  filters. **infinite impulse response (IIR)** filters are not yet
854  implemented, though LP filters are a special case of these.
855 
856  Filtering involves 2 stages: the design of the filter and the
857  use of this filter on the waveform.
858 
859  First we examine a simple low-pass filter which attempts to suppress
860  all frequencies above a cut-off. Imagine we want to low pass filter
861  a signal at 400Hz. First we design the filter:
862 
863  @skipline //@ code
864  @until //@ endcode
865 
866  And now use this filter on the signal:
867 
868  @skipline //@ code
869  @until //@ endcode
870 
871  For one-off filtering operations, the filter design can be
872  done in the filter function itself. The \ref FIRlowpass_filter
873  function takes the signal, cut-off frequency and order as
874  arguments and designs the filter on the fly. Because of the
875  overhead of filter design, this function is expensive and
876  should only be used for one-off operations.
877 
878  @skipline //@ code
879  @until //@ endcode
880 
881  The equivalent operations exist for high-pass filtering:
882 
883  @skipline //@ code
884  @until //@ endcode
885 
886  Filters of arbitrary frequency response can also be designed using
887  the \ref design_FIR_filter function. This function takes an
888  EST_FVector of order \f$2^{N}\f$ which specifies the desired frequency
889  response up to 1/2 the sampling frequency. The function returns
890  a set of filter coefficients that attempt to match the desired
891  response.
892 
893  @skipline //@ code
894  @until //@ endcode
895 
896  The normal filtering functions can cause a time delay in the
897  filtered waveform. To attempt to eliminate this, a set of
898  double-filter functions is provided which guarantees
899  zero phase differences between the original and filtered waveform.
900 
901  @skipline //@ code
902  @until //@ endcode
903 
904  Sometimes it is undesirable to have the input signal overwritten.
905  For these cases, a set of parallel functions exist which take
906  an input waveform for reading and an output waveform for writing to.
907 
908  @skipline //@ code
909  @until //@ endcode
910 
911 */
A class for storing digital waveforms. The waveform is stored as an array of 16 bit shorts...
Definition: EST_Wave.h:64
EST_read_status load(const EST_String filename, int offset=0, int length=0, int rate=default_sample_rate)
Definition: EST_Wave.cc:178
EST_FVector design_FIR_filter(const EST_FVector &freq_response, int filter_order)
Definition: filter.cc:416
void FIRfilter(EST_Wave &in_sig, const EST_FVector &numerator, int delay_correction=0)
Definition: filter.cc:334
static Func * creator(const char *name, bool report_error=false)
Return the creation function for the given window type.
Definition: EST_Window.cc:216
void delta(EST_Track &tr, EST_Track &d, int regression_length=3)
Definition: delta.cc:49
float & t(int i=0)
return time position of frame i
Definition: EST_Track.h:477
static void window_signal(const EST_Wave &sig, EST_WindowFunc *make_window, int start, int size, EST_TBuffer< float > &frame)
Definition: EST_Window.cc:275
EST_FVector design_lowpass_FIR_filter(int sample_rate, int freq, int order)
Definition: filter.cc:504
A vector class for floating point numbers. EST_FVector x should be used instead of float *x wherever ...
Definition: EST_FMatrix.h:118
void FIRlowpass_filter(EST_Wave &sigin, int freq, int order=DEFAULT_FILTER_ORDER)
Definition: filter.cc:524
void power(EST_Wave &sig, EST_Track &a, float factor)
Definition: sigpr_utt.cc:420
static void make_window(EST_TBuffer< float > &window_vals, int size, const char *name, int window_centre)
Definition: EST_Window.cc:255
void sig2coef(EST_Wave &sig, EST_Track &a, EST_String type, float factor=2.0, EST_WindowFunc *wf=EST_Window::creator(DEFAULT_WINDOW_NAME))
Definition: sigpr_utt.cc:397
void clear(void)
remove all items in list
Definition: EST_TList.h:246
void sub_track(EST_Track &st, int start_frame=0, int nframes=EST_ALL, int start_chan=0, int nchans=EST_ALL)
Definition: EST_Track.cc:1097
EST_read_status load(const EST_String name, float ishift=0.0, float startt=0.0)
Definition: EST_Track.cc:1309
int get_frame_size(EST_Track &pms, int current_pos, int sample_rate, int prefer_prev=0)
Definition: sigpr_utt.cc:315
short & a(int i, int channel=0)
Definition: EST_Wave.cc:126
void FIRlowpass_double_filter(EST_Wave &sigin, int freq, int order=DEFAULT_FILTER_ORDER)
Definition: filter.cc:547
void resize(int num_frames, int num_channels, bool preserve=1)
Definition: EST_Track.cc:211
int Stringtoi(EST_String s)
Make an int from a EST_String. EST_String equivalent of atoi()
Definition: util_io.cc:128
void sigpr_delta(EST_Wave &sig, EST_Track &fv, EST_Features &op, const EST_StrList &slist)
Definition: sigpr_utt.cc:307
void EST_WindowFunc(int size, EST_TBuffer< float > &r_window, int window_centre)
Function which creates a window.
Definition: EST_Window.h:52
void FIRhighpass_filter(EST_Wave &in_sig, int freq, int order)
Definition: filter.cc:532
void sigpr_acc(EST_Wave &sig, EST_Track &fv, EST_Features &op, const EST_StrList &slist)
Definition: sigpr_utt.cc:300
float end()
return the time position of the last sample.
Definition: EST_Wave.h:153
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:198
int sample_rate() const
return the sampling rate (frequency)
Definition: EST_Wave.h:147
void FIRhighpass_double_filter(EST_Wave &sigin, int freq, int order=DEFAULT_FILTER_ORDER)
Definition: filter.cc:582
int num_frames() const
return number of frames in track
Definition: EST_Track.h:650
void frame(EST_FVector &fv, int n, int startf=0, int nf=EST_ALL)
Definition: EST_Track.h:209
void fill_time(float t, int start=1)
Definition: EST_Track.cc:786
void sig2lpc(const EST_FVector &sig, EST_FVector &acf, EST_FVector &ref, EST_FVector &lpc)
Definition: sigpr_frame.cc:432
void sigpr_base(EST_Wave &sig, EST_Track &fv, EST_Features &op, const EST_StrList &slist)
Definition: sigpr_utt.cc:136
EST_FVector design_highpass_FIR_filter(int sample_rate, int freq, int order)
Definition: filter.cc:510