Edinburgh Speech Tools  2.1-release
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
track_example.cc
1  /************************************************************************/
2  /* */
3  /* Centre for Speech Technology Research */
4  /* University of Edinburgh, UK */
5  /* Copyright (c) 1996,1997 */
6  /* All Rights Reserved. */
7  /* */
8  /* Permission is hereby granted, free of charge, to use and distribute */
9  /* this software and its documentation without restriction, including */
10  /* without limitation the rights to use, copy, modify, merge, publish, */
11  /* distribute, sublicense, and/or sell copies of this work, and to */
12  /* permit persons to whom this work is furnished to do so, subject to */
13  /* the following conditions: */
14  /* 1. The code must retain the above copyright notice, this list of */
15  /* conditions and the following disclaimer. */
16  /* 2. Any modifications must be clearly marked as such. */
17  /* 3. Original authors' names are not deleted. */
18  /* 4. The authors' names are not used to endorse or promote products */
19  /* derived from this software without specific prior written */
20  /* permission. */
21  /* */
22  /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23  /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24  /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25  /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26  /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27  /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28  /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29  /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30  /* THIS SOFTWARE. */
31  /* */
32  /*************************************************************************/
33  /* */
34  /* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35  /* Date: Fri May 9 1997 */
36  /* ------------------------------------------------------------------- */
37  /* Example of declaration and use of tracks. */
38  /* */
39  /*************************************************************************/
40 
41 
42 #include <iostream>
43 #include <cstdlib>
44 #include "EST_Track.h"
45 #include "EST_Wave.h"
46 #include "EST_sigpr.h"
47 #include "EST_error.h"
48 
49 
50 int main(void)
51 // Walk-through of EST_Track usage. The code/endcode marker comments below delimit the snippets that the Doxygen page at the end of this file pulls in, in order, via @skipline/@until.
52 {
53  int i, j;
54 
55  /* This program is designed as an example not as something to run
56  so for testing purposes it simply exits */
57  exit(0);
58  // Because of the unconditional exit(0) above, nothing below this point is executed at run time.
59  //@ code
60  EST_Track tr; // default track declaration
61  EST_Track tra(500, 10); // allocate track with 500 frames and 10 channels
62  //@ endcode
63 
64 
65  //@ code
66  tr.resize(10, 500); // resize track to have 10 frames and 500 channels
67  tr.resize(500, 10); // resize track to have 500 frames and 10 channels
68  //@ endcode
69 
70  /* by default, resizing preserves values in the track. This
71  may involve copying some information, so if the existing values
72  are not needed, a flag can be set which usually results in
73  quicker resizing
74  */
75  //@ code
76  tr.resize(250, 5, 0); // throw away any existing values
77  //@ endcode
78 
79  //@ code
80  tr.set_num_channels(10); // makes 10 channels, keeps same no of frames
81 
82  tr.set_num_frames(400); // makes 400 frames, keeps same no of channels
83  //@ endcode
84 
85 
86  //@ code
87  tr.resize(500, 10);
88 
89  for (i = 0; i < tr.num_frames(); ++i)
90  for (j = 0; j < tr.num_channels(); ++j)
91  tr.a(i, j) = -5.0;
92 
93  //@ endcode
94 
95  /** A well formed track will have a time value, specified in seconds,
96  for every frame. The time array can be filled directly:
97  */
98  //@ code
99  for (i = 0; i < tr.num_frames(); ++i)
100  tr.t(i) = (float) i * 0.01;
101  //@ endcode
102 
103 
104  //@ code
105  tr.fill_time(0.1);
106 
107  //@ endcode
108 
109  //@ code
110  for (i = 50; i < 100; ++i)
111  tr.set_break(i);
112  //@ endcode
113 
114  //@ code
115  for (i = 50; i < 100; ++i)
116  tr.set_value(i);
117  //@ endcode
118  // NOTE(review): cout is used unqualified below and no using-declaration is visible in this file; presumably it is supplied by the EST headers -- confirm.
119  //@ code
120  if (tr.val(60))
121  cout << "Frame 60 is not a break\n";
122 
123  if (tr.track_break(60))
124  cout << "Frame 60 is a break\n";
125  //@ endcode
126 
127 
128  //@ code
129  tr.set_channel_name("F0", 0);
130  tr.set_channel_name("energy", 1);
131  //@ endcode
132 
133  //@ code
134  EST_StrList map;
135  map.append("F0");
136  map.append("energy");
137 
138  tr.resize(500, map); // this makes a 2 channel track and sets the names to F0 and energy
139  //@ endcode
140 
141  //@ code
142 
143  map.clear();
144  map.append("F0");
145  map.append("energy");
146 
147  map.append("cep_0");
148  map.append("cep_1");
149  map.append("cep_2");
150  map.append("cep_3");
151  map.append("cep_4");
152  map.append("cep_5");
153  map.append("cep_6");
154  map.append("cep_7");
155  map.append("cep_N");
156 
157  tr.resize(500, map); // makes an 11 channel track and sets the names
158  //@ endcode
159 
160 
161  //@ code
162  map.clear();
163  map.append("F0");
164  map.append("energy");
165 
166  map.append("$cep-0+8");
167 
168  tr.resize(500, map); // does exactly as above
169  //@ endcode
170 
171  //@}
172 
173 
174  /*Access single frames or single channels. */
175 
176  //@ code
177  EST_FVector tmp_frame;
178 
179  tr.frame(tmp_frame, 50);
180  //@ endcode
181 
182  //@ code
183  EST_FVector tmp_channel;
184 
185  tr.channel(tmp_channel, 5);
186  //@ endcode
187 
188  //@ code
189  tr.channel(tmp_channel, "energy");
190  //@ endcode
191 
192  //@ code
193  tr.frame(tmp_frame, 50, 2, 9);
194  //@ endcode
195 
196  //@ code
197  tr.channel(tmp_channel, 5, 400, 100);
198  //@ endcode
199 
200  //@ code
201  EST_Track sub;
202 
203  tr.sub_track(sub, 0, EST_ALL, 2, 9);
204 
205  //@ endcode
206 
207  //@ code
208  EST_Wave sig;
209 
210  melcep(sig, sub, 1.0, 20, 22);
211  //@ endcode
212 
213  //@ code
214 
215  tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N");
216  //@ endcode
217 
218  //@ code
219  tr.sub_track(sub, 0, EST_ALL, "cep_0", "cep_N");
220  //@ endcode
221 
222  //@ code
223  tr.sub_track(sub, 47, 39, "cep_0", "cep_N");
224  //@ endcode
225 
226 
227  //@ code
228  EST_Track::Entries frames;
229 
230  // print out the time of every 50th frame
231  cout << "Times:";
232 
233  for (frames.begin(tr); frames; ++frames)
234  {
235  const EST_Track &frame = *frames;
236  if (frames.n() % 50 ==0)
237  cout << " " << frames.n() << "[" << frame.t() << "]";
238  }
239  cout << "\n";
240 
241  //@ endcode
242 
243  //@ code
244  EST_Track tr_copy;
245 
246 // tr.copy_sub_track(tr_copy, 47, 39, "cep_0", "cep_N");
247  //@ endcode
248 
249 
250  //@ code
251  float *channel_buf, *frame_buf;
252  channel_buf = new float[tr.num_frames()];
253  frame_buf = new float[tr.num_channels()];
254 
255  tr.copy_channel_out(5, channel_buf); // copy channel 5 into channel_buf
256  tr.copy_frame_out(43, frame_buf); // copy frame 43 into frame_buf
257  //@ endcode
258 
259 
260  //@ code
261  tr.copy_channel_in(5, channel_buf); // copy channel_buf into channel 5
262  tr.copy_frame_in(43, frame_buf); // copy frame_buf into frame 43
263  //@ endcode
264  // NOTE: channel_buf and frame_buf are allocated with new[] above and are never released with delete[] in this function.
265  //@ code
266  EST_StrList aux_names;
267 
268  aux_names.append("voicing");
269  aux_names.append("join_points");
270  aux_names.append("cost");
271 
272  tr.resize_aux(aux_names);
273  //@ endcode
274 
275  //@ code
276 
277  for (i = 0; i < 500; ++i)
278  {
279  tr.aux(i, "voicing") = i;
280  tr.aux(i, "join_points") = EST_String("stuff");
281  tr.aux(i, "cost") = 0.111;
282  }
283  //@ endcode
284 
285  /* File I/O */
286 
287  //@ code
288  if (tr.save("tmp/track.htk", "htk") != write_ok)
289  EST_error("can't save htk file\n");
290  //@ endcode
291 
292  //@ code
293  if (tr.save("tmp/track.est", "est") != write_ok)
294  EST_error("can't save est file\n");
295  //@ endcode
296 
297  //@ code
298  if (tr.save("tmp/track.ascii", "ascii") != write_ok)
299  EST_error("can't save ascii file\n");
300  //@ endcode
301 
302  //@ code
303  EST_Track tr2;
304  if (tr2.load("tmp/track.htk") != read_ok)
305  EST_error("can't reload htk\n");
306  //@ endcode
307 
308  //@ code
309  if (tr.load("tmp/track.ascii", 0.01) != read_ok)
310  EST_error("can't reload ascii file\n");
311  //@ endcode
312 
313  exit(0);
314 }
315 
316 
317 
318 /** @page EST_Track-example EST_Track class example code
319  @tableofcontents
320  @brief Some examples of track manipulations.
321  @dontinclude track_example.cc
322 
323  @section initializing Initialising and Resizing a Track
324 
325  The constructor functions can be used to create a track with
326  zero frames and channels or a track with a specified number of
327  frames and channels
328 
329  @skipline //@ code
330  @until //@ endcode
331 
332 
333  tracks can be resized at any time:
334 
335  @skipline //@ code
336  @until //@ endcode
337 
338  by default, resizing preserves values in the track. This
339  may involve copying some information, so if the existing values
340  are not needed, a flag can be set which usually results in
341  quicker resizing
342 
343  @skipline //@ code
344  @until //@ endcode
345 
346  If only the number of channels or the number of frames needs
347  to be changed, this can be done with the following functions:
348  @skipline //@ code
349  @until //@ endcode
350 
351  The preserve flag works in the same way with these functions
352 
353  @section simple_access Simple Access
354 
355  Values in the track can be accessed and set by frame
356  number and channel number.
357 
358  The following resizes a track to have 500 frames and 10 channels
359  and fills every position with -5.
360 
361  @skipline //@ code
362  @until //@ endcode
363 
364  A well formed track will have a time value, specified in seconds,
365  for every frame. The time array can be filled directly:
366 
367  @skipline //@ code
368  @until //@ endcode
369 
370  which fills the time array with values 0.01, 0.02,
371  0.03... 5.0. However, a shortcut function is provided for fixed
372  frame spacing:
373  @skipline //@ code
374  @until //@ endcode
375 
376  which performs the same operation as above. Frames do not have
377  to be evenly spaced, in pitch synchronous processing the time
378  array holds the time position of each pitch period. In such
379  cases each position in the time array must obviously be set
380  individually.
381 
382  Some representations have undefined values during certain
383  sections of the track, for example the F0 value during
384  unvoiced speech.
385 
386  The break/value array can be used to specify if a frame has an
387  undefined value.
388 
389  If a frame in this array is 1, that means the amplitude is defined
390  at that point. If 0, the amplitude is undefined.
391  By default, every frame has a value.
392 
393  Breaks (undefined values) can be set by EST_Track::set_break().
394  The following sets every frame from 50 to 99 as a break:
395 
396  @skipline //@ code
397  @until //@ endcode
398 
399  frames can be turned back to values as follows:
400 
401  @skipline //@ code
402  @until //@ endcode
403 
404  It is up to individual functions to decide how to interpret breaks.
405 
406  A frame's status can be checked as follows:
407 
408  @skipline //@ code
409  @until //@ endcode
410 
411  @section est_trac_naming_channels Naming Channels
412 
413  While channels can be accessed by their index, it is often useful
414  to give them names and refer to them by those names.
415 
416  The EST_Track::set_channel_name() function sets the name of a
417  single channel:
418 
419  @skipline //@ code
420  @until //@ endcode
421 
422  An alternative is to use a predefined set of channel names
423  stored in a *map*. A track map
424  is simply a string list (EST_StrList) of strings which describe a channel name
425  configuration. The EST_Track::resize function can take
426  this and resize the number of channels to the number of channels
427  indicated in the map, and give each channel its name from the
428  map. For example:
429  @skipline //@ code
430  @until //@ endcode
431 
432  A convention is used for channels which comprise
433  components of a multi-dimensional analysis such as
434  cepstra. In such cases the channels are named
435  `TYPE_I`. The last coefficient is
436  always named `TYPE_N` regardless of
437  the number of coefficients. This is very useful in extracting
438  a set of related channels without needing to know the order
439  of the analysis.
440 
441  For example, a track map might look like:
442 
443  @skipline //@ code
444  @until //@ endcode
445 
446  This obviously gets unwieldy quite quickly, so the mapping
447  mechanism provides a short hand for multi-dimensional data.
448 
449  @skipline //@ code
450  @until //@ endcode
451 
452  Here \$ indicates the special status, "cep" the name of the
453  coefficients, "-0" that the first is number 0 and "+8" that
454  there are 8 more to follow.
455 
456 
457  @section tr_example_access_single_frames Access single frames or single channels.
458 
459  Often functions perform their operations on only a single
460  frame or channel, and the track class provides a general
461  mechanism for doing this.
462 
463  Single frames or channels can be accessed as EST_FVector :
464  Given a track with 500 frames and 10 channels, the 50th frame
465  can be accessed as:
466  @skipline //@ code
467  @until //@ endcode
468 
469  now `tmp_frame` is a 10 element vector, which is
470  a window into `tr`: any changes to the contents of `tmp_frame` will
471  change `tr`. `tmp_frame` cannot be resized. (This operation can
472  be thought of in standard C terms as `tmp_frame` being a pointer
473  to the 50th frame of `tr`).
474 
475 
476  Likewise with channels:
477 
478  @skipline //@ code
479  @until //@ endcode
480 
481  Again, `tmp_channel` is a 500 element vector, which is
482  a window into `tr`: any changes to the contents of `tmp_channel` will
483  change `tr`. `tmp_channel` cannot be resized.
484 
485 
486  Channels can also be extracted by name:
487 
488  @skipline //@ code
489  @until //@ endcode
490 
491  not all the channels need be put into the temporary frame.
492  Imagine we have a track with an F0 channel, an energy channel and
493  10 cepstrum channels. The following makes a frame from the
494  50th frame, which only includes the cepstral information in
495  channels 2 through 11
496 
497  @skipline //@ code
498  @until //@ endcode
499 
500  @skipline //@ code
501  @until //@ endcode
502 
503  @section tr_example_access_multiple_frames Access multiple frames or channels.
504 
505  In addition to extracting single frames and channels, multiple
506  frame and channel portions can be extracted in a similar
507  way. In the following example, we make a sub-track sub, which
508  points to the entire cepstrum portion of a track (channels 2
509  through 11)
510 
511  @skipline //@ code
512  @until //@ endcode
513 
514  Parameter `sub` behaves exactly like a normal
515  track in every way, except that it cannot be resized. Its
516  contents behave like a pointer into the designated portion of
517  `tr`, so changing `sub` will change `tr`.
518 
519  The first argument is the
520  `sub` track. The second and third state the start
521  frame and the total number of frames required. EST_ALL is a
522  special constant that specifies that all the frames are
523  required here. The next argument is the start channel number
524  (remember channels are numbered from 0), and the last argument
525  is the total number of channels required.
526 
527  This facility is particularly useful for using standard
528  signal processing functions efficiently. For example,
529  the \ref melcep in the signal processing library
530  takes a waveform and produces a mel-scale cepstrum. It determines
531  the order of the cepstral analysis by the number of channels in
532  the track it is given, which has already been allocated to have
533  the correct number of frames and channels.
534 
535  The following will process the waveform
536  `sig`, produce a 10th order mel cepstrum
537  and place the output in `sub`. (For
538  explanation of the other options see
539  \ref melcep.)
540 
541  @skipline //@ code
542  @until //@ endcode
543 
544  because we have made `sub` a window
545  into `tr`, the melcep function writes its
546  output into the correct location, i.e. channels 2-11 of tr. If
547  it were not for the sub_track facility, either a separate track
548  of the right size would be passed into melcep and then it
549  would be copied into tr (wasteful), or else tr would be passed
550  in and other arguments would have to specify which channels
551  should be written to (messy).
552 
553  Sub-tracks can also be set using channel names. The
554  following example does exactly as above, but is referenced by
555  the name of the first channel required and the number of
556  channels to follow:
557 
558  @skipline //@ code
559  @until //@ endcode
560 
561  and this specifies the end by a string also:
562 
563  @skipline //@ code
564  @until //@ endcode
565 
566  sub_tracks can be any set of continuous frames and
567  channels. For example if a word started at frame 47 and ended
568  at frame 86, the following would set a sub track to that
569  portion:
570  @skipline //@ code
571  @until //@ endcode
572 
573  We can step through the frames of a Track using a standard
574  iterator. The frames are returned as one-frame sub-tracks.
575 
576  @skipline //@ code
577  @until //@ endcode
578 
579  The EST_Track::channel, EST_Track::frame
580  and EST_Track::sub_track functions are most commonly
581  used to write into a track using a convenient
582  sub-portion. Sometimes, however a simple copy is required
583  whose contents can be written without affecting the original.
584 
585  The EST_Track::copy_sub_track function does this
586  @skipline //@ code
587  @until //@ endcode
588 
589  Individual frames and channels can be copied out into
590  pre-allocated float * arrays as follows:
591 
592  @skipline //@ code
593  @until //@ endcode
594 
595  Individual frames and channels can be copied into the track
596  from float * arrays as follows:
597 
598  @skipline //@ code
599  @until //@ endcode
600 
601 
602 
603  @section auxiliary Auxiliary Channels
604  Auxiliary channels are used for storing frame information other than
605  amplitude coefficients, for example voicing decisions and points of
606  interest in the track.
607 
608  Auxiliary channels always have the same number of frames as the
609  amplitude channels. They are resized by assigning names to the
610  channels that need to be created:
611 
612  @skipline //@ code
613  @until //@ endcode
614 
615  The following fills in these three channels with some values:
616  @skipline //@ code
617  @until //@ endcode
618 
619 
620  @section file_i_o File I/O
621  Tracks in various formats can be saved and loaded:
622 
623  Save as an HTK file:
624 
625  @skipline //@ code
626  @until //@ endcode
627 
628  Save as an EST file:
629 
630  @skipline //@ code
631  @until //@ endcode
632 
633  Save as an ascii file:
634  @skipline //@ code
635  @until //@ endcode
636 
637  The file type is automatically determined from the file's
638  header during loading:
639 
640  @skipline //@ code
641  @until //@ endcode
642 
643  If no header is found, the function assumes the
644  file is ascii data, with a fixed frame shift, arranged with rows
645  representing frames and columns channels. In this case, the
646  frame shift must be specified as an argument to this function:
647  @skipline //@ code
648  @until //@ endcode
649 
650 */
651 
A class for storing digital waveforms. The waveform is stored as an array of 16 bit shorts...
Definition: EST_Wave.h:64
void set_num_frames(int n, bool preserve=1)
Definition: EST_Track.h:177
void set_value(int i)
set frame i to be a value
Definition: EST_Track.cc:131
void set_break(int i)
set frame i to be a break
Definition: EST_Track.cc:122
float & t(int i=0)
return time position of frame i
Definition: EST_Track.h:477
float & a(int i, int c=0)
Definition: EST_Track.cc:1022
A vector class for floating point numbers. EST_FVector x should be used instead of float *x wherever ...
Definition: EST_FMatrix.h:118
void set_channel_name(const EST_String &name, int channel)
set the name of the channel.
Definition: EST_Track.cc:166
int num_channels() const
return number of channels in track
Definition: EST_Track.h:656
unsigned int n() const
Return the current position.
void set_num_channels(int n, bool preserve=1)
Definition: EST_Track.h:169
void melcep(EST_Wave &sig, EST_Track &mfcc_track, float factor, int fbank_order, float liftering_parameter, EST_WindowFunc *wf=EST_Window::creator(DEFAULT_WINDOW_NAME), const bool include_c0=false, const bool up=false)
Definition: sigpr_utt.cc:536
void clear(void)
remove all items in list
Definition: EST_TList.h:246
void sub_track(EST_Track &st, int start_frame=0, int nframes=EST_ALL, int start_chan=0, int nchans=EST_ALL)
Definition: EST_Track.cc:1097
EST_read_status load(const EST_String name, float ishift=0.0, float startt=0.0)
Definition: EST_Track.cc:1309
void resize(int num_frames, int num_channels, bool preserve=1)
Definition: EST_Track.cc:211
EST_write_status save(const EST_String name, const EST_String EST_filetype="")
Definition: EST_Track.cc:1230
void copy_channel_out(int n, float *buf, int offset=0, int num=EST_ALL) const
Definition: EST_Track.h:310
int val(int i) const
return true if frame i is a value
Definition: EST_Track.cc:539
void resize_aux(EST_StrList &map, bool preserve=1)
Definition: EST_Track.cc:311
void copy_frame_out(int n, float *buf, int offset=0, int num=EST_ALL) const
Definition: EST_Track.h:320
void copy_frame_in(int n, const float *buf, int offset=0, int num=EST_ALL)
Definition: EST_Track.h:345
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:198
void copy_channel_in(int n, const float *buf, int offset=0, int num=EST_ALL)
Definition: EST_Track.h:328
void channel(EST_FVector &cv, int n, int startf=0, int nf=EST_ALL)
Definition: EST_Track.h:214
int track_break(int i) const
return true if frame i is a break
Definition: EST_Track.h:633
void begin(const Container &over)
Set the iterator ready to run over this container.
int num_frames() const
return number of frames in track
Definition: EST_Track.h:650
void frame(EST_FVector &fv, int n, int startf=0, int nf=EST_ALL)
Definition: EST_Track.h:209
void fill_time(float t, int start=1)
Definition: EST_Track.cc:786