Edinburgh Speech Tools  2.1-release
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
wfst_run_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : December 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* Run a WFST on some data, either as a recognizer or as a transducer */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <cstdio>
41 #include <iostream>
42 #include <fstream>
43 #include <cstring>
44 #include "EST.h"
45 #include "EST_simplestats.h"
46 #include "EST_WFST.h"
47 
static int wfst_run_main(int argc, char **argv);

// Program entry point.
//
// Hands the command line to wfst_run_main() and turns its result into the
// process exit status: wfst_run_main() returns 0 when every input sequence
// was accepted and -1 otherwise.  Previously that result was thrown away and
// the program always exited with status 0, so a failed run could not be
// detected by the caller.
int main(int argc, char **argv)
{
    // Map 0 / -1 onto the portable exit codes rather than returning -1
    // directly (which the shell would report as 255).
    return (wfst_run_main(argc,argv) == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
}
59 
60 static int wfst_run_main(int argc, char **argv)
61 {
62  // recognize/transduce
63  EST_Option al;
64  EST_StrList files;
65  EST_Litem *f;
66  EST_String wfstfile;
67  FILE *ofd;
68  int r;
69  EST_SuffStats R;
70  float sumlogp=0,isumlogp;
71  float count=0,icount;
72 
73  parse_command_line
74  (argc, argv,
75  EST_String("[WFSTFILE] [input file0] ... [-o output file]\n")+
76  "Summary: Recognize/transduce using a WFST on data\n"+
77  "-wfst <ifile> The WFST to use\n"+
78  "-transduce Transduce input to output (default)\n"+
79  "-recog Recognize input consists of pairs\n"+
80  "-cumulate_into <ofile>\n"+
81  " Cumulate transitions to give new weights\n"+
82  " save new WFST into ofile\n"+
83  "-itype <string> char or token\n"+
84  "-quiet No extraneous messages\n"+
85  "-perplexity Calculate perplexity on given data set\n"+
86  "-heap <int> {210000}\n"+
87  " Set size of Lisp heap, needed for large wfsts\n"+
88  "-o <ofile> Output file for transduced forms\n",
89  files, al);
90 
91  if (al.present("-o"))
92  {
93  if ((ofd=fopen(al.val("-o"),"w")) == NULL)
94  EST_error("can't open output file for writing \"%s\"",
95  (const char *)al.val("-o"));
96  }
97  else
98  ofd = stdout;
99 
100  if (al.present("-wfst"))
101  wfstfile = al.val("-wfst");
102  else
103  EST_error("no WFST specified");
104 
105  siod_init(al.ival("-heap"));
106 
107  EST_WFST wfst;
108  EST_TokenStream ts;
109 
110  if (wfst.load(wfstfile) != format_ok)
111  EST_error("failed to read WFST from \"%s\"",
112  (const char *)wfstfile);
113 
114  if (al.present("-cumulate_into"))
115  wfst.start_cumulate();
116 
117  for (f=files.head(); f != 0; f=f->next())
118  {
119  if (files(f) == "-")
120  ts.open(stdin,FALSE);
121  else
122  if (ts.open(files(f)) != 0)
123  EST_error("failed to read WFST data file from \"%s\"",
124  (const char *)files(f));
125 
126  // Not the best way to input things but will do the the present
127  while(!ts.eof())
128  {
129  EST_StrList ostrs,istrs;
130  do
131  istrs.append(ts.get());
132  while((!ts.eof()) && (!ts.eoln()));
133 
134  if (al.present("-recog"))
135  {
136  if (al.present("-perplexity"))
137  {
138  r = recognize_for_perplexity(wfst,istrs,
139  al.present("-quiet"),
140  icount,
141  isumlogp);
142  if (r)
143  {
144  count += icount;
145  sumlogp += isumlogp;
146  }
147  }
148  else
149  r = recognize(wfst,istrs,al.present("-quiet"));
150  }
151  else
152  {
153  r = transduce(wfst,istrs,ostrs);
154  if (r)
155  {
156  cout << ostrs;
157  cout << endl;
158  }
159  }
160  R += r;
161 
162  if (!al.present("-quiet"))
163  {
164  if (r)
165  cout << "OK." << endl;
166  else
167  cout << "failed." << endl;
168  }
169  }
170  ts.close();
171  }
172 
173  if (al.present("-cumulate_into"))
174  {
175  wfst.stop_cumulate();
176  if (wfst.save(al.val("-cumulate_into")) != write_ok)
177  EST_error("failed to write cumulated WFST to \"%s\"",
178  (const char *)al.val("-cumulate_into"));
179  }
180 
181  printf("total %d OK %f%% failed %f%%\n",
182  (int)R.samples(),R.mean()*100,(1-R.mean())*100);
183  if (al.present("-perplexity"))
184  {
185  printf("perplexity is %f\n", pow(float(2.0),float(-1.0 * (sumlogp/count))));
186  }
187 
188  if (ofd != stdout)
189  fclose(ofd);
190 
191  if (R.mean() == 1) // true is *all* files were recognized
192  return 0;
193  else
194  return -1;
195 }
196 
EST_TokenStream & get(EST_Token &t)
get next token in stream
Definition: EST_Token.cc:486
a class representing a weighted finite-state transducer
Definition: EST_WFST.h:154
int ival(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:76
double mean(void) const
mean of currently cumulated values
void close(void)
Close stream.
Definition: EST_Token.cc:406
int open(const EST_String &filename)
open an EST_TokenStream for a file.
Definition: EST_Token.cc:200
int eof()
end of file
Definition: EST_Token.h:363
const int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:198
double samples(void)
number of samples in set
int eoln()
end of line
Definition: EST_Token.cc:818