Edinburgh Speech Tools  2.1-release
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
wfst_build_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : November 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* Build a WFST from some base: */
37 /* 1 a set of context dependent rewrite rules using the */
38 /* algorithms from "An Efficient Compiler for Weighted Rewrite */
39 /* Rules", by Mehryar Mohri and Richard Sproat ACL 1996 */
40 /* and information from the techniques in Ritchie et al. 1992 */
41 /* 2 A regular grammar (but can be written as a CFG as long as it */
42 /* contains no centre embedding) */
43 /* 3 A regular expression */
44 /* 4 lts rules (but that doesn't work yet) */
45 /* */
46 /* or apply some operator on existing wfst(s): compose, concatenate, */
47 /* difference, union, intersection */
48 /* */
49 /* Also allow determinizing and minimization as required */
50 /* */
51 /*=======================================================================*/
52 #include <cstdlib>
53 #include <cstdio>
54 #include <iostream>
55 #include <fstream>
56 #include <cstring>
57 #include "EST.h"
58 #include "EST_WFST.h"
59 
static int wfst_build_main(int argc, char **argv);


// Program entry point.  All of the work is delegated to wfst_build_main();
// its return value becomes the process exit status instead of being
// discarded (previously the status was ignored, exit(0) was called
// unconditionally, and the trailing "return 0" could never be reached).
int main(int argc, char **argv)
{
    return wfst_build_main(argc, argv);
}
71 
72 static int wfst_build_main(int argc, char **argv)
73 {
74  // Top level function generates a WFST from rules
75  EST_Option al;
76  EST_StrList files;
77  EST_String outfile;
78 
79  parse_command_line
80  (argc, argv,
81  EST_String("[option] [rulefile0] [rulefile1] ...\n")+
82  "Summary: Build a weighted finite state transducer from rules/wfsts\n"+
83  "-type <string> {kk} Input rule type: kk, lts, rg, tl, compose, regex\n"+
84  " union, intersect, concat, asis\n"+
85  "-determinize Determinize WFST before saving it\n"+
86  "-detmin Determinize and minimize WFST before saving it\n"+
87  "-o <ofile> Output file for saved WFST (default stdout)\n"+
88  "-otype <string> {ascii}\n"+
89  " Output type, ascii or binary\n"+
90  "-heap <int> {210000}\n"+
91  " Set size of Lisp heap, needed for large rulesets\n"+
92  "-q Quiet mode, no summary generated\n",
93  files, al);
94 
95  if (al.present("-o"))
96  outfile = al.val("-o");
97  else
98  outfile = "-";
99 
100  siod_init(al.ival("-heap"));
101 
102  LISP ruleset;
103  LISP inalpha, outalpha;
104  EST_WFST *wfst = new EST_WFST;
105  gc_protect(&ruleset);
106 
107  if (al.val("-type") == "kk")
108  {
109  ruleset = car(vload(files(files.head()),1));
110  kkcompile(ruleset,*wfst);
111  }
112  else if (al.val("-type") == "lts")
113  {
114  ruleset = car(vload(files(files.head()),1));
115  ltscompile(ruleset,*wfst);
116  }
117  else if (al.val("-type") == "rg")
118  {
119  ruleset = car(vload(files(files.head()),1));
120  rgcompile(ruleset,*wfst);
121  }
122  else if (al.val("-type") == "tl")
123  {
124  ruleset = car(vload(files(files.head()),1));
125  tlcompile(ruleset,*wfst);
126  }
127  else if (al.val("-type") == "asis")
128  {
129  if (wfst->load(files.nth(0)) != format_ok) exit(-1);
130  }
131  else if (al.val("-type") == "compose")
132  {
133  EST_WFST a,b;
134 
135  if (files.length() != 2)
136  EST_error("compose requires two WFSTs to combine");
137 
138  if (a.load(files.nth(0)) != format_ok) exit(-1);
139  if (b.load(files.nth(1)) != format_ok) exit(-1);
140 
141  wfst->compose(a,b);
142  }
143  else if (al.val("-type") == "union")
144  {
145  EST_WFST a,b;
146 
147  if (files.length() != 2)
148  EST_error("union requires two WFSTs to combine");
149 
150  if (a.load(files.nth(0)) != format_ok) exit(-1);
151  if (b.load(files.nth(1)) != format_ok) exit(-1);
152 
153  wfst->uunion(a,b);
154  }
155  else if (al.val("-type") == "intersect")
156  {
157  EST_WFST a,b;
158 
159  if (files.length() != 2)
160  EST_error("intersect requires two WFSTs to combine");
161  if (a.load(files.nth(0)) != format_ok) exit(-1);
162  if (b.load(files.nth(1)) != format_ok) exit(-1);
163 
164  wfst->intersection(a,b);
165  }
166  else if (al.val("-type") == "concat")
167  {
168  EST_WFST a,b;
169 
170  if (files.length() != 2)
171  EST_error("concat requires two WFSTs to combine");
172  if (a.load(files.nth(0)) != format_ok) exit(-1);
173  if (b.load(files.nth(1)) != format_ok) exit(-1);
174 
175  wfst->concat(a,b);
176  }
177  else if (al.val("-type") == "difference")
178  {
179  EST_WFST a,b;
180 
181  if (files.length() != 2)
182  EST_error("difference requires two WFSTs to combine");
183  if (a.load(files.nth(0)) != format_ok) exit(-1);
184  if (b.load(files.nth(1)) != format_ok) exit(-1);
185 
186  wfst->difference(a,b);
187  }
188  else if (al.val("-type") == "regex")
189  {
190  ruleset = car(vload(files(files.head()),1));
191  inalpha = siod_nth(0,ruleset);
192  outalpha = siod_nth(1,ruleset);
193  wfst->build_from_regex(inalpha,outalpha,car(cdr(cdr(ruleset))));
194  }
195  else
196  {
197  cerr << "wfst_build: unknown rule type \"" << al.val("-type")
198  << "\"" << endl;
199  exit(-1);
200  }
201 
202  if (al.present("-determinize"))
203  {
204  EST_WFST *dwfst = new EST_WFST;
205  dwfst->determinize(*wfst);
206  if (!al.present("-q"))
207  {
208  cout << "wfst_build summary: " << endl;
209  cout << " non-deterministic wfst: " <<
210  wfst->summary() << endl;
211  cout << " deterministic wfst: " <<
212  dwfst->summary() << endl;
213  }
214  delete wfst;
215  wfst = dwfst;
216  }
217  else if (al.present("-detmin"))
218  {
219  if (!al.present("-q"))
220  {
221  cout << "wfst_build summary: " << endl;
222  cout << " non-deterministic wfst: " <<
223  wfst->summary() << endl;
224  }
225  EST_WFST *dwfst = new EST_WFST;
226  dwfst->determinize(*wfst);
227  delete wfst;
228  if (!al.present("-q"))
229  cout << " deterministic wfst: " <<
230  dwfst->summary() << endl;
231  EST_WFST *mwfst = new EST_WFST;
232  mwfst->minimize(*dwfst);
233  if (!al.present("-q"))
234  cout << " minimized wfst: " <<
235  mwfst->summary() << endl;
236  delete dwfst;
237  wfst = mwfst;
238  }
239  else
240  {
241  if (!al.present("-q"))
242  cout << "wfst_build: " << wfst->summary() << endl;
243  }
244 
245  wfst->save(outfile,al.val("-otype"));
246  delete wfst;
247  gc_unprotect(&ruleset);
248 
249  return 0;
250 }
251 
void minimize(const EST_WFST &a)
Build minimized form of a.
Definition: wfst_ops.cc:484
a class representing a weighted finite-state transducer
Definition: EST_WFST.h:154
int ival(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:76
void intersection(EST_TList< EST_WFST > &wl)
Definition: wfst_ops.cc:356
void determinize(const EST_WFST &a)
Build determinized form of a.
Definition: wfst_ops.cc:164
void compose(const EST_WFST &a, const EST_WFST &b)
Definition: wfst_ops.cc:812
EST_write_status save(const EST_String &filename, const EST_String type="ascii")
?
Definition: EST_WFST.cc:349
void concat(const EST_WFST &a, const EST_WFST &b)
Definition: wfst_ops.cc:776
void uunion(EST_TList< EST_WFST > &wl)
T & nth(int n)
return the Nth value
Definition: EST_TList.h:147
const int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
void difference(const EST_WFST &a, const EST_WFST &b)
Definition: wfst_ops.cc:898
EST_read_status load(const EST_String &filename)
?
Definition: EST_WFST.cc:508