45 #include "EST_simplestats.h"
48 static int wfst_run_main(
int argc,
char **argv);
51 int main(
int argc,
char **argv)
54 wfst_run_main(argc,argv);
60 static int wfst_run_main(
int argc,
char **argv)
70 float sumlogp=0,isumlogp;
75 EST_String(
"[WFSTFILE] [input file0] ... [-o output file]\n")+
76 "Summary: Recognize/transduce using a WFST on data\n"+
77 "-wfst <ifile> The WFST to use\n"+
78 "-transduce Transduce input to output (default)\n"+
79 "-recog Recognize input consists of pairs\n"+
80 "-cumulate_into <ofile>\n"+
81 " Cumulate transitions to give new weights\n"+
82 " save new WFST into ofile\n"+
83 "-itype <string> char or token\n"+
84 "-quiet No extraneous messages\n"+
85 "-perplexity Calculate perplexity on given data set\n"+
86 "-heap <int> {210000}\n"+
87 " Set size of Lisp heap, needed for large wfsts\n"+
88 "-o <ofile> Output file for transduced forms\n",
93 if ((ofd=fopen(al.
val(
"-o"),
"w")) == NULL)
94 EST_error(
"can't open output file for writing \"%s\"",
95 (
const char *)al.
val(
"-o"));
101 wfstfile = al.
val(
"-wfst");
103 EST_error(
"no WFST specified");
105 siod_init(al.
ival(
"-heap"));
110 if (wfst.load(wfstfile) != format_ok)
111 EST_error(
"failed to read WFST from \"%s\"",
112 (
const char *)wfstfile);
114 if (al.
present(
"-cumulate_into"))
115 wfst.start_cumulate();
117 for (f=files.head(); f != 0; f=f->next())
120 ts.
open(stdin,FALSE);
122 if (ts.
open(files(f)) != 0)
123 EST_error(
"failed to read WFST data file from \"%s\"",
124 (
const char *)files(f));
132 while((!ts.
eof()) && (!ts.
eoln()));
138 r = recognize_for_perplexity(wfst,istrs,
149 r = recognize(wfst,istrs,al.
present(
"-quiet"));
153 r = transduce(wfst,istrs,ostrs);
165 cout <<
"OK." << endl;
167 cout <<
"failed." << endl;
173 if (al.
present(
"-cumulate_into"))
175 wfst.stop_cumulate();
176 if (wfst.save(al.
val(
"-cumulate_into")) != write_ok)
177 EST_error(
"failed to write cumulated WFST to \"%s\"",
178 (
const char *)al.
val(
"-cumulate_into"));
181 printf(
"total %d OK %f%% failed %f%%\n",
185 printf(
"perplexity is %f\n", pow(
float(2.0),
float(-1.0 * (sumlogp/count))));
EST_TokenStream & get(EST_Token &t)
get next token in stream
a class representing a weighted finite-state transducer
int ival(const EST_String &rkey, int m=1) const
double mean(void) const
mean of currently cumulated values
void close(void)
Close stream.
int open(const EST_String &filename)
open an EST_TokenStream for a file.
const int present(const K &rkey) const
Returns true if key is present.
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
void append(const T &item)
add item onto end of list
double samples(void)
number of samples in set