49 static int align_main(
int argc,
char **argv);
55 int &total,
int &ins,
int &del,
int &sub,
int &correct);
67 int main(
int argc,
char **argv)
70 align_main(argc,argv);
76 static int align_main(
int argc,
char **argv)
87 "Summary: align an hypothesis with a reference string\n"+
88 "-rfile <ifile> Reference file\n"+
89 "-hfile <ifile> Hypothesis file\n"+
90 "-rstring <string> Reference string\n"+
91 "-hstring <string> Hypothesis string\n"+
93 " Supported formats: strings, nisttool\n",
97 outfile = al.
val(
"-o");
102 format = al.
val(
"-format");
106 if (format ==
"strings")
108 else if (format ==
"nisttool")
111 cout <<
"Unknown or unhandled format: " << format << endl;
119 float ins,
float del,
float sub);
126 int total,ins,del,sub,correct;
128 load_sentence(u,
"ref",refStr);
129 load_sentence(u,
"hypo",hypStr);
130 align(u,
"ref",
"hypo",
"align");
131 align_score(u,
"ref",
"hypo",
"align",total,ins,del,sub,correct);
132 fprintf(stdout,
"words %d\n",total);
133 fprintf(stdout,
"insertions %d\n",ins);
134 fprintf(stdout,
"deletions %d\n",del);
135 fprintf(stdout,
"substitutions %d\n",sub);
136 fprintf(stdout,
"correct %d\n",correct);
137 fprintf(stdout,
"WER %f\n",(100.0 * (
float)(ins+del+sub))/total);
150 int total,ins,del,sub,correct;
151 int s_total,s_ins,s_del,s_sub,s_correct;
155 s_total=s_ins=s_del=s_sub=s_correct=0;
161 load_sentence(u,
"ref",rts);
162 load_sentence(u,
"hypo",hts);
165 if ((!r->name().
matches(
id)) ||
166 (r->name() != h->name()))
168 cerr <<
"Align: failed to match sentence " <<
169 sents <<
" at id " << r->name() << endl;
176 align(u,
"ref",
"hypo",
"align");
183 align_score(u,
"ref",
"hypo",
"align",
184 total,ins,del,sub,correct);
189 s_correct += correct;
196 fprintf(stdout,
"sentences %d\n",sents);
197 fprintf(stdout,
"words %d\n",s_total);
198 fprintf(stdout,
"insertions %d\n",s_ins);
199 fprintf(stdout,
"deletions %d\n",s_del);
200 fprintf(stdout,
"substitutions %d\n",s_sub);
201 fprintf(stdout,
"correct %d\n",s_correct);
202 fprintf(stdout,
"WER %f\n",(100.0 * (
float)(s_ins+s_del+s_sub))/s_total);
214 i->set_name(ts.
get());
216 while ((!ts.
eoln()) && (!ts.
eof()));
228 for (iter.
begin(strlist); iter; ++iter)
238 int &total,
int &ins,
int &del,
int &sub,
int &correct)
242 total=ins=del=correct=sub=0;
244 for (ri=u.
relation(refrel)->first(),
247 ri=ri->next(),hi=hi->next())
249 for ( ; (as(hi,alignrel) == 0) && hi ; hi=hi->next())
251 fprintf(stdout,
"inserted: %s\n",(
const char *)hi->name());
254 for ( ; (
daughter1(ri,alignrel) == 0) && ri; ri=ri->next())
256 fprintf(stdout,
"deleted: %s\n",(
const char *)ri->name());
261 if (name_distance(ri,
daughter1(ri,alignrel)) == 0)
263 fprintf(stdout,
"correct: %s\n",(
const char *)ri->name());
268 fprintf(stdout,
"substituted: %s\n",(
const char *)ri->name());
273 for ( ; hi ; hi=hi->next())
275 fprintf(stdout,
"inserted: %s\n",(
const char *)hi->name());
290 if ((rname == hname) ||
291 (downcase(rname) == downcase(hname)))
310 float to_insert,to_del,to_subs;
319 dpt(0,0) = subs_cost * name_distance(ri,hi);
321 for (i=1; i<r_size+1; i++)
323 dpt(i,0) = insdel_cost + dpt(i-1,0);
326 for (j=1; j < h_size+1; j++)
328 dpt(0,j) = insdel_cost + dpt(0,j-1);
333 for (i=1; ri; ri=ri->next(),i++)
336 hi = utt.
relation(hyporel)->first();
337 for (j=1; hi; hi=hi->next(),j++)
339 cost = name_distance(ri,hi);
340 to_insert = insdel_cost + dpt(i,j-1);
341 to_del = insdel_cost + dpt(i-1,j);
342 to_subs = (cost * subs_cost) + dpt(i-1,j-1);
343 if (to_insert < to_del)
345 if (to_insert < to_subs)
347 dpt(i,j) = to_insert;
358 if (to_del < to_subs)
380 for (i=r_size,j=h_size,
383 ri; i--,ri=ri->prev())
385 while (dpp(i,j) == 1)
EST_TokenStream & get(EST_Token &t)
Get the next token in the stream.
EST_Item * append_daughter(EST_Item *n, EST_Item *p=0)
EST_Relation * create_relation(const EST_String &relname)
Create a new relation called relname.
A regular expression class to go with the CSTR EST_String class.
void close(void)
Close stream.
int open(const EST_String &filename)
Open an EST_TokenStream for a file.
void StringtoStrList(EST_String s, EST_StrList &l, EST_String sep)
Convert an EST_String to an EST_StrList by separating the tokens in s that are delimited by the separator sep.
const int present(const K &rkey) const
Returns true if key is present.
const V & val(const K &rkey, bool m=0) const
Return the value associated with the given key (const version).
void begin(const Container &over)
Set the iterator ready to run over this container.
EST_Relation * relation(const char *name, int err_on_not_found=1) const
Get a relation by name.
int matches(const char *e, int pos=0) const
Exactly match this string?
EST_Item * daughter1(const EST_Item *n)
Return the first daughter of n.