// Forward declarations of the file-local (static) helpers that are defined
// further down. Two of these prototypes (assign_probs, make_all_rules) are
// cut short in this listing: their remaining parameters are not visible.
55 static int scfg_make_main(
int argc,
char **argv);
59 static LISP assign_probs(LISP rules,
const EST_String &domain,
61 static LISP make_all_rules(
const EST_StrList &NonTerminals,
63 static void generate_probs(
double *probs,
int num);
// Program entry point: passes argc/argv unchanged to scfg_make_main().
68 int main(
int argc,
char **argv)
71 scfg_make_main(argc,argv);
// Command-line driver for the grammar builder. From the visible code it:
//   - reads the -o, -domain, -values, -nonterms, -terms and -heap options,
//   - rejects unsupported -domain / -values settings with a message on cerr,
//   - fills the NonTerminals and Terminals symbol lists,
//   - generates all rules and passes them through assign_probs(),
//   - pretty-prints each rule to the output stream.
// Parameters: argc/argv as received by main().
77 static int scfg_make_main(
int argc,
char **argv)
// Usage text listing the supported options.
89 "Summary: Build a stochastic context free grammar\n"+
90 "-nonterms <string> Number of nonterminals or file containing them\n"+
91 "-terms <string> Number of terminals or file containing them\n"+
92 "-domain <string> {nlogp}\n"+
93 " Values to be nlogp (negative log probabilities)\n"+
94 " or prob (probabilities)\n"+
95 "-values <string> {equal}\n"+
96 " General initial scores on rules as equal or\n"
98 "-heap <int> {500000}\n"+
99 " Set size of Lisp heap, only needed for large grammars\n"+
100 "-o <ofile> File to save grammar (default stdout)\n",
// Output destination is taken from the -o option.
104 outfile = al.
val(
"-o");
// -domain must be exactly "nlogp" or "prob"; any other value is reported
// on cerr.
110 if (al.
val(
"-domain") ==
"nlogp")
112 else if (al.
val(
"-domain") ==
"prob")
116 cerr <<
"scfg_make: domain must be nlogp or prob" << endl;
// -values must be exactly "equal" or "random"; any other value is reported
// on cerr.
123 if (al.
val(
"-values") ==
"equal")
125 else if (al.
val(
"-values") ==
"random")
129 cerr <<
"scfg_make: values must be equal or random" << endl;
// Nonterminals: either generated from the integer value of -nonterms with
// the prefix "NT", or loaded via load_symbols() using the option's string
// value. The test that chooses between the two is not visible here.
137 make_symbols(NonTerminals,al.
ival(
"-nonterms"),
"NT");
139 load_symbols(NonTerminals,al.
val(
"-nonterms"));
143 cerr <<
"scfg_make: no nonterminals specified" << endl;
// Terminals: same two routes as above, with the prefix "T" for generated
// names.
150 make_symbols(Terminals,al.
ival(
"-terms"),
"T");
152 load_symbols(Terminals,al.
val(
"-terms"));
156 cerr <<
"scfg_make: no terminals specified" << endl;
// siod_init() receives the -heap value, which the usage text describes as
// the size of the Lisp heap.
160 siod_init(al.
ival(
"-heap"));
// Build every rule, then let assign_probs() adjust the scores for the
// selected domain.
162 rules = make_all_rules(NonTerminals,Terminals);
163 rules = assign_probs(rules,domain,values);
// Open the output file for writing; a failed fopen is reported on cerr.
169 if ((fd=fopen(outfile,
"w")) == NULL)
171 cerr <<
"scfg_make: failed to open file \"" << outfile <<
172 "\" for writing" << endl;
// Walk the rule list and pretty-print one rule at a time to fd.
177 for (r=rules; r != NIL; r=cdr(r))
178 pprint_to_fd(fd,car(r));
// Generates the full rule set. For each nonterminal p it emits:
//   - one rule (score p q r) for every ordered pair (q, r) of nonterminals,
//   - one rule (score p t) for every terminal t,
// where score is taken from a probs array filled by generate_probs().
// Returns the accumulated list after passing it through reverse().
// The second parameter is cut off in this listing; the body uses a
// Terminals list and the caller passes (NonTerminals, Terminals).
187 static LISP make_all_rules(
const EST_StrList &NonTerminals,
195 for (p=NonTerminals.head(); p != 0; p=p->next())
// Rule count for this head symbol: the nonterminal-pair count plus a second
// term whose line is missing here (presumably the number of terminals,
// TODO confirm).
197 int num_rules_nt = (NonTerminals.length()*NonTerminals.length())+
// One score per rule headed by p.
// NOTE(review): no matching delete[] is visible in this excerpt; confirm.
199 double *probs =
new double[num_rules_nt];
200 generate_probs(probs,num_rules_nt);
// i indexes probs; it is advanced by the inner loop here and by the terminal
// loop below, once per generated rule.
202 for (q=NonTerminals.head(); q != 0; q=q->next())
203 for (r=NonTerminals.head(); r != 0; r=r->next(),i++)
204 rules = cons(cons(flocons(probs[i]),
205 cons(rintern(NonTerminals(p)),
206 cons(rintern(NonTerminals(q)),
207 cons(rintern(NonTerminals(r)),NIL)))),
// Rules that rewrite p to a single terminal.
209 for (q=Terminals.head(); q != 0; q=q->next(),i++)
210 rules = cons(cons(flocons(probs[i]),
211 cons(rintern(NonTerminals(p)),
212 cons(rintern(Terminals(q)),NIL))),
217 return reverse(rules);
// Fills probs[0..num-1] with initial rule scores. The distribution is chosen
// by `values`, which is not a parameter of this function and so must be
// declared outside it:
//   "equal"  - uses defp = 1/num (the per-entry assignment is not visible),
//   "random" - each entry is derived from rand(); a second pass over the
//              array follows, with its body not visible here,
//   other    - an error message is written to cerr.
// Parameters:
//   probs - destination array, written at indices 0..num-1
//   num   - number of entries to generate
220 static void generate_probs(
double *probs,
int num)
225 if (values ==
"equal")
227 double defp = 1.0/(float)num;
228 for (i=0; i < num; i++)
231 else if (values ==
"random")
235 for (i=0; i < num; i++)
// NOTE(review): dividing by 0x7fff assumes rand() never exceeds 32767. On
// platforms with a larger RAND_MAX the quotient can exceed 1.0; confirm that
// the following loop compensates (e.g. by normalising).
237 probs[i] = (double)abs(rand())/(
double)0x7fff;
240 for (i=0; i < num; i++)
247 cerr <<
"scfg_make: unknown value for probability distribution"
// Adjusts rule scores for the requested domain. When domain is "nlogp", the
// first element of every rule is overwritten in place: a score of exactly 0
// becomes 40, any other score s becomes -log(s).
// (40 presumably stands in for the unbounded -log(0); TODO confirm.)
// Handling of other domains, the remaining parameter and the return statement
// are not visible in this listing.
253 static LISP assign_probs(LISP rules,
const EST_String &domain,
260 if (domain ==
"nlogp")
261 for (r=rules; r != NIL; r = cdr(r))
262 if (get_c_float(car(car(r))) == 0)
263 CAR(car(r)) = flocons(40);
265 CAR(car(r)) = flocons(-log(get_c_float(car(car(r)))));
// Symbol-name generation. The enclosing function header is not visible here;
// the make_symbols(list, n, prefix) calls in this file suggest this is its
// body (TODO confirm).
// The loop below runs once per decimal digit of n; its body is not visible
// (presumably it increments magnitude, TODO confirm).
276 for (magnitude=0,t=n; t > 0; t=t/10)
// name holds one generated symbol: prefix, magnitude digits and a NUL.
279 char *name = walloc(
char,prefix.
length()+magnitude+1);
// skel holds the per-symbol format string: prefix, '%', two digits, 'd' and
// a NUL, i.e. prefix.length()+5 bytes. That fits only while magnitude
// prints as at most two digits.
280 char *skel = walloc(
char,prefix.
length()+5);
// Produces e.g. "NT%02d" for prefix "NT" and magnitude 2, so every index is
// zero-padded to magnitude digits.
281 sprintf(skel,
"%s%%%02dd",(
const char *)prefix,magnitude);
// Format each index 0..n-1 into name using the skeleton built above.
283 for (i=0; i < n; i++)
285 sprintf(name,skel,i);
int ival(const EST_String &rkey, int m=1) const
const int present(const K &rkey) const
Returns true if key is present.
int length(void) const
Length of the string (not the length of the underlying chunk).
const V & val(const K &rkey, bool m=0) const
Return the value stored for the given key (const version).
void append(const T &item)
Add an item onto the end of the list.
EST_read_status load_StrList(EST_String filename, EST_StrList &l)
Load tokens from a file and return them in a EST_StrList.
int matches(const char *e, int pos=0) const
Exactly match this string?