45 #include "EST_String.h"
48 #include "EST_THash.h"
57 static
EST_IList int_EST_IList_kv_def_EST_IList_s;
58 static
int int_EST_IList_kv_def_int_s;
61 template <>
int *
EST_TKVL<
int,
EST_IList >::default_key=&int_EST_IList_kv_def_int_s;
63 Declare_TList_N(KVI_int_EST_IList_t, 0)
66 #if defined(INSTANTIATE_TEMPLATES)
67 #include "../base_class/EST_TList.cc"
71 #include "../base_class/EST_TKVL.cc"
80 Instantiate_TStructIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt)
81 Instantiate_TIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt)
85 template class
EST_TList< TLIST_KVI_int_EST_IList_t_VAL >;
86 template class
EST_TItem< TLIST_KVI_int_EST_IList_t_VAL >;
87 template const
char *error_name(
EST_TList< KVI_int_EST_IList_t > val);
89 Instantiate_TIterator_T(
EST_TList<KVI_int_EST_IList_t>,
EST_TList<KVI_int_EST_IList_t>::IPointer, KVI_int_EST_IList_t, TList_KVI_int_EST_IList_t_itt);
96 static enum wfst_state_type intersect_state_type(
wfst_list &wl,
98 static int check_distinguished(
const EST_WFST &nmwfst,
103 void EST_WFST_MultiState::add(
int i)
108 if (p_type == wfst_ms_set)
109 for (p=head(); p != 0; p=p->next())
113 else if (i < (*
this)(p))
134 for (p=ms->head(); p != 0; p = p->next())
137 ns = index.
val(istring,found);
154 pairs_done.
val(p,found);
169 Agenda multistate_agenda;
176 p_in_symbols.copy(ndwfst.p_in_symbols);
177 p_out_symbols.copy(ndwfst.p_out_symbols);
181 start_state->add(ndwfst.start_state());
185 start_state->set_name(p_start_state);
187 multistate_agenda.append(start_state);
189 while (multistate_agenda.length() > 0)
192 current = multistate_agenda.
first();
193 multistate_agenda.remove(multistate_agenda.head());
195 cout <<
"Determinizing " << summary() <<
" Agenda "
196 << multistate_agenda.
length() << endl;
199 for (sp=current->head(); sp != 0; sp=sp->next())
202 for (tp=s->transitions.head(); tp != 0; tp = tp->next())
204 i = s->transitions(tp)->in_symbol();
205 o = s->transitions(tp)->out_symbol();
207 if (pair_check(pairs_done,i,o,p_out_symbols.
length()) == 1)
213 if ((i==o) && (i==0))
216 if ((nms->length() == 0) ||
217 (ndwfst.
ms_type(nms) == wfst_error))
222 new_name = multistate_index(index,nms,p_num_states);
223 if (new_name == p_num_states)
227 multistate_agenda.append(nms);
231 nms->set_name(new_name);
236 p_states[current->name()]
237 ->add_transition(nms->weight(),
252 int in,
int out)
const
260 for (p=ms->head(); p != 0; p=p->next())
276 enum wfst_state_type r = wfst_nonfinal;
278 for (p=ms->head(); p != 0; p = p->next())
279 if (p_states((*ms)(p))->type() == wfst_error)
281 else if (p_states((*ms)(p))->type() == wfst_licence)
284 else if ((p_states((*ms)(p))->type() == wfst_final) &&
288 if (r == wfst_licence)
289 return wfst_nonfinal;
303 for (i=s->transitions.head(); i != 0; i=i->next())
305 if ((in == s->transitions(i)->in_symbol()) &&
306 (out == s->transitions(i)->out_symbol()))
307 ms->add(s->transitions(i)->state());
311 static int is_a_member(
const EST_IList &ii,
int i)
313 for (
EST_Litem *p=ii.head(); p != 0; p=p->next())
328 for (p=ms->head(); p != 0; p=p->next())
331 for (p=ii.head(); p != 0; p=p->next())
336 for (i=s->transitions.head(); i != 0; i=i->next())
338 if ((ie == s->transitions(i)->in_symbol()) &&
339 (oe == s->transitions(i)->out_symbol()))
342 int nstate = s->transitions(i)->state();
343 if (!is_a_member(ii,nstate));
362 Agenda multistate_agenda;
364 int i,o, new_name, n;
370 p_in_symbols.copy(wl.
first().p_in_symbols);
371 p_out_symbols.copy(wl.
first().p_out_symbols);
375 for (p=wl.tail(); p != 0; p=p->prev())
377 if (!wl(p).deterministic())
379 cout <<
"...intersection deterministing" << endl;
381 wl(p).determinize(tt);
383 start_state->add(wl(p).start_state());
386 p_start_state =
add_state(intersect_state_type(wl,start_state));
388 start_state->set_name(p_start_state);
390 multistate_agenda.append(start_state);
392 while (multistate_agenda.length() > 0)
394 current = multistate_agenda.
first();
395 multistate_agenda.remove(multistate_agenda.head());
397 cout <<
"Intersection " << summary() <<
" Agenda "
398 << multistate_agenda.
length() << endl;
402 for (i=0; i < p_in_symbols.
length(); i++)
404 for (o=0; o < p_out_symbols.
length(); o++)
406 if ((i==o) && (i==0))
411 for (n=0,p=wl.head(),q=current->head();
412 (p != 0) && (q != 0);
413 p=p->next(),q=q->next(),n++)
414 nms->add(wl(p).
transition((*current)(q),i,o));
415 if (intersect_state_type(wl,nms) == wfst_error)
420 new_name = multistate_index(index,nms,p_num_states);
421 if (new_name == p_num_states)
423 ns =
add_state(intersect_state_type(wl,nms));
425 multistate_agenda.append(nms);
429 nms->set_name(new_name);
434 p_states[current->name()]
435 ->add_transition(nms->weight(),nms->name(),i,o);
444 static enum wfst_state_type intersect_state_type(
wfst_list &wl,
451 enum wfst_state_type r = wfst_final;
453 for (p=wl.head(),q=ms->head();
454 (p != 0) && (q != 0);
455 p=p->next(),q=q->next())
457 if ((*ms)(q) == WFST_ERROR_STATE)
460 enum wfst_state_type dd = wl(p).state((*ms)(q))->type();
462 if (dd == wfst_error)
464 else if (dd == wfst_nonfinal)
492 for (p=0; p < nmwfst.num_states()-1; p++)
493 for (q=p+1; q < nmwfst.num_states(); q++)
494 check_distinguished(nmwfst,p,q,marks,assumptions);
502 marks.find_state_map(state_map,num_new_states);
506 p_in_symbols.copy(nmwfst.p_in_symbols);
507 p_out_symbols.copy(nmwfst.p_out_symbols);
509 init(num_new_states);
510 p_start_state = state_map(nmwfst.p_start_state);
512 for (i=0; i < nmwfst.num_states(); i++)
514 if (p_states[state_map(i)] == 0)
515 p_states[state_map(i)] =
516 copy_and_map_states(state_map,nmwfst.
state(i),nmwfst);
521 static int check_distinguished(
const EST_WFST &nmwfst,
530 if (marks.distinguished(p,q))
532 else if (marks.undistinguished(p,q))
536 else if ((nmwfst.
state(p)->type() != nmwfst.
state(q)->type()) ||
537 (nmwfst.
state(p)->num_transitions() !=
538 nmwfst.
state(q)->num_transitions()))
541 marks.distinguish(p,q);
546 for (t=nmwfst.
state(p)->transitions.head(); t != 0; t=t->next())
548 int in = nmwfst.
state(p)->transitions(t)->in_symbol();
549 int out = nmwfst.
state(p)->transitions(t)->out_symbol();
550 int y = nmwfst.
state(p)->transitions(t)->state();
552 if ((z == WFST_ERROR_STATE) ||
553 (marks.distinguished(y,z)))
555 marks.distinguish(p,q);
558 else if (equivalent_to(y,z,assumptions))
570 if (assumptions.
length() == 0)
573 add_assumption(p,q,assumptions);
574 for (yp=from_p.head(),zp=from_q.head();
575 yp != 0; yp=yp->next(),zp=zp->next())
576 if (check_distinguished(nmwfst,from_p(yp),from_q(zp),
580 marks.distinguish(p,q);
589 mark_undistinguished(marks,assumptions);
596 void EST_WFST::extend_alphabets(
const EST_WFST &b)
604 for (i=0; i<p_in_symbols.
length(); i++)
606 for (i=0; i<b.p_in_symbols.
length(); i++)
609 for (i=0; i<p_out_symbols.
length(); i++)
611 for (i=0; i<b.p_out_symbols.
length(); i++)
615 p_in_symbols.
init(ivocab);
616 p_out_symbols.
init(ovocab);
627 ns->set_type(s->type());
629 for (i=s->transitions.head(); i != 0; i=i->next())
631 int mapped_state= state_map(s->transitions(i)->state());
632 if (mapped_state != WFST_ERROR_STATE)
633 ns->add_transition(s->transitions(i)->weight(),
652 for (i=0; i < num_states(); i++)
654 if (p_states[i]->type() == wfst_final)
655 p_states[i]->set_type(wfst_nonfinal);
656 else if (p_states[i]->type() == wfst_nonfinal)
657 p_states[i]->set_type(wfst_final);
662 static int noloopstostart(
const EST_WFST &a)
669 for (i=0; i < a.num_states(); i++)
673 for (p=s->transitions.head(); p != 0; p=p->next())
675 if (s->transitions(p)->state() == a.start_state())
682 int EST_WFST::deterministiconstartstates(
const EST_WFST &a,
const EST_WFST &b)
const
695 tab(a.
state(a.start_state())->transitions(t)->in_symbol(),
696 a.
state(a.start_state())->transitions(t)->out_symbol()) = 1;
700 tt != 0; tt=tt->next())
708 else if (tab(in,out) == 1)
726 noloopstostart(a) && noloopstostart(b) &&
727 deterministiconstartstates(a,b))
731 smap.
resize(b.num_states());
732 smap[0] = start_state();
733 for (i=1; i < b.num_states(); ++i)
734 smap[i] = i+a.num_states()-1;
736 more_states(a.num_states()+b.num_states()-1);
737 p_num_states += b.num_states()-1;
738 for (i=1; i < b.num_states(); i++)
739 p_states[smap(i)] = copy_and_map_states(smap,b.
state(i),b);
743 for (p=s->transitions.head(); p != 0; p=p->next())
745 int mapped_state= smap(s->transitions(p)->state());
746 if (mapped_state != WFST_ERROR_STATE)
747 p_states[start_state()]->
748 add_transition(s->transitions(p)->weight(),
756 smap.
resize(b.num_states());
757 for (i=0; i < b.num_states(); ++i)
758 smap[i] = i+a.num_states();
760 more_states(a.num_states()+b.num_states());
761 p_num_states += b.num_states();
762 for (i=0; i < b.num_states(); i++)
763 p_states[smap(i)] = copy_and_map_states(smap,b.
state(i),b);
767 p_states[start_state()]->add_transition(0.0,smap[b.start_state()],
784 smap.
resize(b.num_states());
785 for (i=0; i < b.num_states(); ++i)
786 smap[i] = i+a.num_states();
788 more_states(a.num_states()+b.num_states());
792 for (i=0; i < num_states(); i++)
794 if (p_states[i]->type() == wfst_final)
796 p_states[i]->set_type(wfst_nonfinal);
797 p_states[i]->add_transition(0.0,smap[b.start_state()],
802 p_num_states += b.num_states();
803 for (i=0; i < b.num_states(); i++)
804 p_states[smap(i)] = copy_and_map_states(smap,b.
state(i),b);
816 Agenda multistate_agenda;
824 p_in_symbols.copy(a.p_in_symbols);
825 p_out_symbols.copy(b.p_out_symbols);
829 start_state->add(a.start_state());
831 start_state->add(b.start_state());
833 p_start_state =
add_state(intersect_state_type(wl,start_state));
835 start_state->set_name(p_start_state);
837 multistate_agenda.append(start_state);
839 while (multistate_agenda.length() > 0)
841 current = multistate_agenda.
first();
842 multistate_agenda.remove(multistate_agenda.head());
845 for (i=0; i < p_in_symbols.
length(); i++)
850 wl.
first().transduce(current->
first(),i,transa);
851 for (p=transa.head(); p != 0; p=p->next())
859 for (q = transb.head(); q != 0; q=q->next())
862 nms->add(transa(p)->
state());
863 nms->add(transb(q)->
state());
865 if (intersect_state_type(wl,nms) == wfst_error)
870 new_name = multistate_index(index,nms,p_num_states);
871 if (new_name == p_num_states)
873 int ns =
add_state(intersect_state_type(wl,nms));
875 multistate_agenda.append(nms);
878 nms->set_name(new_name);
881 p_states[current->name()]
882 ->add_transition(nms->weight(),nms->name(),
883 i,transb(q)->out_symbol());
905 for (
int i=0; i < compb.num_states(); i++)
907 if (compb.p_states[i]->type() == wfst_final)
908 compb.p_states[i]->set_type(wfst_error);
926 ab.current_tag = ++traverse_tag;
927 for (
int i=0; i < ab.p_num_states; i++)
928 ab.can_reach_final(i);
934 int EST_WFST::can_reach_final(
int state)
938 if (p_states[state]->type() == wfst_final)
940 else if (p_states[state]->type() == wfst_error)
942 else if (p_states[state]->tag() == current_tag)
948 enum wfst_state_type current_val = p_states[
state]->type();
949 enum wfst_state_type r = wfst_error;
951 p_states[
state]->set_type(wfst_error);
953 for (i=p_states[state]->transitions.head(); i != 0; i=i->next())
956 if (can_reach_final(p_states[state]->transitions(i)->
state()))
961 p_states[
state]->set_type(r);
966 p_states[
state]->set_tag(current_tag);
982 for (
int i=0; i < p_num_states; i++)
985 for (
EST_Litem *t=
state(i)->transitions.head(); t != 0; t=t->next())
LISP epsilon_label() const
LISP for on epsilon symbols.
const T & first() const
return const reference to first item in list
V & val(const K &key, int &found) const
void minimize(const EST_WFST &a)
Build minimized form of a.
void complement(const EST_WFST &a)
Build complement of a.
int transition(int state, int in, int out) const
Find (first) new state given in and out symbols.
int deterministic() const
True if WFST is deterministic.
int add_state(enum wfst_state_type state_type)
Add a new state, returns new name.
An open hash table. The number of buckets should be set to allow enough space that there are relative...
a call representing a weighted finite-state transducer
void clear()
clear removing existing states if any
const T & last() const
return const reference to last item in list
int out_epsilon() const
Internal index for output epsilon.
const EST_String & name(const int n) const
The name given the index.
EST_String itoString(int n)
Make a EST_String object from an integer.
void transition_all(int state, int in, int out, EST_WFST_MultiState *ms) const
Find all possible transitions for given state/input/output.
void intersection(EST_TList< EST_WFST > &wl)
A specialised hash table for when the key is an EST_String.
int add_item(const K &key, const V &value, int no_search=0)
Add an entry to the table.
void determinize(const EST_WFST &a)
Build determinized form of a.
int in_symbol(const EST_String &s) const
Map input symbol to input alphabet index.
void clear(void)
remove all items in list
an internal class to EST_WFST used in holding multi-states when determinizing and find the intersecti...
void fill(const T &v)
fill matrix with value v
Templated Key-Value Item. Serves as the items in the list of the EST_TKVL class.
an internal class for EST_WFST used to represent a state in a WFST
int out_symbol(const EST_String &s) const
Map output symbol to output alphabet index.
void compose(const EST_WFST &a, const EST_WFST &b)
const EST_WFST_State * state(int i) const
Return internal state information.
void concat(const EST_WFST &a, const EST_WFST &b)
void uunion(EST_TList< EST_WFST > &wl)
Templated Key-Value list. Objects of type EST_TKVL contain lists which are accessed by a key of type ...
const int length() const
number of key value pairs in list
void add_epsilon_reachable(EST_WFST_MultiState *ms) const
Extend multi-state with epsilon reachable states.
int length(void) const
Length of string ({not} length of underlying chunk)
EST_Litem * insert_before(EST_Litem *ptr, const int &item)
void append(const int &item)
add item onto end of list
int in_epsilon() const
Internal index for input epsilon.
int strlist_member(const EST_StrList &l, const EST_String &s)
Return true if s is in list l.
enum wfst_state_type ms_type(EST_WFST_MultiState *ms) const
Given a multi-state return type (final, ok, error)
bool init(const EST_StrList &vocab)
(re-)initialise
void remove_error_states(const EST_WFST &a)
Remove error states from the WFST.
EST_WFST_MultiState * apply_multistate(const EST_WFST &wfst, EST_WFST_MultiState *ms, int in, int out) const
Transduce a multi-state given n and out.
void resize(int rows, int cols, int set=1)
resize matrix
void init(int init_num_states=10)
Clear with (estimation of number of states required)
const int length(void) const
The number of members in the discrete.
void difference(const EST_WFST &a, const EST_WFST &b)
void copy(const EST_WFST &wfst)
Copy from existing wfst.
void resize(int n, int set=1)
resize vector