RPA Toolkit
b15415b1a2c24483b09edb69cd026faa63706ef0
[rpatk.git] / rex / rexdb.c
1 #include <stdio.h>
2 #include <ctype.h>
3 #include "rlib/rmem.h"
4 #include "rlib/rutf.h"
5 #include "rex/rextransition.h"
6 #include "rex/rexnfasimulator.h"
7 #include "rex/rexdfaconv.h"
8 #include "rex/rexcompiler.h"
9
10 struct rexdfa_ctx {
11         unsigned long nstates;
12         unsigned long ntrnas;
13         unsigned long nsubstates;
14         unsigned long naccsubstates;
15 };
16
17
18 rexdb_t *rex_db_createdfa(rexdb_t *nfa, unsigned long start)
19 {
20         rexdfaconv_t *conv = rex_dfaconv_create();
21         rexdb_t *dfa = rex_dfaconv_run(conv, nfa, start);
22         rex_dfaconv_destroy(conv);
23         return dfa;
24 }
25
26
27 void rex_db_cleanup(robject_t *obj)
28 {
29         long i;
30         rexdb_t *rexdb = (rexdb_t*) obj;
31
32         for (i = 0; i < r_array_length(rexdb->states); i++)
33                 rex_state_destroy(r_array_index(rexdb->states, i, rexstate_t*));
34         r_array_destroy(rexdb->states);
35         r_array_destroy(rexdb->substates);
36         rex_compiler_destroy(rexdb->co);
37         r_object_cleanup(obj);
38 }
39
40
41 robject_t *rex_db_init(robject_t *obj, unsigned int objtype, r_object_cleanupfun cleanup, rexdb_type_t type)
42 {
43         rexdb_t *rexdb = (rexdb_t*) obj;
44
45         r_object_init(obj, objtype, cleanup, NULL);
46         rexdb->co = NULL;
47         rexdb->states = r_array_create(sizeof(rexstate_t*));
48         rexdb->substates = r_array_create(sizeof(rexsubstate_t));
49         rexdb->type = type;
50         if (type == REXDB_TYPE_DFA) {
51                 /*
52                  * Create the dead state.
53                  */
54                 rex_db_createstate(rexdb , REX_STATETYPE_DEAD);
55         }
56         if (type == REXDB_TYPE_NFA) {
57                 rexdb->co = rex_compiler_create(rexdb);
58         }
59         return (robject_t*)rexdb;
60 }
61
62
63 rexdb_t *rex_db_create(rexdb_type_t type)
64 {
65         rexdb_t *rexdb;
66
67         rexdb = (rexdb_t*)r_object_create(sizeof(*rexdb));
68         rex_db_init((robject_t*)rexdb, R_OBJECT_REXDB, rex_db_cleanup, type);
69         return rexdb;
70 }
71
72
73 void rex_db_destroy(rexdb_t *rexdb)
74 {
75         r_object_destroy((robject_t*)rexdb);
76 }
77
78
79 long rex_db_createstate(rexdb_t *rexdb, rex_statetype_t type)
80 {
81         unsigned long uid = 0;
82         rexstate_t *state;
83
84         state = rex_state_create(-1, type);
85         uid = r_array_add(rexdb->states, &state);
86         state->uid = uid;
87         return uid;
88 }
89
90
91 long rex_db_insertstate(rexdb_t *rexdb, rexstate_t *s)
92 {
93         long uid = r_array_insert(rexdb->states, s->uid, &s);
94         if (s->type == REX_STATETYPE_START)
95                 rexdb->start = s;
96         return uid;
97 }
98
99
100 rexstate_t *rex_db_getstate(rexdb_t *rexdb, long uid)
101 {
102         if (uid >= 0 && uid < r_array_length(rexdb->states))
103                 return r_array_index(rexdb->states, uid, rexstate_t*);
104         return NULL;
105 }
106
107
108 long rex_db_findstate(rexdb_t *a, const rarray_t *subset)
109 {
110         long i, j;
111         rexstate_t *s;
112
113         for (i = 0; i < r_array_length(a->states); i++) {
114                 s = r_array_index(a->states, i, rexstate_t*);
115                 if (r_array_length(s->subset) && r_array_length(s->subset) == r_array_length(subset)) {
116                         for (j = 0; j < r_array_length(subset); j++) {
117                                 if (rex_subset_index(s->subset, j) != rex_subset_index(subset, j))
118                                         break;
119                         }
120                         if (j == r_array_length(subset))
121                                 return s->uid;
122                 }
123         }
124         return -1;
125 }
126
127
128 rex_transition_t *rex_db_addrangetrasition(rexdb_t *rexdb, rexchar_t c1, rexchar_t c2, unsigned long srcuid, unsigned long dstuid)
129 {
130         rexstate_t *s = rex_db_getstate(rexdb, srcuid);
131
132         if (s == NULL)
133                 return NULL;
134         return rex_state_addtransition(s, c1, c2, dstuid);
135 }
136
137
138 rex_transition_t *rex_db_addtrasition_e(rexdb_t *rexdb, unsigned long srcuid, unsigned long dstuid)
139 {
140         rexstate_t *s = rex_db_getstate(rexdb, srcuid);
141
142         if (rexdb->type == REXDB_TYPE_DFA || s == NULL)
143                 return NULL;
144         return rex_state_addtransition_e(s, dstuid);
145 }
146
147
148 int rex_db_simulate_nfa(rexdb_t *rexdb, long uid, const char *str, const char *end)
149 {
150         rex_transition_t *t;
151         rexstate_t *state = rex_db_getstate(rexdb, uid);
152         long i;
153         int ret, wcsize;
154         ruint32 wc;
155
156 #if 1
157         fprintf(stdout, "state: %ld, string: %s\n", uid, str);
158 #endif
159         if (state->type == REX_STATETYPE_ACCEPT)
160                 return 0;
161
162         for (i = 0; *str && i < r_array_length(state->trans); i++) {
163                 t = (rex_transition_t *)r_array_slot(state->trans, i);
164                 wcsize = r_utf8_mbtowc(&wc, (const unsigned char*)str, (const unsigned char*)end);
165                 if (t->lowin == wc) {
166                         ret = rex_db_simulate_nfa(rexdb, t->dstuid, str + wcsize, end);
167                         if (ret >= 0)
168                                 return ret + wcsize;
169                 }
170         }
171
172         /*
173          * Look for e transitions
174          */
175         for (i = 0; i < r_array_length(state->etrans); i++) {
176                 t = (rex_transition_t *)r_array_slot(state->etrans, i);
177                 ret = rex_db_simulate_nfa(rexdb, t->dstuid, str, end);
178                 if (ret >= 0)
179                         return ret;
180         }
181
182         return -1;
183 }
184
185
186 rexsubstate_t *rex_db_getsubstate(rexdb_t *rexdb, unsigned long uid)
187 {
188         if (uid < r_array_length(rexdb->substates)) {
189                 return (rexsubstate_t *)r_array_slot(rexdb->substates, uid);
190         }
191         return NULL;
192 }
193
194
195 long rex_db_addexpression(rexdb_t *rexdb, unsigned long prev, const char *str, unsigned int size, rexuserdata_t userdata)
196 {
197         if (rexdb->type != REXDB_TYPE_NFA)
198                 return -1;
199         return rex_compiler_addexpression(rexdb->co, rexdb, prev, str, size, userdata);
200 }
201
202
203 long rex_db_addexpression_s(rexdb_t *rexdb, unsigned long prev, const char *str, rexuserdata_t userdata)
204 {
205         if (rexdb->type != REXDB_TYPE_NFA)
206                 return -1;
207         return rex_compiler_addexpression_s(rexdb->co, rexdb, prev, str, userdata);
208 }
209
210
211 void rex_db_dumpstate(rexdb_t *rexdb, unsigned long uid)
212 {
213         long index;
214         char buf[240];
215         int bufsize = sizeof(buf) - 1;
216         int n = 0;
217         rex_transition_t *t;
218         rexsubstate_t *ss;
219         rexstate_t *state = rex_db_getstate(rexdb, uid);
220
221         if (!state)
222                 return;
223         fprintf(stdout, "State %ld", state->uid);
224         if (rex_subset_length(state->subset)) {
225                 fprintf(stdout, " (");
226                 for (index = 0; index < rex_subset_length(state->subset); index++) {
227                         fprintf(stdout, "%ld", rex_subset_index(state->subset, index));
228                         if ((ss = rex_db_getsubstate(rexdb, rex_subset_index(state->subset, index))) != NULL) {
229                                 if (ss->ss_type == REX_STATETYPE_ACCEPT)
230                                         fprintf(stdout, "*");
231                         }
232                         fprintf(stdout, ",");
233                 }
234                 fprintf(stdout, ")");
235         }
236         fprintf(stdout, ": ");
237         if (state->type == REX_STATETYPE_ACCEPT) {
238                 fprintf(stdout, " REX_STATETYPE_ACCEPT ");
239         } else if (state->type == REX_STATETYPE_DEAD) {
240                 fprintf(stdout, " REX_STATETYPE_DEAD ");
241         } else if (state->type == REX_STATETYPE_START) {
242                 fprintf(stdout, " REX_STATETYPE_START ");
243         }
244         fprintf(stdout, "\n");
245
246         for (index = 0; index < r_array_length(state->etrans); index++) {
247                 n = 0;
248                 t = (rex_transition_t *)r_array_slot(state->etrans, index);
249                 if (t->type == REX_TRANSITION_EMPTY) {
250                         n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, "        epsilon ");
251                         r_memset(buf + n, ' ', bufsize - n);
252                         n = 40;
253                         n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, "-> %ld", t->dstuid);
254                         fprintf(stdout, "%s\n", buf);
255                 }
256         }
257         for (index = 0; index < r_array_length(state->trans); index++) {
258                 t = (rex_transition_t *)r_array_slot(state->trans, index);
259                 n = 0;
260                 if (t->type == REX_TRANSITION_EMPTY) {
261                         fprintf(stdout, "    epsilon -> %ld\n", t->dstuid);
262                 } else if (t->lowin != t->highin) {
263                         if (isprint(t->lowin) && !isspace(t->lowin) && isprint(t->highin) && !isspace(t->highin))
264                                 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, "        [%c - %c] ", t->lowin, t->highin);
265                         else
266                                 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, "        [0x%X - 0x%X] ", t->lowin, t->highin);
267                 } else {
268                         if (isprint(t->lowin) && !isspace(t->lowin))
269                                 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, "        '%c' ", t->lowin);
270                         else
271                                 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, "        0x%X ", t->lowin);
272                 }
273                 r_memset(buf + n, ' ', bufsize - n);
274                 n = 40;
275                 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, "-> %ld", t->dstuid);
276                 fprintf(stdout, "%s\n", buf);
277         }
278         if (!r_array_length(state->etrans) && !r_array_length(state->trans))
279                 fprintf(stdout, "        (none)\n");
280         fprintf(stdout, "\n");
281
282 }
283
284
285 long rex_db_numtransitions(rexdb_t *rexdb)
286 {
287         long i;
288         long ret = 0;
289
290         for (i = 0; i < r_array_length(rexdb->states); i++) {
291                 rexstate_t *state = rex_db_getstate(rexdb, i);
292                 ret += r_array_length(state->trans);
293         }
294         return ret;
295 }
296
297
298 long rex_db_numstates(rexdb_t *rexdb)
299 {
300         return r_array_length(rexdb->states);
301 }
302
303
304 long rex_db_numsubstates(rexdb_t *rexdb)
305 {
306         long i;
307         long ret = 0;
308
309         for (i = 0; i < r_array_length(rexdb->states); i++) {
310                 rexstate_t *state = rex_db_getstate(rexdb, i);
311                 ret += rex_subset_length(state->subset);
312         }
313         return ret;
314 }
315
316
317 long rex_db_numaccsubstates(rexdb_t *rexdb)
318 {
319         long i, j;
320         long ret = 0;
321
322         for (i = 0; i < r_array_length(rexdb->states); i++) {
323                 rexstate_t *state = rex_db_getstate(rexdb, i);
324                 for (j = 0; j < rex_subset_length(state->subset); j++) {
325                         rexsubstate_t *substate = rex_db_getsubstate(rexdb, rex_subset_index(state->subset, j));
326                         if (substate->ss_type == REX_STATETYPE_ACCEPT)
327                                 ret += 1;
328                 }
329         }
330         return ret;
331 }
332
333
334 const char *rex_db_version()
335 {
336         return "1.0";
337 }
338
339
340 static void rex_db_filldfastate(rexdb_t *db, rexdfa_t *dfa, struct rexdfa_ctx *ctx, rexstate_t *state, int withsubstates)
341 {
342         long i;
343         rex_transition_t *t = NULL;
344         rexdfs_t *s = &dfa->states[ctx->nstates++];
345         s->type = state->type;
346         s->trans = ctx->ntrnas;
347         s->ntrans = r_array_length(state->trans);
348         for (i = 0; i < s->ntrans; i++) {
349                 t = (rex_transition_t *)r_array_slot(state->trans, i);
350                 dfa->trans[s->trans + i].lowin = t->lowin;
351                 dfa->trans[s->trans + i].highin = t->highin;
352                 dfa->trans[s->trans + i].state = t->dstuid;
353         }
354         ctx->ntrnas += s->ntrans;
355         s->accsubstates = ctx->naccsubstates;
356         s->naccsubstates = 0L;
357         for (i = 0; i < rex_subset_length(state->subset); i++) {
358                 unsigned long uid = rex_subset_index(state->subset, i);
359                 rexsubstate_t *substate = rex_db_getsubstate(db, uid);
360                 if (substate->ss_type == REX_STATETYPE_ACCEPT) {
361                         dfa->accsubstates[s->accsubstates + s->naccsubstates].uid = uid;
362                         dfa->accsubstates[s->accsubstates + s->naccsubstates].type = substate->ss_type;
363                         dfa->accsubstates[s->accsubstates + s->naccsubstates].userdata = substate->ss_userdata;
364                         s->naccsubstates++;
365                 }
366         }
367         ctx->naccsubstates += s->naccsubstates;
368         if (withsubstates) {
369                 s->substates = ctx->nsubstates;
370                 s->nsubstates = rex_subset_length(state->subset);
371                 for (i = 0; i < rex_subset_length(state->subset); i++) {
372                         unsigned long uid = rex_subset_index(state->subset, i);
373                         rexsubstate_t *substate = rex_db_getsubstate(db, uid);
374                         dfa->substates[s->substates + i].uid = uid;
375                         dfa->substates[s->substates + i].type = substate->ss_type;
376                         dfa->substates[s->substates + i].userdata = substate->ss_userdata;
377                 }
378                 ctx->nsubstates += s->nsubstates;
379         } else {
380                 s->substates = 0;
381                 s->nsubstates = 0;
382         }
383
384 }
385
386
387 rexdfa_t *rex_db_todfa(rexdb_t *db, int withsubstates)
388 {
389         long i;
390         rexdfa_t *dfa;
391         struct rexdfa_ctx ctx;
392         unsigned long nstates = rex_db_numstates(db);
393         unsigned long ntrans = rex_db_numtransitions(db);
394         unsigned long naccsubstates = rex_db_numaccsubstates(db);
395         unsigned long nsubstates = withsubstates ? rex_db_numsubstates(db) : 0UL;
396         dfa = rex_dfa_create(nstates, ntrans, naccsubstates, nsubstates);
397         r_memset(&ctx, 0, sizeof(ctx));
398
399         for (i = 0; i < r_array_length(db->states); i++) {
400                 rexstate_t *state = rex_db_getstate(db, i);
401                 rex_db_filldfastate(db, dfa, &ctx, state, withsubstates);
402         }
403         R_ASSERT(ctx.nstates == nstates);
404         R_ASSERT(ctx.ntrnas == ntrans);
405         R_ASSERT(ctx.nsubstates == nsubstates);
406         R_ASSERT(ctx.naccsubstates == naccsubstates);
407         return dfa;
408 }
409
410
411 int rex_db_isempty(rexdb_t *db)
412 {
413         if (!db)
414                 return 0;
415         return r_array_length(db->states) ? 0 : 1;
416 }