5 #include "rex/rextransition.h"
6 #include "rex/rexnfasimulator.h"
7 #include "rex/rexdfaconv.h"
8 #include "rex/rexcompiler.h"
11 unsigned long nstates;
13 unsigned long nsubstates;
14 unsigned long naccsubstates;
18 rexdb_t *rex_db_createdfa(rexdb_t *nfa, unsigned long start)
20 rexdfaconv_t *conv = rex_dfaconv_create();
21 rexdb_t *dfa = rex_dfaconv_run(conv, nfa, start);
22 rex_dfaconv_destroy(conv);
27 void rex_db_cleanup(robject_t *obj)
30 rexdb_t *rexdb = (rexdb_t*) obj;
32 for (i = 0; i < r_array_length(rexdb->states); i++)
33 rex_state_destroy(r_array_index(rexdb->states, i, rexstate_t*));
34 r_array_destroy(rexdb->states);
35 r_array_destroy(rexdb->substates);
36 rex_compiler_destroy(rexdb->co);
37 r_object_cleanup(obj);
41 robject_t *rex_db_init(robject_t *obj, unsigned int objtype, r_object_cleanupfun cleanup, rexdb_type_t type)
43 rexdb_t *rexdb = (rexdb_t*) obj;
45 r_object_init(obj, objtype, cleanup, NULL);
47 rexdb->states = r_array_create(sizeof(rexstate_t*));
48 rexdb->substates = r_array_create(sizeof(rexsubstate_t));
50 if (type == REXDB_TYPE_DFA) {
52 * Create the dead state.
54 rex_db_createstate(rexdb , REX_STATETYPE_DEAD);
56 if (type == REXDB_TYPE_NFA) {
57 rexdb->co = rex_compiler_create(rexdb);
59 return (robject_t*)rexdb;
63 rexdb_t *rex_db_create(rexdb_type_t type)
67 rexdb = (rexdb_t*)r_object_create(sizeof(*rexdb));
68 rex_db_init((robject_t*)rexdb, R_OBJECT_REXDB, rex_db_cleanup, type);
73 void rex_db_destroy(rexdb_t *rexdb)
75 r_object_destroy((robject_t*)rexdb);
79 long rex_db_createstate(rexdb_t *rexdb, rex_statetype_t type)
81 unsigned long uid = 0;
84 state = rex_state_create(-1, type);
85 uid = r_array_add(rexdb->states, &state);
91 long rex_db_insertstate(rexdb_t *rexdb, rexstate_t *s)
93 long uid = r_array_insert(rexdb->states, s->uid, &s);
94 if (s->type == REX_STATETYPE_START)
100 rexstate_t *rex_db_getstate(rexdb_t *rexdb, long uid)
102 if (uid >= 0 && uid < r_array_length(rexdb->states))
103 return r_array_index(rexdb->states, uid, rexstate_t*);
108 long rex_db_findstate(rexdb_t *a, const rarray_t *subset)
113 for (i = 0; i < r_array_length(a->states); i++) {
114 s = r_array_index(a->states, i, rexstate_t*);
115 if (r_array_length(s->subset) && r_array_length(s->subset) == r_array_length(subset)) {
116 for (j = 0; j < r_array_length(subset); j++) {
117 if (rex_subset_index(s->subset, j) != rex_subset_index(subset, j))
120 if (j == r_array_length(subset))
128 rex_transition_t *rex_db_addrangetrasition(rexdb_t *rexdb, rexchar_t c1, rexchar_t c2, unsigned long srcuid, unsigned long dstuid)
130 rexstate_t *s = rex_db_getstate(rexdb, srcuid);
134 return rex_state_addtransition(s, c1, c2, dstuid);
138 rex_transition_t *rex_db_addtrasition_e(rexdb_t *rexdb, unsigned long srcuid, unsigned long dstuid)
140 rexstate_t *s = rex_db_getstate(rexdb, srcuid);
142 if (rexdb->type == REXDB_TYPE_DFA || s == NULL)
144 return rex_state_addtransition_e(s, dstuid);
148 int rex_db_simulate_nfa(rexdb_t *rexdb, long uid, const char *str, const char *end)
151 rexstate_t *state = rex_db_getstate(rexdb, uid);
157 fprintf(stdout, "state: %ld, string: %s\n", uid, str);
159 if (state->type == REX_STATETYPE_ACCEPT)
162 for (i = 0; *str && i < r_array_length(state->trans); i++) {
163 t = (rex_transition_t *)r_array_slot(state->trans, i);
164 wcsize = r_utf8_mbtowc(&wc, (const unsigned char*)str, (const unsigned char*)end);
165 if (t->lowin == wc) {
166 ret = rex_db_simulate_nfa(rexdb, t->dstuid, str + wcsize, end);
173 * Look for e transitions
175 for (i = 0; i < r_array_length(state->etrans); i++) {
176 t = (rex_transition_t *)r_array_slot(state->etrans, i);
177 ret = rex_db_simulate_nfa(rexdb, t->dstuid, str, end);
186 rexsubstate_t *rex_db_getsubstate(rexdb_t *rexdb, unsigned long uid)
188 if (uid < r_array_length(rexdb->substates)) {
189 return (rexsubstate_t *)r_array_slot(rexdb->substates, uid);
195 long rex_db_addexpression(rexdb_t *rexdb, unsigned long prev, const char *str, unsigned int size, rexuserdata_t userdata)
197 if (rexdb->type != REXDB_TYPE_NFA)
199 return rex_compiler_addexpression(rexdb->co, rexdb, prev, str, size, userdata);
203 long rex_db_addexpression_s(rexdb_t *rexdb, unsigned long prev, const char *str, rexuserdata_t userdata)
205 if (rexdb->type != REXDB_TYPE_NFA)
207 return rex_compiler_addexpression_s(rexdb->co, rexdb, prev, str, userdata);
211 void rex_db_dumpstate(rexdb_t *rexdb, unsigned long uid)
215 int bufsize = sizeof(buf) - 1;
219 rexstate_t *state = rex_db_getstate(rexdb, uid);
223 fprintf(stdout, "State %ld", state->uid);
224 if (rex_subset_length(state->subset)) {
225 fprintf(stdout, " (");
226 for (index = 0; index < rex_subset_length(state->subset); index++) {
227 fprintf(stdout, "%ld", rex_subset_index(state->subset, index));
228 if ((ss = rex_db_getsubstate(rexdb, rex_subset_index(state->subset, index))) != NULL) {
229 if (ss->ss_type == REX_STATETYPE_ACCEPT)
230 fprintf(stdout, "*");
232 fprintf(stdout, ",");
234 fprintf(stdout, ")");
236 fprintf(stdout, ": ");
237 if (state->type == REX_STATETYPE_ACCEPT) {
238 fprintf(stdout, " REX_STATETYPE_ACCEPT ");
239 } else if (state->type == REX_STATETYPE_DEAD) {
240 fprintf(stdout, " REX_STATETYPE_DEAD ");
241 } else if (state->type == REX_STATETYPE_START) {
242 fprintf(stdout, " REX_STATETYPE_START ");
244 fprintf(stdout, "\n");
246 for (index = 0; index < r_array_length(state->etrans); index++) {
248 t = (rex_transition_t *)r_array_slot(state->etrans, index);
249 if (t->type == REX_TRANSITION_EMPTY) {
250 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, " epsilon ");
251 r_memset(buf + n, ' ', bufsize - n);
253 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, "-> %ld", t->dstuid);
254 fprintf(stdout, "%s\n", buf);
257 for (index = 0; index < r_array_length(state->trans); index++) {
258 t = (rex_transition_t *)r_array_slot(state->trans, index);
260 if (t->type == REX_TRANSITION_EMPTY) {
261 fprintf(stdout, " epsilon -> %ld\n", t->dstuid);
262 } else if (t->lowin != t->highin) {
263 if (isprint(t->lowin) && !isspace(t->lowin) && isprint(t->highin) && !isspace(t->highin))
264 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, " [%c - %c] ", t->lowin, t->highin);
266 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, " [0x%X - 0x%X] ", t->lowin, t->highin);
268 if (isprint(t->lowin) && !isspace(t->lowin))
269 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, " '%c' ", t->lowin);
271 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, " 0x%X ", t->lowin);
273 r_memset(buf + n, ' ', bufsize - n);
275 n += r_snprintf(buf + n, n < bufsize ? bufsize - n : 0, "-> %ld", t->dstuid);
276 fprintf(stdout, "%s\n", buf);
278 if (!r_array_length(state->etrans) && !r_array_length(state->trans))
279 fprintf(stdout, " (none)\n");
280 fprintf(stdout, "\n");
285 long rex_db_numtransitions(rexdb_t *rexdb)
290 for (i = 0; i < r_array_length(rexdb->states); i++) {
291 rexstate_t *state = rex_db_getstate(rexdb, i);
292 ret += r_array_length(state->trans);
298 long rex_db_numstates(rexdb_t *rexdb)
300 return r_array_length(rexdb->states);
304 long rex_db_numsubstates(rexdb_t *rexdb)
309 for (i = 0; i < r_array_length(rexdb->states); i++) {
310 rexstate_t *state = rex_db_getstate(rexdb, i);
311 ret += rex_subset_length(state->subset);
317 long rex_db_numaccsubstates(rexdb_t *rexdb)
322 for (i = 0; i < r_array_length(rexdb->states); i++) {
323 rexstate_t *state = rex_db_getstate(rexdb, i);
324 for (j = 0; j < rex_subset_length(state->subset); j++) {
325 rexsubstate_t *substate = rex_db_getsubstate(rexdb, rex_subset_index(state->subset, j));
326 if (substate->ss_type == REX_STATETYPE_ACCEPT)
334 const char *rex_db_version()
340 static void rex_db_filldfastate(rexdb_t *db, rexdfa_t *dfa, struct rexdfa_ctx *ctx, rexstate_t *state, int withsubstates)
343 rex_transition_t *t = NULL;
344 rexdfs_t *s = &dfa->states[ctx->nstates++];
345 s->type = state->type;
346 s->trans = ctx->ntrnas;
347 s->ntrans = r_array_length(state->trans);
348 for (i = 0; i < s->ntrans; i++) {
349 t = (rex_transition_t *)r_array_slot(state->trans, i);
350 dfa->trans[s->trans + i].lowin = t->lowin;
351 dfa->trans[s->trans + i].highin = t->highin;
352 dfa->trans[s->trans + i].state = t->dstuid;
354 ctx->ntrnas += s->ntrans;
355 s->accsubstates = ctx->naccsubstates;
356 s->naccsubstates = 0L;
357 for (i = 0; i < rex_subset_length(state->subset); i++) {
358 unsigned long uid = rex_subset_index(state->subset, i);
359 rexsubstate_t *substate = rex_db_getsubstate(db, uid);
360 if (substate->ss_type == REX_STATETYPE_ACCEPT) {
361 dfa->accsubstates[s->accsubstates + s->naccsubstates].uid = uid;
362 dfa->accsubstates[s->accsubstates + s->naccsubstates].type = substate->ss_type;
363 dfa->accsubstates[s->accsubstates + s->naccsubstates].userdata = substate->ss_userdata;
367 ctx->naccsubstates += s->naccsubstates;
369 s->substates = ctx->nsubstates;
370 s->nsubstates = rex_subset_length(state->subset);
371 for (i = 0; i < rex_subset_length(state->subset); i++) {
372 unsigned long uid = rex_subset_index(state->subset, i);
373 rexsubstate_t *substate = rex_db_getsubstate(db, uid);
374 dfa->substates[s->substates + i].uid = uid;
375 dfa->substates[s->substates + i].type = substate->ss_type;
376 dfa->substates[s->substates + i].userdata = substate->ss_userdata;
378 ctx->nsubstates += s->nsubstates;
387 rexdfa_t *rex_db_todfa(rexdb_t *db, int withsubstates)
391 struct rexdfa_ctx ctx;
392 unsigned long nstates = rex_db_numstates(db);
393 unsigned long ntrans = rex_db_numtransitions(db);
394 unsigned long naccsubstates = rex_db_numaccsubstates(db);
395 unsigned long nsubstates = withsubstates ? rex_db_numsubstates(db) : 0UL;
396 dfa = rex_dfa_create(nstates, ntrans, naccsubstates, nsubstates);
397 r_memset(&ctx, 0, sizeof(ctx));
399 for (i = 0; i < r_array_length(db->states); i++) {
400 rexstate_t *state = rex_db_getstate(db, i);
401 rex_db_filldfastate(db, dfa, &ctx, state, withsubstates);
403 R_ASSERT(ctx.nstates == nstates);
404 R_ASSERT(ctx.ntrnas == ntrans);
405 R_ASSERT(ctx.nsubstates == nsubstates);
406 R_ASSERT(ctx.naccsubstates == naccsubstates);
411 int rex_db_isempty(rexdb_t *db)
415 return r_array_length(db->states) ? 0 : 1;