RPA Toolkit
Added macros for traversing rexdfa_t.
author Martin Stoilov <martin@rpasearch.com>
Fri, 3 Feb 2012 05:35:45 +0000 (21:35 -0800)
committer Martin Stoilov <martin@rpasearch.com>
Fri, 3 Feb 2012 05:35:45 +0000 (21:35 -0800)
rex/rexdb.c
rex/rexdb.h
rex/rexdef.h
rex/rexdfa.c
rex/rexdfa.h
rex/rexstate.h
rexcc/rexcc.c
rexcc/unix/main.c
rexgrep/rexgrep.c
rexgrep/unix/main.c
rjs/rjs.c

index cac618d..f3218af 100644 (file)
@@ -7,6 +7,13 @@
 #include "rex/rexdfaconv.h"
 #include "rex/rexcompiler.h"
 
+struct rexdfa_ctx { /* running fill counters used while copying rexdb states into a rexdfa_t */
+       unsigned long nstates; /* DFA state slots written so far */
+       unsigned long ntrnas; /* transitions written so far (name is a typo of "ntrans") */
+       unsigned long nsubstates; /* sub-state records written so far */
+       unsigned long naccsubstates; /* accepting sub-state records written so far */
+};
+
 
 rexdb_t *rex_db_createdfa(rexdb_t *nfa, unsigned long start)
 {
@@ -329,3 +336,68 @@ const char *rex_db_version()
 {
        return "1.0";
 }
+
+/* Append one rexdb state (its transitions, sub-states and accepting sub-states) to the flat dfa arrays at the offsets tracked in ctx. */
+static void rex_db_filldfastate(rexdb_t *db, rexdfa_t *dfa, struct rexdfa_ctx *ctx, rexstate_t *state)
+{
+       long i;
+       rex_transition_t *t = NULL;
+       rexdfs_t *s = &dfa->states[ctx->nstates++]; /* claim the next unused state slot */
+       s->type = state->type;
+       s->trans = ctx->ntrnas; /* index of this state's first entry in dfa->trans */
+       s->ntrans = r_array_length(state->trans);
+       for (i = 0; i < s->ntrans; i++) { /* copy input range and destination uid of every transition */
+               t = (rex_transition_t *)r_array_slot(state->trans, i);
+               dfa->trans[s->trans + i].lowin = t->lowin;
+               dfa->trans[s->trans + i].highin = t->highin;
+               dfa->trans[s->trans + i].state = t->dstuid;
+       }
+       ctx->ntrnas += s->ntrans; /* the next state's transitions start after these */
+       s->substates = ctx->nsubstates; /* index of this state's first entry in dfa->substates */
+       s->nsubstates = rex_subset_length(state->subset);
+       for (i = 0; i < s->nsubstates; i++) { /* record uid, type and userdata of every sub-state in the subset */
+               unsigned long uid = rex_subset_index(state->subset, i);
+               rexsubstate_t *substate = rex_db_getsubstate(db, uid);
+               dfa->substates[s->substates + i].uid = uid;
+               dfa->substates[s->substates + i].type = substate->ss_type;
+               dfa->substates[s->substates + i].userdata = substate->ss_userdata;
+       }
+       ctx->nsubstates += s->nsubstates;
+       s->accsubstates = ctx->naccsubstates; /* index of this state's first entry in dfa->accsubstates */
+       s->naccsubstates = 0L; /* counted in the loop below */
+       for (i = 0; i < s->nsubstates; i++) { /* second pass over the subset: keep only accepting sub-states */
+               unsigned long uid = rex_subset_index(state->subset, i);
+               rexsubstate_t *substate = rex_db_getsubstate(db, uid);
+               if (substate->ss_type == REX_STATETYPE_ACCEPT) {
+                       dfa->accsubstates[s->accsubstates + s->naccsubstates].uid = uid;
+                       dfa->accsubstates[s->accsubstates + s->naccsubstates].type = substate->ss_type;
+                       dfa->accsubstates[s->accsubstates + s->naccsubstates].userdata = substate->ss_userdata;
+                       s->naccsubstates++;
+               }
+       }
+       ctx->naccsubstates += s->naccsubstates;
+}
+
+/* Build a rexdfa_t from db: size it from the db's counts, fill it state by state, then assert the fill counters match those counts. */
+rexdfa_t *rex_db_todfa(rexdb_t *db)
+{
+       long i;
+       rexdfa_t *dfa;
+       struct rexdfa_ctx ctx;
+       unsigned long nstates = rex_db_numstates(db);
+       unsigned long ntrans = rex_db_numtransitions(db);
+       unsigned long nsubstates = rex_db_numsubstates(db);
+       unsigned long naccsubstates = rex_db_numaccsubstates(db);
+       dfa = rex_dfa_create(nstates, ntrans, nsubstates, naccsubstates); /* NOTE(review): result is used unchecked - confirm rex_dfa_create cannot return NULL */
+       r_memset(&ctx, 0, sizeof(ctx)); /* all fill counters start at zero */
+
+       for (i = 0; i < r_array_length(db->states); i++) { /* states are appended in db order */
+               rexstate_t *state = rex_db_getstate(db, i);
+               rex_db_filldfastate(db, dfa, &ctx, state);
+       }
+       R_ASSERT(ctx.nstates == nstates); /* everything counted up front must have been written exactly once */
+       R_ASSERT(ctx.ntrnas == ntrans);
+       R_ASSERT(ctx.nsubstates == nsubstates);
+       R_ASSERT(ctx.naccsubstates == naccsubstates);
+       return dfa;
+}
index 0737e24..58c6882 100644 (file)
@@ -69,7 +69,7 @@ long rex_db_numstates(rexdb_t *rexdb);
 long rex_db_numsubstates(rexdb_t *rexdb);
 long rex_db_numaccsubstates(rexdb_t *rexdb);
 const char *rex_db_version();
-rexdfa_t *rex_dfa_create_from_db(rexdb_t *db);
+rexdfa_t *rex_db_todfa(rexdb_t *db);
 
 
 /*
index 478867b..685f1e5 100644 (file)
 #ifndef _REXDEF_H_
 #define _REXDEF_H_
 
-#ifndef REX_USERDATA_TYPE
-typedef unsigned long rexuserdata_t;
-#else
-typedef REX_USERDATA_TYPE rexuserdata_t;
-#endif
-
-#ifndef REX_CHAR_TYPE
-typedef unsigned int rexchar_t;
-#else
-typedef REX_CHAR_TYPE rexchar_t;
-#endif
-#define REX_CHAR_MAX ((rexchar_t)-1)
-#define REX_CHAR_MIN ((rexchar_t)0)
-
+#include "rex/rexdfa.h"
 
 #endif /* _REXDEF_H_ */
index 8a3ca91..d5a6db5 100644 (file)
 #include <stdio.h>
 #include <ctype.h>
 #include "rlib/rmem.h"
+#include "rlib/rstring.h"
 #include "rex/rexdfa.h"
-#include "rex/rexdb.h"
-
-
-struct rexdfa_ctx {
-       unsigned long nstates;
-       unsigned long ntrnas;
-       unsigned long nsubstates;
-       unsigned long naccsubstates;
-};
 
 
 rexdfa_t *rex_dfa_create(unsigned long nstates, unsigned long ntrans, unsigned long nsubstates, unsigned long naccsubstates)
@@ -61,77 +53,12 @@ void rex_dfa_destroy(rexdfa_t *dfa)
 }
 
 
-static void rex_dfa_fillstate(rexdb_t *db, rexdfa_t *dfa, struct rexdfa_ctx *ctx, rexstate_t *state)
-{
-       long i;
-       rex_transition_t *t = NULL;
-       rexdfs_t *s = &dfa->states[ctx->nstates++];
-       s->type = state->type;
-       s->trans = ctx->ntrnas;
-       s->ntrans = r_array_length(state->trans);
-       for (i = 0; i < s->ntrans; i++) {
-               t = (rex_transition_t *)r_array_slot(state->trans, i);
-               dfa->trans[s->trans + i].lowin = t->lowin;
-               dfa->trans[s->trans + i].highin = t->highin;
-               dfa->trans[s->trans + i].state = t->dstuid;
-       }
-       ctx->ntrnas += s->ntrans;
-       s->substates = ctx->nsubstates;
-       s->nsubstates = rex_subset_length(state->subset);
-       for (i = 0; i < s->nsubstates; i++) {
-               unsigned long uid = rex_subset_index(state->subset, i);
-               rexsubstate_t *substate = rex_db_getsubstate(db, uid);
-               dfa->substates[s->substates + i].uid = uid;
-               dfa->substates[s->substates + i].type = substate->ss_type;
-               dfa->substates[s->substates + i].userdata = substate->ss_userdata;
-       }
-       ctx->nsubstates += s->nsubstates;
-       s->accsubstates = ctx->naccsubstates;
-       s->naccsubstates = 0L;
-       for (i = 0; i < s->nsubstates; i++) {
-               unsigned long uid = rex_subset_index(state->subset, i);
-               rexsubstate_t *substate = rex_db_getsubstate(db, uid);
-               if (substate->ss_type == REX_STATETYPE_ACCEPT) {
-                       dfa->accsubstates[s->accsubstates + s->naccsubstates].uid = uid;
-                       dfa->accsubstates[s->accsubstates + s->naccsubstates].type = substate->ss_type;
-                       dfa->accsubstates[s->accsubstates + s->naccsubstates].userdata = substate->ss_userdata;
-                       s->naccsubstates++;
-               }
-       }
-       ctx->naccsubstates += s->naccsubstates;
-}
-
-
-rexdfa_t *rex_dfa_create_from_db(rexdb_t *db)
-{
-       long i;
-       rexdfa_t *dfa;
-       struct rexdfa_ctx ctx;
-       unsigned long nstates = rex_db_numstates(db);
-       unsigned long ntrans = rex_db_numtransitions(db);
-       unsigned long nsubstates = rex_db_numsubstates(db);
-       unsigned long naccsubstates = rex_db_numaccsubstates(db);
-       dfa = rex_dfa_create(nstates, ntrans, nsubstates, naccsubstates);
-       r_memset(&ctx, 0, sizeof(ctx));
-
-       for (i = 0; i < r_array_length(db->states); i++) {
-               rexstate_t *state = rex_db_getstate(db, i);
-               rex_dfa_fillstate(db, dfa, &ctx, state);
-       }
-       R_ASSERT(ctx.nstates == nstates);
-       R_ASSERT(ctx.ntrnas == ntrans);
-       R_ASSERT(ctx.nsubstates == nsubstates);
-       R_ASSERT(ctx.naccsubstates == naccsubstates);
-       return dfa;
-}
-
-
 rexdfs_t *rex_dfa_state(rexdfa_t *dfa, unsigned long nstate)
 {
        rexdfs_t *s;
        if (nstate >= dfa->nstates)
                return NULL;
-       s = &dfa->states[nstate];
+       s = REX_DFA_STATE(dfa, nstate);
        return s;
 }
 
@@ -144,7 +71,7 @@ rexdft_t *rex_dfa_transition(rexdfa_t *dfa, unsigned long nstate, unsigned long
                return NULL;
        if (ntrans >= s->ntrans)
                return NULL;
-       t = &dfa->trans[s->trans + ntrans];
+       t = REX_DFA_TRANSITION(dfa, nstate, ntrans);
        return t;
 }
 
@@ -157,7 +84,7 @@ rexdfss_t *rex_dfa_substate(rexdfa_t *dfa, unsigned long nstate, unsigned long n
                return NULL;
        if (nsubstate >= s->nsubstates)
                return NULL;
-       ss = &dfa->substates[s->substates + nsubstate];
+       ss = REX_DFA_SUBSTATE(dfa, nstate, nsubstate);
        return ss;
 }
 
@@ -170,7 +97,7 @@ rexdfss_t *rex_dfa_accsubstate(rexdfa_t *dfa, unsigned long nstate, unsigned lon
                return NULL;
        if (naccsubstate >= s->naccsubstates)
                return NULL;
-       ss = &dfa->accsubstates[s->accsubstates + naccsubstate];
+       ss = REX_DFA_ACCSUBSTATE(dfa, nstate, naccsubstate);
        return ss;
 }
 
@@ -178,28 +105,19 @@ rexdfss_t *rex_dfa_accsubstate(rexdfa_t *dfa, unsigned long nstate, unsigned lon
 long rex_dfa_next(rexdfa_t *dfa, unsigned long nstate, rexchar_t input)
 {
        rexdft_t *t;
-       rexdfs_t *s = rex_dfa_state(dfa, nstate);
-       long min, max, mid;
-
-       if (!s || !s->ntrans)
-               return 0L;
-       min = 0;
-       max = min + s->ntrans;
+       long mid, min = 0, max = min + REX_DFA_STATE(dfa, nstate)->ntrans;
        while (max > min) {
                mid = (min + max)/2;
-               t = rex_dfa_transition(dfa, nstate, mid);
+               t = REX_DFA_TRANSITION(dfa, nstate, mid);
                if (input >= t->lowin) {
                        min = mid + 1;
                } else {
                        max = mid;
                }
        }
-       if (min > 0)
-               --min;
-       t = rex_dfa_transition(dfa, nstate, min);
-       if (input >= t->lowin && input <= t->highin)
-               return t->state;
-       return 0;
+       min -= (min > 0) ? 1 : 0;
+       t = REX_DFA_TRANSITION(dfa, nstate, min);
+       return (input >= t->lowin && input <= t->highin) ? t->state : 0;
 }
 
 
index b4f9922..7d8ee11 100644 (file)
 #ifndef _REXDFA_H_
 #define _REXDFA_H_
 
-#include "rex/rexdef.h"
-
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#ifndef REX_USERDATA_TYPE
+typedef unsigned long rexuserdata_t;
+#else
+typedef REX_USERDATA_TYPE rexuserdata_t;
+#endif
+
+
+#ifndef REX_UINT_TYPE
+typedef unsigned long rexuint_t;
+#else
+typedef REX_UINT_TYPE rexuint_t;
+#endif
+
+
+#ifndef REX_CHAR_TYPE
+typedef unsigned int rexchar_t;
+#else
+typedef REX_CHAR_TYPE rexchar_t;
+#endif
+#define REX_CHAR_MAX ((rexchar_t)-1)
+#define REX_CHAR_MIN ((rexchar_t)0)
+
+typedef enum {
+       REX_STATETYPE_NONE = 0,
+       REX_STATETYPE_START = 1,
+       REX_STATETYPE_ACCEPT = 2,
+       REX_STATETYPE_DEAD = 3,
+} rex_statetype_t;
 
 #define REX_DFA_DEADSTATE (0)
 #define REX_DFA_STARTSTATE (1)
 
+#define REX_DFA_STATE(__dfa__, __nstate__)                                                     (&(__dfa__)->states[__nstate__])
+#define REX_DFA_TRANSITION(__dfa__, __nstate__, __ntrans__)                    (&(__dfa__)->trans[(REX_DFA_STATE(__dfa__, __nstate__)->trans) + (__ntrans__)])
+#define REX_DFA_SUBSTATE(__dfa__, __nstate__, __nsubstate__)           (&(__dfa__)->substates[REX_DFA_STATE(__dfa__, __nstate__)->substates + (__nsubstate__)])
+#define REX_DFA_ACCSUBSTATE(__dfa__, __nstate__, __naccsubstate__)     (&(__dfa__)->accsubstates[REX_DFA_STATE(__dfa__, __nstate__)->accsubstates + (__naccsubstate__)])
+#define REX_DFA_NEXT(__dfa__, __nstate__, __input__) \
+               ({ /* GCC-style statement expression: value is the next state for __input__, or 0 if no transition matches; arguments are evaluated more than once */ \
+                       rexdft_t *t; \
+                       long mid, min = 0, max = min + REX_DFA_STATE(__dfa__, __nstate__)->ntrans; \
+                       while (max > min) { /* binary search on lowin; presumably transitions are ordered by lowin - confirm */ \
+                               mid = (min + max)/2; \
+                               t = REX_DFA_TRANSITION(__dfa__, __nstate__, mid); /* use the macro parameters, not caller-scope names */ \
+                               if ((__input__) >= t->lowin) { \
+                                       min = mid + 1; \
+                               } else { \
+                                       max = mid; \
+                               } \
+                       } \
+                       min -= (min > 0) ? 1 : 0; /* step back to the last transition with lowin <= __input__ */ \
+                       t = REX_DFA_TRANSITION(__dfa__, __nstate__, min); \
+                       (((__input__) >= t->lowin && (__input__) <= t->highin) ? t->state : 0); \
+               })
+
+
 /*
  * Sub-state info definition
  */
index bbec916..7a5d6a2 100644 (file)
@@ -35,13 +35,6 @@ extern "C" {
 
 typedef struct rexstate_s rexstate_t;
 
-typedef enum {
-       REX_STATETYPE_NONE = 0,
-       REX_STATETYPE_START = 1,
-       REX_STATETYPE_ACCEPT = 2,
-       REX_STATETYPE_DEAD = 3,
-} rex_statetype_t;
-
 struct rexstate_s {
        robject_t obj;
        rarray_t *etrans;
index 9953a17..4885f8a 100644 (file)
@@ -229,7 +229,7 @@ static int rex_cc_output_dfa(rexcc_t *pCC, FILE *out)
 int rex_cc_output(rexcc_t *pCC, FILE *out)
 {
 
-       rex_cc_fprintf(out, 0, "#include \"rex/rexdfa.h\"\n\n");
+       rex_cc_fprintf(out, 0, "#include \"rexdfa.h\"\n\n");
 
        rex_cc_output_accsubstates(pCC, out);
        rex_cc_fprintf(out, 0, "\n\n");
index a6d6212..36d27f7 100644 (file)
@@ -183,7 +183,9 @@ int main(int argc, const char *argv[])
        for (i = 1; i < argc; i++) {
                if (strcmp(argv[i], "-f") == 0) {
                        if (++i < argc) {
+#if 0
                                rbuffer_t *text = rex_buffer_map_file(argv[i]);
+#endif
                        }
                }
        }
@@ -210,7 +212,7 @@ int main(int argc, const char *argv[])
        if (pCC->startuid < 0)
                goto error;
        tempdb = rex_db_createdfa(pCC->nfa, pCC->startuid);
-       pCC->dfa = rex_dfa_create_from_db(tempdb);
+       pCC->dfa = rex_db_todfa(tempdb);
        rex_db_destroy(tempdb);
 
        for (i = 1; i < argc; i++) {
index 3b2dad2..91c47df 100644 (file)
@@ -95,10 +95,10 @@ int rex_grep_dfamatch(rexgrep_t *pGrep, const char* input, const char *end)
        rexdfs_t *s;
 
        while ((inc = r_utf8_mbtowc(&wc, (const unsigned char*)input, (const unsigned char*)end)) > 0) {
-               if ((nstate = rex_dfa_next(dfa, nstate, wc)) <= 0)
+               if ((nstate = REX_DFA_NEXT(dfa, nstate, wc)) <= 0)
                        break;
                input += inc;
-               s = rex_dfa_state(dfa, nstate);
+               s = REX_DFA_STATE(dfa, nstate);
                if (s->type == REX_STATETYPE_ACCEPT)
                        ret = input - start;
        }
index 2cd5280..4ebfa91 100644 (file)
@@ -209,7 +209,7 @@ int main(int argc, const char *argv[])
 
        if (pGrep->usedfa) {
                rexdb_t *dfadb = rex_db_createdfa(pGrep->nfa, pGrep->startuid);
-               pGrep->dfa = rex_dfa_create_from_db(dfadb);
+               pGrep->dfa = rex_db_todfa(dfadb);
                rex_db_destroy(dfadb);
        }
 
index 24849c9..8102945 100644 (file)
--- a/rjs/rjs.c
+++ b/rjs/rjs.c
@@ -515,7 +515,6 @@ static void rjs_op_propldr(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
        rvmreg_t *arg1 = RVM_CPUREG_PTR(cpu, ins->op1);
        rvmreg_t *arg2 = RVM_CPUREG_PTR(cpu, ins->op2);
-       rvmreg_t tmp = rvm_reg_create_signed(0);
        rpointer value;
        long index;
 
@@ -663,7 +662,7 @@ static void rjs_op_propprev(rvmcpu_t *cpu, rvm_asmins_t *ins)
        RVM_REG_SETL(arg1, index);
 }
 
-
+#if 0
 static void rjs_op_mapdel(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
        int ret;
@@ -913,7 +912,7 @@ static void rjs_op_mapstr(rvmcpu_t *cpu, rvm_asmins_t *ins)
                RVM_ABORT(cpu, RVM_E_ILLEGAL);
        r_map_setvalue(a, index, arg1);
 }
-
+#endif
 
 const char *rjs_version()
 {