RPA Toolkit
Start reusing orphan records, instead of just generating waste of space.
author Martin Stoilov <martin@rpasearch.com>
Thu, 28 Apr 2011 06:38:14 +0000 (23:38 -0700)
committer Martin Stoilov <martin@rpasearch.com>
Thu, 28 Apr 2011 06:38:14 +0000 (23:38 -0700)
rgrep/rpagrep.c
rgrep/rpagrep.h
rgrep/unix/main.c
rjs/ecma262.rpa
rpa2/rpacompiler.c
rpa2/rpacompiler.h
rpa2/rpastat.c
rpa2/rpastat.h
rpa2/rpavm.c
rpa2/rpavm.h

index ad90f09..ec1293b 100644 (file)
@@ -212,6 +212,9 @@ int rpa_grep_match(rpa_grep_t *pGrep, const char* buffer, unsigned long size)
                rpa_grep_output_utf8_string(pGrep, "\n");
        }
        pGrep->cachehit = hStat->cache->hit;
+       pGrep->orphrecords = r_array_length(hStat->orphans);
+       pGrep->emitstacksize = r_array_length(hStat->emitstack);
+
        rpa_stat_destroy(hStat);
        return 0;
 }
@@ -254,6 +257,9 @@ int rpa_grep_parse(rpa_grep_t *pGrep, const char* buffer, unsigned long size)
                r_array_destroy(records);
        }
        pGrep->cachehit = hStat->cache->hit;
+       pGrep->orphrecords = r_array_length(hStat->orphans);
+       pGrep->emitstacksize = r_array_length(hStat->emitstack);
+
        rpa_stat_destroy(hStat);
        return 0;
 }
@@ -273,10 +279,13 @@ int rpa_grep_scan(rpa_grep_t *pGrep, const char* buffer, unsigned long size)
        rpa_stat_encodingset(hStat, pGrep->encoding);
        hStat->debug = pGrep->execdebug;
        pGrep->cachehit = hStat->cache->hit;
+       pGrep->orphrecords = r_array_length(hStat->orphans);
 
 again:
        ret = rpa_stat_scan(hStat, pGrep->hPattern, input, start, end, &matched);
        pGrep->cachehit += hStat->cache->hit;
+       pGrep->orphrecords += r_array_length(hStat->orphans);
+
        if (ret > 0) {
                if (!displayed) {
                        displayed = 1;
@@ -290,6 +299,7 @@ again:
                goto again;
        }
 
+       pGrep->emitstacksize = r_array_length(hStat->emitstack);
        rpa_stat_destroy(hStat);
        return 0;
 }
index bf07cba..0260c31 100644 (file)
@@ -50,6 +50,8 @@ typedef struct rpa_grep_s {
        unsigned long scanmilisec;
        unsigned long usedstack;
        unsigned long cachehit;
+       unsigned long orphrecords;
+       unsigned long emitstacksize;            // Must be ZERO, if not something is going wrong
        unsigned int icase;
        unsigned int encoding;
        unsigned int greptype;
index f50b2f2..9d0271b 100644 (file)
@@ -274,8 +274,10 @@ end:
                        milsec = 1;
                minutes = milsec/60000;
                sec = (milsec%60000)/1000.0;
-               fprintf(stdout, "\ntime: %0ldm%1.3fs, %ld KB (%ld KB/sec), stack: %ld KB, memory: %ld KB (leaked %ld Bytes), cachehit: %ld\n",
-                               minutes, sec, sckb, 1000*sckb/milsec, pGrep->usedstack / 1000, (rlong)r_debug_get_maxmem()/1000, (rlong)r_debug_get_allocmem(), pGrep->cachehit);
+               fprintf(stdout, "\ntime: %0ldm%1.3fs, %ld KB (%ld KB/sec), stack: %ld KB, memory: %ld KB (leaked %ld Bytes), cachehit: %ld, "
+                               "orphan records: %ld, emitstacksize: %ld\n",
+                               minutes, sec, sckb, 1000*sckb/milsec, pGrep->usedstack / 1000, (rlong)r_debug_get_maxmem()/1000, (rlong)r_debug_get_allocmem(),
+                               pGrep->cachehit, pGrep->orphrecords, pGrep->emitstacksize);
        }
 
        rpa_grep_destroy(pGrep);
index 6d31869..108e19a 100644 (file)
@@ -92,7 +92,7 @@ Token                                 ::= <IdentifierName> |
 
 # 7.6 Identifier Names and Identifiers
 
-Identifier                             ::= <IdentifierName> - (<ReservedWord> - <ReservedWord> <IdentifierPart>)
+Identifier                             ::= <IdentifierName>  - (<ReservedWord> - <ReservedWord> <IdentifierPart>)
 IdentifierName                 ::= <IdentifierStart> <IdentifierPart>*
 IdentifierStart                        ::= <UnicodeLetter> | '$' | '_' | '\' <UnicodeLetter>
 UnicodeLetter                  ::= [#0x0041-#0x005A] | [#0x00C0-#0x00DE] | [#0x0100-#0x0232] | [#0x0061-#0x007A] | [#0x00C0-#0x00DE]       # <Lu> | <Ll>
index 47a13ba..8cab264 100644 (file)
@@ -291,14 +291,13 @@ rint rpa_compiler_loop_begin(rpa_compiler_t *co, const rchar *name, ruint namesi
        rpa_ruledef_t exp;
        rlong ruleuid = RPA_RECORD_INVALID_UID;
        rulong flags = 0;
-       rpa_rulepref_t *rulepref = rpa_compiler_rulepref_lookup(co, name, namesize);
-
-       if (rulepref) {
-               flags = rulepref->flags;
-               ruleuid = rulepref->ruleuid;
-       }
 
        r_memset(&exp, 0, sizeof(exp));
+       exp.rulepref = rpa_compiler_rulepref_lookup(co, name, namesize);
+       if (exp.rulepref) {
+               flags = exp.rulepref->flags;
+               ruleuid = exp.rulepref->ruleuid;
+       }
        exp.start = rvm_codegen_getcodesize(co->cg);
        exp.startidx = rvm_codegen_addlabel(co->cg, name, namesize);
        exp.endidx = rpa_codegen_invalid_add_numlabel_s(co->cg, "__end", exp.start);
@@ -313,8 +312,10 @@ rint rpa_compiler_loop_begin(rpa_compiler_t *co, const rchar *name, ruint namesi
        rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(R_REC)|BIT(R_LOO)|BIT(R_TOP)|BIT(LR)));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, R0, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R_LOO, R3, XX, 0));
-       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
-
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_PUSHSTARTREC, DA, XX, XX, 0));
+       }
        r_array_add(co->expressions, &exp);
        return 0;
 }
@@ -335,19 +336,28 @@ rint rpa_compiler_loop_end(rpa_compiler_t *co)
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R1)|BIT(R_LOO)|BIT(R_OTP)|BIT(LR)));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R_OTP, 0));
        rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BRANCH, exp.failidx, rvm_asm(RVM_BEQ, DA, XX, XX, 0));  // ------------- R_TOP is the same
-       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITEND, DA, R_OTP, R0, 0));     //          |
-       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R3, R0, XX, 0));                                                                                            //          |
-       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R_TOP, R_OTP, XX, 0));                                                                                      //          |
-       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BRANCH, exp.loopidx, rvm_asm(RVM_B, DA, XX, XX, 0));            //          |
-       rvm_codegen_redefinelabel(co->cg, exp.failidx);                                                                                 //          |
-       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R_REC, R1, XX, 0));                 //        <-------------------------------------
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {                                            //       |
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITEND, DA, R_OTP, R0, 0)); //      |
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_POPSTARTREC, DA, XX, XX, 0));//      |
+       }
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R3, R0, XX, 0));                                                //          |
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R_TOP, R_OTP, XX, 0));                                          //          |
+       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BRANCH, exp.loopidx, rvm_asm(RVM_B, DA, XX, XX, 0));        //          |
+       rvm_codegen_redefinelabel(co->cg, exp.failidx);                                                             //          |
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R_REC, R1, XX, 0));          //        <-------------------------------------
        rvm_codegen_addins(co->cg, rvm_asm(RVM_MOVS, R0, DA, XX, -1));
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_POPORPHANREC, DA, XX, XX, 0));
+       }
        rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
 
        /*
         *  END FAILED:
         */
        rvm_codegen_redefinelabel(co->cg, exp.endidx);
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_POPORPHANREC, DA, XX, XX, 0));
+       }
        rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R3, R_LOO, XX, 0));         // Save LOO to R3 before restoring the old one
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R0, XX, XX, 0));            // Pop the accumulated ret, use it to save the status for return
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R_REC)|BIT(R_LOO)|BIT(R_TOP)|BIT(LR)));
@@ -366,14 +376,13 @@ rint rpa_compiler_rule_begin(rpa_compiler_t *co, const rchar *name, ruint namesi
        rpa_ruledef_t exp;
        rlong ruleuid = RPA_RECORD_INVALID_UID;
        rulong flags = 0;
-       rpa_rulepref_t *rulepref = rpa_compiler_rulepref_lookup(co, name, namesize);
-
-       if (rulepref) {
-               flags = rulepref->flags;
-               ruleuid = rulepref->ruleuid;
-       }
 
        r_memset(&exp, 0, sizeof(exp));
+       exp.rulepref = rpa_compiler_rulepref_lookup(co, name, namesize);
+       if (exp.rulepref) {
+               flags = exp.rulepref->flags;
+               ruleuid = exp.rulepref->ruleuid;
+       }
        exp.start = rvm_codegen_getcodesize(co->cg);
        exp.startidx = rvm_codegen_addlabel(co->cg, name, namesize);
        exp.endidx = rpa_codegen_invalid_add_numlabel_s(co->cg, "__end", exp.start);
@@ -384,7 +393,11 @@ rint rpa_compiler_rule_begin(rpa_compiler_t *co, const rchar *name, ruint namesi
        rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, R_REC, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, R_TOP, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, LR, XX, XX, 0));
-       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
+
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_PUSHSTARTREC, DA, XX, XX, 0));
+       }
 
        r_array_add(co->expressions, &exp);
        return 0;
@@ -405,12 +418,21 @@ rint rpa_compiler_rule_end(rpa_compiler_t *co)
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R_OTP, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R1, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R_OTP, 0));
-       rvm_codegen_addins(co->cg, rvm_asm(RVM_BEQ, DA, XX, XX, 4));
-       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITEND, DA, R_OTP, R0, 0));
+
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {
+               rvm_codegen_addins(co->cg, rvm_asm(RVM_BEQ, DA, XX, XX, 5));
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITEND, DA, R_OTP, R0, 0));
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_POPSTARTREC, DA, XX, XX, 0));
+       } else {
+               rvm_codegen_addins(co->cg, rvm_asm(RVM_BEQ, DA, XX, XX, 3));
+       }
        rvm_codegen_addins(co->cg, rvm_asml(RPA_SETCACHE, DA, R1, R_REC, exp.start));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R_REC, R1, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_MOVS, R0, DA, XX, -1));
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_POPORPHANREC, DA, XX, XX, 0));
+       }
        rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
        rvm_codegen_redefinelabel(co->cg, exp.endidx);
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, LR, XX, XX, 0));
@@ -418,6 +440,9 @@ rint rpa_compiler_rule_end(rpa_compiler_t *co)
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R_REC, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R_OTP, R_TOP, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_MOVS, R0, DA, XX, -1));
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_POPORPHANREC, DA, XX, XX, 0));
+       }
        rvm_codegen_addins(co->cg, rvm_asml(RPA_SETCACHE, DA, R_REC, R_REC, exp.start));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
        return 0;
@@ -429,14 +454,14 @@ rint rpa_compiler_inlinerule_begin(rpa_compiler_t *co, const rchar *name, ruint
        rpa_ruledef_t exp;
        rlong ruleuid = RPA_RECORD_INVALID_UID;
        rulong ruleflags = 0;
-       rpa_rulepref_t *rulepref = rpa_compiler_rulepref_lookup(co, name, namesize);
 
-       if (rulepref) {
-               ruleflags = rulepref->flags;
-               ruleuid = rulepref->ruleuid;
+       r_memset(&exp, 0, sizeof(exp));
+       exp.rulepref = rpa_compiler_rulepref_lookup(co, name, namesize);
+       if (exp.rulepref) {
+               ruleflags = exp.rulepref->flags;
+               ruleuid = exp.rulepref->ruleuid;
        }
 
-       r_memset(&exp, 0, sizeof(exp));
        exp.branch = rvm_codegen_addins(co->cg, rvm_asm(RVM_B, DA, XX, XX, 0));
        exp.start = rvm_codegen_getcodesize(co->cg);
        exp.startidx = rpa_codegen_add_numlabel_s(co->cg, "__inlined", exp.start);
@@ -446,7 +471,10 @@ rint rpa_compiler_inlinerule_begin(rpa_compiler_t *co, const rchar *name, ruint
        rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, R_REC, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, R_TOP, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, LR, XX, XX, 0));
-       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_PUSHSTARTREC, DA, XX, XX, 0));
+       }
        r_array_add(co->expressions, &exp);
        return 0;
 }
@@ -466,18 +494,28 @@ rint rpa_compiler_inlinerule_end(rpa_compiler_t *co)
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R_OTP, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R1, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R_OTP, 0));
-       rvm_codegen_addins(co->cg, rvm_asm(RVM_BEQ, DA, XX, XX, 3));
-       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITEND, DA, R_OTP, R0, 0));
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {
+               rvm_codegen_addins(co->cg, rvm_asm(RVM_BEQ, DA, XX, XX, 4));
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_EMITEND, DA, R_OTP, R0, 0));
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_POPSTARTREC, DA, XX, XX, 0));
+       } else {
+               rvm_codegen_addins(co->cg, rvm_asm(RVM_BEQ, DA, XX, XX, 2));
+       }
        rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R_REC, R1, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_MOVS, R0, DA, XX, -1));
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_POPORPHANREC, DA, XX, XX, 0));
+       }
        rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
        rvm_codegen_redefinelabel(co->cg, exp.endidx);
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, LR, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R_TOP, XX, XX, 0));
        rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R_REC, XX, XX, 0));
-
        rvm_codegen_addins(co->cg, rvm_asm(RVM_MOVS, R0, DA, XX, -1));
+       if (exp.rulepref && (exp.rulepref->flags & RPA_RFLAG_EMITRECORD)) {
+               rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_BLOB, exp.dataidx, rvm_asm(RPA_POPORPHANREC, DA, XX, XX, 0));
+       }
        rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
        rvm_codegen_replaceins(co->cg, exp.branch, rvm_asm(RVM_B, DA, XX, XX, rvm_codegen_getcodesize(co->cg) - exp.branch));
        rpa_compiler_index_reference(co, exp.startidx, (exp.flags & RPA_MATCH_MASK));
index fce0813..3530621 100644 (file)
@@ -11,6 +11,19 @@ extern "C" {
 
 #define RPA_RULENAME_MAXSIZE 256
 
+
+#define RPA_COMPILER_CURRENTEXP(__co__) ((rpa_ruledef_t*)r_array_lastslot((__co__)->expressions))
+
+/*
+ * The rpa_rulepref_t user preferences are compiled into
+ * rpa_ruledata_t and accessed at runtime.
+ */
+typedef struct rpa_rulepref_s {
+       rlong ruleuid;
+       rulong flags;
+} rpa_rulepref_t;
+
+
 /*
  * This should be renamed to rpa_expdef_s
  */
@@ -26,20 +39,10 @@ typedef struct rpa_ruledef_s {
        rlong againidx;
        ruint ruleuid;
        ruint flags;
+       rpa_rulepref_t *rulepref;
 } rpa_ruledef_t;
 
 
-#define RPA_COMPILER_CURRENTEXP(__co__) ((rpa_ruledef_t*)r_array_lastslot((__co__)->expressions))
-
-/*
- * The rpa_rulepref_t user preferences are compiled into
- * rpa_ruledata_t and access in runtime.
- */
-typedef struct rpa_rulepref_s {
-       rlong ruleuid;
-       rulong flags;
-} rpa_rulepref_t;
-
 typedef struct rpa_compiler_s {
        rvm_codegen_t *cg;
        rharray_t *ruleprefs;
index 8060c76..ff80deb 100644 (file)
@@ -24,6 +24,8 @@ rpastat_t *rpa_stat_create(rpadbex_t *dbex, rulong stacksize)
        }
        stat->dbex = dbex;
        stat->records = r_array_create(sizeof(rparecord_t));
+       stat->emitstack = r_array_create(sizeof(rlong));
+       stat->orphans = r_array_create(sizeof(rlong));
        stat->cpu->userdata1 = stat;
 
        return stat;
@@ -35,7 +37,9 @@ void rpa_stat_destroy(rpastat_t *stat)
        if (stat) {
                if (stat->instack)
                        r_free(stat->instackbuffer);
-               r_object_destroy((robject_t*)stat->records);
+               r_array_destroy(stat->records);
+               r_array_destroy(stat->emitstack);
+               r_array_destroy(stat->orphans);
                rpavm_cpu_destroy(stat->cpu);
                rpa_cache_destroy(stat->cache);
                r_free(stat);
@@ -67,7 +71,8 @@ rint rpa_stat_init(rpastat_t *stat, const rchar *input, const rchar *start, cons
        stat->end = end;
        stat->input = input;
        stat->error = 0;
-       stat->cursize = 0;
+       r_array_setlength(stat->orphans, 0);
+       r_array_setlength(stat->emitstack, 0);
        stat->cache->hit = 0;
        if (stat->instacksize < size) {
                stat->instackbuffer = r_realloc(stat->instackbuffer, (size + 2) * sizeof(rpainput_t));
index db085d5..5ff542b 100644 (file)
@@ -35,10 +35,11 @@ struct rpastat_s {
        ruint encoding;
        ruint debug;
        rarray_t *records;
+       rarray_t *emitstack;
+       rarray_t *orphans;
        rpainput_t *instackbuffer;
        rpainput_t *instack;                    /* instack = &instackbuffer[1]; This allows R_TOP = -1, without any additional checks */
        rulong instacksize;
-       rulong cursize;
        rpacache_t *cache;
        rpainmap_t ip;
        rvmcpu_t *cpu;
index 0d1f175..98b8add 100644 (file)
@@ -252,8 +252,14 @@ static void rpavm_swi_emitstart(rvmcpu_t *cpu, rvm_asmins_t *ins)
        if (!(ruledata->flags & RPA_RFLAG_EMITRECORD))
                return;
        R_ASSERT(RVM_CPUREG_GETL(cpu, R_REC) >= 0);
-//     index = r_array_replace(stat->records, index + 1, NULL);
-       index = r_array_add(stat->records, NULL);
+       if (r_array_length(stat->orphans) > 0) {
+               /*
+                * First, let's see if there is an orphan record that we can adopt.
+                */
+               index = r_array_pop(stat->orphans, rlong);
+       } else {
+               index = r_array_add(stat->records, NULL);
+       }
 
        /*
         * Important: get the pointer to crec after modifying the array, because if
@@ -268,6 +274,7 @@ static void rpavm_swi_emitstart(rvmcpu_t *cpu, rvm_asmins_t *ins)
        rec->type = RPA_RECORD_START;
        rec->input = stat->instack[tp].input;
        rec->inputsiz = 0;
+       rec->next = 0;
        crec->next = index;
 }
 
@@ -289,9 +296,15 @@ static void rpavm_swi_emitend(rvmcpu_t *cpu, rvm_asmins_t *ins)
                return;
 
        R_ASSERT(RVM_CPUREG_GETL(cpu, R_REC) >= 0);
-//     index = r_array_replace(stat->records, index + 1, NULL);
-       index = r_array_add(stat->records, NULL);
 
+       if (r_array_length(stat->orphans) > 0) {
+               /*
+                * First, let's see if there is an orphan record that we can adopt.
+                */
+               index = r_array_pop(stat->orphans, rlong);
+       } else {
+               index = r_array_add(stat->records, NULL);
+       }
        /*
         * Important: get the pointer to crec after modifying the array, because if
         * it gets reallocated the pointer will be invalid.
@@ -306,6 +319,7 @@ static void rpavm_swi_emitend(rvmcpu_t *cpu, rvm_asmins_t *ins)
        rec->ruleuid = ruledata->ruleuid;
        rec->input = stat->instack[tp].input;
        rec->inputsiz = stat->instack[tp + tplen].input - stat->instack[tp].input;
+       rec->next = 0;
        crec->next = index;
 
        if (tplen) {
@@ -314,60 +328,67 @@ static void rpavm_swi_emitend(rvmcpu_t *cpu, rvm_asmins_t *ins)
 }
 
 
-static void rpavm_swi_prninfo(rvmcpu_t *cpu, rvm_asmins_t *ins)
+static void rpavm_swi_pushstart(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
        rpastat_t *stat = (rpastat_t *)cpu->userdata1;
        rpa_ruledata_t *ruledata = RVM_CPUREG_GETP(cpu, ins->op1);
-       rstr_t name = {"unknown", 7};
-       if (!stat->debug)
+       rlong index = RVM_CPUREG_GETL(cpu, R_REC);
+       if (!(ruledata->flags & RPA_RFLAG_EMITRECORD))
                return;
-       if (ruledata) {
-               name.str = (rchar*)ruledata + ruledata->name;
-               name.size = ruledata->namesize;
-       }
 
-       r_printf("%s: ", name.str);
-       rvm_cpu_dumpregs(cpu, ins);
+       r_array_add(stat->emitstack, &index);
 }
 
 
-static void rpavm_swi_getnextrec(rvmcpu_t *cpu, rvm_asmins_t *ins)
+static void rpavm_swi_popstart(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
+       rlong index;
        rpastat_t *stat = (rpastat_t *)cpu->userdata1;
-       rlong rec = RVM_CPUREG_GETL(cpu, ins->op2);
-
-       rparecord_t *prec = (rparecord_t *)r_array_slot(stat->records, rec);
-
-//     r_printf("%s, rec = %ld, next = %ld\n", __FUNCTION__, rec, prec->next);
-       RVM_CPUREG_SETL(cpu, ins->op1, prec->next);
+       rpa_ruledata_t *ruledata = RVM_CPUREG_GETP(cpu, ins->op1);
+       if (!(ruledata->flags & RPA_RFLAG_EMITRECORD))
+               return;
+       index = r_array_pop(stat->emitstack, rlong);
 }
 
 
-static void rpavm_swi_getreclen(rvmcpu_t *cpu, rvm_asmins_t *ins)
+static void rpavm_swi_poporphan(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
+       rlong index;
        rpastat_t *stat = (rpastat_t *)cpu->userdata1;
-       RVM_CPUREG_SETU(cpu, ins->op1, r_array_length(stat->records));
+       rpa_ruledata_t *ruledata = RVM_CPUREG_GETP(cpu, ins->op1);
+       if (!(ruledata->flags & RPA_RFLAG_EMITRECORD))
+               return;
+       index = r_array_pop(stat->emitstack, rlong);
+       r_array_add(stat->orphans, &index);
 }
 
 
-static void rpavm_swi_setreclen(rvmcpu_t *cpu, rvm_asmins_t *ins)
+static void rpavm_swi_prninfo(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
        rpastat_t *stat = (rpastat_t *)cpu->userdata1;
+       rpa_ruledata_t *ruledata = RVM_CPUREG_GETP(cpu, ins->op1);
+       rstr_t name = {"unknown", 7};
+       if (!stat->debug)
+               return;
+       if (ruledata) {
+               name.str = (rchar*)ruledata + ruledata->name;
+               name.size = ruledata->namesize;
+       }
 
-       r_array_setlength(stat->records, (ruint)RVM_CPUREG_GETU(cpu, ins->op1));
-       RVM_CPUREG_SETL(cpu, R_REC, RVM_CPUREG_GETU(cpu, ins->op1) - 1);
+       r_printf("%s: ", name.str);
+       rvm_cpu_dumpregs(cpu, ins);
 }
 
 
-static void rpavm_swi_getcurrec(rvmcpu_t *cpu, rvm_asmins_t *ins)
+static void rpavm_swi_getnextrec(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
-       RVM_CPUREG_SETL(cpu, ins->op1, RVM_CPUREG_GETL(cpu, R_REC));
-}
+       rpastat_t *stat = (rpastat_t *)cpu->userdata1;
+       rlong rec = RVM_CPUREG_GETL(cpu, ins->op2);
 
+       rparecord_t *prec = (rparecord_t *)r_array_slot(stat->records, rec);
 
-static void rpavm_swi_setcurrec(rvmcpu_t *cpu, rvm_asmins_t *ins)
-{
-       RVM_CPUREG_SETL(cpu, R_REC, RVM_CPUREG_GETU(cpu, ins->op1));
+//     r_printf("%s, rec = %ld, next = %ld\n", __FUNCTION__, rec, prec->next);
+       RVM_CPUREG_SETL(cpu, ins->op1, prec->next);
 }
 
 
@@ -391,23 +412,8 @@ static void rpavm_swi_setcache(rvmcpu_t *cpu, rvm_asmins_t *ins)
                prec = (rparecord_t *)r_array_slot(stat->records, startrec);
 //             r_printf("Set the cache for: %s (%ld, %ld), top = %ld, ret = %ld, ruleid=%ld\n", prec->rule, startrec, endrec, prec->top, r0, ruleid);
                rpa_cache_set(stat->cache, top, ruleid, r0, startrec, endrec);
-
-               /*
-                * The next optimization is supposed to reduce the size of
-                * garbage records.
-                */
-               if (stat->cursize < endrec + 1)
-                       stat->cursize = endrec + 1;
        } else {
                rpa_cache_set(stat->cache, top, ruleid, r0, 0, 0);
-               /*
-                * The next optimization is supposed to reduce the size of
-                * garbage records.
-                */
-               if (stat->cursize < endrec + 1) {
-                       r_array_setlength(stat->records, endrec + 1);
-                       stat->cursize = endrec + 1;
-               }
        }
 }
 
@@ -446,28 +452,6 @@ static void rpavm_swi_checkcache(rvmcpu_t *cpu, rvm_asmins_t *ins)
 }
 
 
-static void rpavm_swi_matchchrinstr_nan(rvmcpu_t *cpu, rvm_asmins_t *ins)
-{
-       rpastat_t *stat = (rpastat_t *)cpu->userdata1;
-       rchar *str = RVM_CPUREG_GETSTR(cpu, ins->op1);
-       rword matched = 0;
-       rword wc;
-
-       while (*str) {
-               wc = *str++;
-               if (rpa_stat_matchchr(stat, RVM_CPUREG_GETL(cpu, R_TOP), wc) > 0) {
-                       rpavm_swi_shift(cpu, ins);
-                       matched = 1;
-                       break;
-               }
-       }
-       cpu->status = matched ? 0 : RVM_STATUS_N;
-       RVM_CPUREG_SETU(cpu, R0, matched ? matched : (rword)-1);
-}
-
-
-
-
 static rvm_switable_t rpavm_swi_table[] = {
                {"RPA_MATCHCHR_NAN", rpavm_swi_matchchr_nan},
                {"RPA_MATCHCHR_OPT", rpavm_swi_matchchr_opt},
@@ -491,12 +475,9 @@ static rvm_switable_t rpavm_swi_table[] = {
                {"RPA_EMITTAIL", rpavm_swi_emittail},
                {"RPA_GETNEXTREC", rpavm_swi_getnextrec},
                {"RPA_PRNINFO", rpavm_swi_prninfo},
-               {"RPA_MATCHCHRINSTR_NAN", rpavm_swi_matchchrinstr_nan},
-
-               {"RPA_GETRECLEN", rpavm_swi_getreclen},
-               {"RPA_SETRECLEN", rpavm_swi_setreclen},
-               {"RPA_GETCURREC", rpavm_swi_getcurrec},
-               {"RPA_SETCURREC", rpavm_swi_setcurrec},
+               {"RPA_PUSHSTARTREC", rpavm_swi_pushstart},
+               {"RPA_POPSTARTREC", rpavm_swi_popstart},
+               {"RPA_POPORPHANREC", rpavm_swi_poporphan},
                {NULL, NULL},
 };
 
index 3188beb..15fb4d8 100644 (file)
@@ -53,7 +53,9 @@ extern "C" {
 #define RPA_EMITTAIL           RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 19))
 #define RPA_GETNEXTREC         RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 20))
 #define RPA_PRNINFO                    RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 21))
-#define RPA_MATCHCHRINSTR_NAN  RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 21))
+#define RPA_PUSHSTARTREC       RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 22))
+#define RPA_POPSTARTREC                RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 23))
+#define RPA_POPORPHANREC       RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 24))