RPA Toolkit
work on RVM based regex
author Martin Stoilov <martin@rpasearch.com>
Fri, 11 Feb 2011 05:17:11 +0000 (21:17 -0800)
committer Martin Stoilov <martin@rpasearch.com>
Fri, 11 Feb 2011 05:17:11 +0000 (21:17 -0800)
rvm/rvmcpu.c
rvm/rvmcpu.h
rvm/rvmscope.c
rvm/rvmscope.h
tests/regex-test.c

index 3ddfe6f..4905d63 100644 (file)
@@ -1133,7 +1133,7 @@ static void rvm_cpu_dumpregs(rvm_asmins_t *pi, rvmcpu_t *vm)
        buffer[50] = '\0';
        rvm_printf("%s", buffer);
 
-       rvm_printf("0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, TP=%p, FP=%ld, SP=%ld, LR=%ld, PC=%ld, DA=0x%lx, S( %c%c%c%c )",
+       rvm_printf("0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, 0x%lx, TP=%p, FP=%ld, SP=%ld, LR=%ld, PC=%ld, DA=0x%lx, S( %c%c%c%c%c )",
                RVM_CPUREG_GETU(vm, 0), RVM_CPUREG_GETU(vm, 1), RVM_CPUREG_GETU(vm, 2), RVM_CPUREG_GETU(vm, 3),
                RVM_CPUREG_GETU(vm, 4), RVM_CPUREG_GETU(vm, 5), RVM_CPUREG_GETU(vm, 6), RVM_CPUREG_GETU(vm, 7),
                RVM_CPUREG_GETU(vm, 8), RVM_CPUREG_GETP(vm, TP), (long int)RVM_CPUREG_GETU(vm, FP), (long int)RVM_CPUREG_GETU(vm, SP),
@@ -1953,6 +1953,40 @@ rvm_asmins_t rvm_asmp(rword opcode, rword op1, rword op2, rword op3, rpointer da
 }
 
 
+/*
+ * Assemble one instruction whose data word is tagged RVM_DTYPE_SWIID.
+ * The opcode and the three operands are cast to ruint8 before being
+ * stored; the 'da' flag is raised when any operand equals DA.
+ */
+rvm_asmins_t rvm_asms(rword opcode, rword op1, rword op2, rword op3, rword data)
+{
+       rvm_asmins_t ins;
+       ruint8 reg1 = (ruint8)op1;
+       ruint8 reg2 = (ruint8)op2;
+       ruint8 reg3 = (ruint8)op3;
+
+       r_memset(&ins, 0, sizeof(ins));
+       ins.opcode = (ruint8)opcode;
+       ins.op1 = reg1;
+       ins.op2 = reg2;
+       ins.op3 = reg3;
+       ins.data.u = data;
+       ins.type = RVM_DTYPE_SWIID;
+       /* 'da' stays 0 from the memset unless an operand names DA. */
+       if (reg1 == DA || reg2 == DA || reg3 == DA)
+               ins.da = 1;
+       return ins;
+}
+
+
+/*
+ * Assemble one instruction whose data word is tagged RVM_DTYPE_FUNCTION.
+ * The opcode and the three operands are cast to ruint8 before being
+ * stored; the 'da' flag is raised when any operand equals DA.
+ */
+rvm_asmins_t rvm_asmf(rword opcode, rword op1, rword op2, rword op3, rword data)
+{
+       rvm_asmins_t ins;
+       ruint8 reg1 = (ruint8)op1;
+       ruint8 reg2 = (ruint8)op2;
+       ruint8 reg3 = (ruint8)op3;
+
+       r_memset(&ins, 0, sizeof(ins));
+       ins.opcode = (ruint8)opcode;
+       ins.op1 = reg1;
+       ins.op2 = reg2;
+       ins.op3 = reg3;
+       ins.data.u = data;
+       ins.type = RVM_DTYPE_FUNCTION;
+       /* 'da' stays 0 from the memset unless an operand names DA. */
+       if (reg1 == DA || reg2 == DA || reg3 == DA)
+               ins.da = 1;
+       return ins;
+}
+
+
 rvm_asmins_t rvm_asm(rword opcode, rword op1, rword op2, rword op3, rword data)
 {
        rvm_asmins_t a;
index a2e86ea..72b6294 100644 (file)
@@ -335,6 +335,8 @@ rvm_asmins_t rvm_asml(rword opcode, rword op1, rword op2, rword op3, rlong data)
 rvm_asmins_t rvm_asmb(rword opcode, rword op1, rword op2, rword op3, rword data);
 rvm_asmins_t rvm_asmd(rword opcode, rword op1, rword op2, rword op3, rdouble data);
 rvm_asmins_t rvm_asmp(rword opcode, rword op1, rword op2, rword op3, rpointer data);
+rvm_asmins_t rvm_asms(rword opcode, rword op1, rword op2, rword op3, rword data);
+rvm_asmins_t rvm_asmf(rword opcode, rword op1, rword op2, rword op3, rword data);
 rvm_asmins_t rvm_asm2(rword opcode, rword op1, rword op2, rword op3, ruint32 p1, ruint32 p2);
 rvm_asmins_t rvm_asmr(rword opcode, rword op1, rword op2, rword op3, rpointer pReloc);
 rvm_asmins_t rvm_asmx(rword opcode, rword op1, rword op2, rword op3, rpointer pReloc);
index dc13ef4..8f15719 100644 (file)
@@ -105,6 +105,18 @@ void rvm_scope_addpointer(rvm_scope_t *scope, const rchar *name, ruint namesize,
 }
 
 
+/*
+ * Shorthand for rvm_scope_addoffset() that measures the name with
+ * r_strlen() instead of taking an explicit length.
+ */
+void rvm_scope_addoffset_s(rvm_scope_t *scope, const rchar *name, ruint32 off)
+{
+       ruint namesize = r_strlen(name);
+
+       rvm_scope_addoffset(scope, name, namesize, off);
+}
+
+
+/*
+ * Shorthand for rvm_scope_addpointer() that measures the name with
+ * r_strlen() instead of taking an explicit length.
+ */
+void rvm_scope_addpointer_s(rvm_scope_t *scope, const rchar *name, rpointer ptr)
+{
+       ruint namesize = r_strlen(name);
+
+       rvm_scope_addpointer(scope, name, namesize, ptr);
+}
+
+
 rvm_varmap_t *rvm_scope_lookup(rvm_scope_t *scope, const rchar *name, ruint namesize)
 {
        ruint scopelen = r_array_length(scope->varstack);
index 4819a52..662fc4f 100644 (file)
@@ -38,6 +38,8 @@ rchar *rvm_scope_addname(rvm_scope_t *scope, const rchar *name, ruint namesize);
 rchar *rvm_scope_addstrname(rvm_scope_t *scope, const rchar *name);
 void rvm_scope_addoffset(rvm_scope_t *scope, const rchar *name, ruint namesize, ruint32 off);
 void rvm_scope_addpointer(rvm_scope_t *scope, const rchar *name, ruint namesize, rpointer ptr);
+void rvm_scope_addoffset_s(rvm_scope_t *scope, const rchar *name, ruint32 off);
+void rvm_scope_addpointer_s(rvm_scope_t *scope, const rchar *name, rpointer ptr);
 void rvm_scope_push(rvm_scope_t* scope);
 void rvm_scope_pop(rvm_scope_t* scope);
 ruint rvm_scope_count(rvm_scope_t* scope);
index 238d13d..9eaa6d8 100644 (file)
@@ -1,5 +1,7 @@
 #include <stdio.h>
 #include <stdlib.h>
+#include "rvmcodegen.h"
+#include "rvmscope.h"
 #include "rvmcpu.h"
 #include "rmem.h"
 #include "rutf.h"
 static ruint regextable;
 static int debuginfo = 0;
 static int parseinfo = 0;
+static int compileonly = 0;
+
 
 #define RPA_MATCHCHR           RVM_OPSWI(RVM_SWI_ID(regextable, 0))
-#define RPA_MATCHCHR_OPT       RVM_OPSWI(RVM_SWI_ID(regextable, 1))
-#define RPA_MATCHCHR_MUL       RVM_OPSWI(RVM_SWI_ID(regextable, 2))
-#define RPA_MATCHCHR_MOP       RVM_OPSWI(RVM_SWI_ID(regextable, 3))
-#define RPA_MATCHRNG           RVM_OPSWI(RVM_SWI_ID(regextable, 4))
-#define RPA_MATCHRNG_OPT       RVM_OPSWI(RVM_SWI_ID(regextable, 5))
-#define RPA_MATCHRNG_MUL       RVM_OPSWI(RVM_SWI_ID(regextable, 6))
-#define RPA_MATCHRNG_MOP       RVM_OPSWI(RVM_SWI_ID(regextable, 7))
-#define RPA_SHIFT                      RVM_OPSWI(RVM_SWI_ID(regextable, 8))
+#define RPA_EQMATCHCHR                 RVM_OPSWI(RVM_SWI_ID(regextable, 1))
+#define RPA_NEQMATCHCHR                RVM_OPSWI(RVM_SWI_ID(regextable, 2))
+#define RPA_MATCHRNG           RVM_OPSWI(RVM_SWI_ID(regextable, 3))
+#define RPA_EQMATCHRNG                 RVM_OPSWI(RVM_SWI_ID(regextable, 4))
+#define RPA_NEQMATCHRNG        RVM_OPSWI(RVM_SWI_ID(regextable, 5))
+#define RPA_SHIFT                      RVM_OPSWI(RVM_SWI_ID(regextable, 6))
+#define RPA_EQSHIFT                    RVM_OPSWI(RVM_SWI_ID(regextable, 7))
+#define RPA_NEQSHIFT           RVM_OPSWI(RVM_SWI_ID(regextable, 8))
+
+
+/*
+ * State shared by the code-emitting helpers of this test program.
+ */
+typedef struct rpa_compiler_s {
+       rvm_codegen_t *cg;              /* code generator that receives the emitted instructions */
+       rboolean optimized;             /* not referenced by the code visible in this change */
+       rvm_scope_t *scope;             /* name table; codegen_rpa_match() registers "rpa_match" here */
+       rulong fpoff;                   /* offset recorded for the next named slot; bumped by codegen_rpa_match() */
+} rpa_compiler_t;
 
 
 typedef struct rpainput_s {
@@ -44,6 +56,26 @@ typedef struct rpastat_s {
 } rpastat_t;
 
 
+/*
+ * Allocate a zero-filled compiler context and attach a fresh code
+ * generator and scope to it.
+ *
+ * Returns the new context, or NULL when the context itself cannot be
+ * allocated.  Pair with rpa_compiler_destroy().
+ */
+rpa_compiler_t *rpa_compiler_create()
+{
+       rpa_compiler_t *co;
+
+       co = r_malloc(sizeof(*co));
+       /* Do not touch the block if the allocation failed. */
+       if (!co)
+               return NULL;
+       r_memset(co, 0, sizeof(*co));
+       co->cg = rvm_codegen_create();
+       co->scope = rvm_scope_create();
+       return co;
+}
+
+
+/*
+ * Tear down a context made by rpa_compiler_create(): its code generator,
+ * its scope, and the context structure itself.  A NULL argument is
+ * ignored.
+ */
+void rpa_compiler_destroy(rpa_compiler_t *co)
+{
+       if (co) {
+               rvm_codegen_destroy(co->cg);
+               rvm_scope_destroy(co->scope);
+               /* The structure was obtained from r_malloc() and is owned here. */
+               r_free(co);
+       }
+}
+
 rpastat_t *rpa_stat_create()
 {
        rpastat_t *stat = (rpastat_t *) r_zmalloc(sizeof(*stat));
@@ -87,58 +119,52 @@ void rpa_stat_destroy(rpastat_t *stat)
 }
 
 
-static void rpa_matchchr(rvmcpu_t *cpu, rvm_asmins_t *ins)
-{
-       rword res, op2 = RVM_CPUREG_GETU(cpu, R0), op3 = RVM_CPUREG_GETU(cpu, R1);
 
-       res = op2;
-       RVM_CPUREG_SETU(cpu, R0, res);
-}
-
-
-static void rpa_matchchr_opt(rvmcpu_t *cpu, rvm_asmins_t *ins)
+/*
+ * SWI handler: compare the input character stored at instack[TP] with the
+ * value of the register named by ins->op1 and report the result through
+ * the Z status bit (1 on equality, 0 otherwise).  When the V status bit is
+ * already set the input is not examined and Z is updated with 0.
+ */
+static void rpa_matchchr(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
+       rpastat_t *stat = (rpastat_t *)cpu->userdata1;
+       rlong tp = RVM_CPUREG_GETL(cpu, TP);
+       rword op1 = RVM_CPUREG_GETU(cpu, ins->op1);
 
+       RVM_STATUS_UPDATE(cpu, RVM_STATUS_Z, (!(cpu->status & RVM_STATUS_V) && stat->instack[tp].wc == op1) ? 1 : 0);
 }
 
 
-static void rpa_matchchr_mul(rvmcpu_t *cpu, rvm_asmins_t *ins)
+/*
+ * Conditional form of rpa_matchchr(): the match is attempted only when the
+ * Z status bit is currently set; otherwise nothing is changed.
+ */
+static void rpa_eqmatchchr(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
-
+       if (cpu->status & RVM_STATUS_Z)
+               rpa_matchchr(cpu, ins);
 }
 
 
-static void rpa_matchchr_mop(rvmcpu_t *cpu, rvm_asmins_t *ins)
+/*
+ * Conditional form of rpa_matchchr(): the match is attempted only when the
+ * Z status bit is currently clear; otherwise nothing is changed.
+ */
+static void rpa_neqmatchchr(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
-
+       if (!(cpu->status & RVM_STATUS_Z))
+               rpa_matchchr(cpu, ins);
 }
 
 
+/*
+ * SWI handler: test whether the input character stored at instack[TP]
+ * lies in the inclusive range [p1, p2] of the pair read from the register
+ * named by ins->op1, and report the result through the Z status bit.
+ * When the V status bit is already set the input is not examined and Z is
+ * updated with 0.
+ */
 static void rpa_matchrng(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
-       rword res, op2 = RVM_CPUREG_GETU(cpu, R0), op3 = RVM_CPUREG_GETU(cpu, R1);
-
-       res = op2;
-       RVM_CPUREG_SETU(cpu, R0, res);
-}
-
-
-static void rpa_matchrng_opt(rvmcpu_t *cpu, rvm_asmins_t *ins)
-{
+       rpastat_t *stat = (rpastat_t *)cpu->userdata1;
+       rlong tp = RVM_CPUREG_GETL(cpu, TP);
+       rpair op1 = RVM_CPUREG_GETPAIR(cpu, ins->op1);
 
+       RVM_STATUS_UPDATE(cpu, RVM_STATUS_Z, (!(cpu->status & RVM_STATUS_V) && stat->instack[tp].wc >= op1.p1 && stat->instack[tp].wc <= op1.p2) ? 1 : 0);
 }
 
 
-static void rpa_matchrng_mul(rvmcpu_t *cpu, rvm_asmins_t *ins)
+/*
+ * Conditional form of rpa_matchrng(): the range test is attempted only
+ * when the Z status bit is currently set; otherwise nothing is changed.
+ */
+static void rpa_eqmatchrng(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
-
+       if (cpu->status & RVM_STATUS_Z)
+               rpa_matchrng(cpu, ins);
 }
 
 
-static void rpa_matchrng_mop(rvmcpu_t *cpu, rvm_asmins_t *ins)
+/*
+ * Conditional form of rpa_matchrng(): the range test is attempted only
+ * when the Z status bit is currently clear; otherwise nothing is changed.
+ */
+static void rpa_neqmatchrng(rvmcpu_t *cpu, rvm_asmins_t *ins)
 {
-
-
+       if (!(cpu->status & RVM_STATUS_Z))
+               rpa_matchrng(cpu, ins);
 }
 
 
@@ -148,7 +174,7 @@ static void rpa_shift(rvmcpu_t *cpu, rvm_asmins_t *ins)
        rlong tp = RVM_CPUREG_GETL(cpu, TP);
 
        if (stat->ip.input >= stat->end) {
-               RVM_STATUS_UPDATE(cpu, RVM_STATUS_E, 1);
+               RVM_STATUS_UPDATE(cpu, RVM_STATUS_V, 1);
                return;
        }
 
@@ -162,30 +188,72 @@ static void rpa_shift(rvmcpu_t *cpu, rvm_asmins_t *ins)
        }
        RVM_CPUREG_SETL(cpu, IP, stat->instack[tp].wc);
        RVM_CPUREG_SETL(cpu, TP, tp);
+       RVM_STATUS_UPDATE(cpu, RVM_STATUS_V, 0);
+}
+
+
+/*
+ * Conditional shift: hands over to rpa_shift() only while the Z status
+ * bit is set; with Z clear the call is a no-op.
+ */
+static void rpa_eqshift(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       if (!(cpu->status & RVM_STATUS_Z))
+               return;
+       rpa_shift(cpu, ins);
+}
+
+
+/*
+ * Conditional shift: hands over to rpa_shift() only while the Z status
+ * bit is clear; with Z set the call is a no-op.
+ */
+static void rpa_neqshift(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       if (cpu->status & RVM_STATUS_Z)
+               return;
+       rpa_shift(cpu, ins);
 }
 
 
+/*
+ * SWI table handed to rvm_cpu_addswitable().  Entry N must stay in step
+ * with the RPA_* opcode macro that is defined with index N, and each entry
+ * is labelled with the name of that macro.
+ */
 static rvm_switable_t switable[] = {
                 {"RPA_MATCHCHR", rpa_matchchr},
-               {"RPA_MATCHCHR_OPT", rpa_matchchr_opt},
-               {"RPA_MATCHCHR_MUL", rpa_matchchr_mul},
-               {"RPA_MATCHCHR_MOP", rpa_matchchr_mop},
-               {"RPA_MATCHRNG", rpa_matchrng},
-               {"RPA_MATCHRNG_OPT", rpa_matchrng_opt},
-               {"RPA_MATCHRNG_MUL", rpa_matchrng_mul},
-               {"RPA_MATCHRNG_MOP", rpa_matchrng_mop},
+               {"RPA_EQMATCHCHR", rpa_eqmatchchr},
+               {"RPA_NEQMATCHCHR", rpa_neqmatchchr},
+               {"RPA_MATCHRNG", rpa_matchrng},
+               {"RPA_EQMATCHRNG", rpa_eqmatchrng},
+               {"RPA_NEQMATCHRNG", rpa_neqmatchrng},
                 {"RPA_SHIFT", rpa_shift},
+               {"RPA_EQSHIFT", rpa_eqshift},
+               {"RPA_NEQSHIFT", rpa_neqshift},
                 {NULL, NULL},
 };
 
 
+/*
+ * Emit the "rpa_match" routine into co->cg and record the current
+ * co->fpoff in co->scope under the name "rpa_match".
+ *
+ * The emitted sequence is: four set-up instructions that build a
+ * function-typed value in R0 and store it through R1, one branch that
+ * skips the routine body, and then the body itself.  co->fpoff is
+ * advanced by one as a side effect.
+ */
+void codegen_rpa_match(rpa_compiler_t *co)
+{
+       rulong off, l1, l2;
+
+       rvm_scope_addoffset_s(co->scope, "rpa_match", co->fpoff);
+       l1 = rvm_codegen_getcodesize(co->cg);
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_ADDRS, R1, FP, DA, co->fpoff++));
+       /* l1 + 5 is meant to index the PUSHM below: five instructions precede it (assumes one code slot per addins call - TODO confirm). */
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, l1 + 5));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_SETTYPE, R0, DA, XX, RVM_DTYPE_FUNCTION));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_STRR, R0, R1, XX, 0));
+       /* Remember where the skip branch sits so it can be patched once the body length is known. */
+       l2 = rvm_codegen_getcodesize(co->cg);
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_B, DA, XX, XX, 0));                                                      /* Will be re-written later */
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(FP)|BIT(SP)|BIT(LR)));
+       /* NOTE(review): the data word is the negated current code size; presumably this cancels a PC-relative base - confirm against RVM_CALL. */
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_CALL, R0, DA, XX, -rvm_codegen_getcodesize(co->cg)));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, SP, FP, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BITS(FP,LR)));
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_EQSHIFT, XX, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+       /* Patch the placeholder branch with the distance from l2 to the end of the body. */
+       off = rvm_codegen_getcodesize(co->cg);
+       rvm_codegen_replaceins(co->cg, l2, rvm_asm(RVM_B, DA, XX, XX, off - l2));
+
+
+}
+
+
 int main(int argc, char *argv[])
 {
        rvmcpu_t *cpu;
        rvm_asmins_t code[1024];
+       rpa_compiler_t *co;
        ruint off = 0;
        rint i;
 
+       co = rpa_compiler_create();
        cpu = rvm_cpu_create_default();
        cpu->userdata1 = rpa_stat_create();
        regextable = rvm_cpu_addswitable(cpu, switable);
@@ -194,6 +262,8 @@ int main(int argc, char *argv[])
                if (r_strcmp(argv[i], "-L") == 0) {
                } else if (r_strcmp(argv[i], "-d") == 0) {
                        debuginfo = 1;
+               } else if (r_strcmp(argv[i], "-c") == 0) {
+                       compileonly = 1;
                } else if (r_strcmp(argv[i], "-p") == 0) {
                        parseinfo = 1;
                }
@@ -209,14 +279,46 @@ int main(int argc, char *argv[])
        }
 
 
-       code[off++] = rvm_asml(RVM_MOV, TP, DA, XX, -1);
-       code[off++] = rvm_asm(RPA_SHIFT, XX, XX, XX, 0);
-       code[off++] = rvm_asm(RPA_SHIFT, XX, XX, XX, 0);
-       code[off++] = rvm_asm(RPA_SHIFT, XX, XX, XX, 0);
-       code[off++] = rvm_asm(RPA_SHIFT, XX, XX, XX, 0);
-       code[off++] = rvm_asm(RPA_SHIFT, XX, XX, XX, 0);
+       codegen_rpa_match(co);
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, TP, DA, XX, -1));
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, FP, DA, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, SP, DA, XX, co->fpoff));
+
+
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_SHIFT, XX, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR, DA, XX, XX, 'a'));
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_EQSHIFT, XX, XX, XX, 0));
+
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, 'b'));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_LDS, R1, FP, DA, rvm_scope_lookup_s(co->scope, "rpa_match")->data.offset));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BL, R1, DA, XX, -rvm_codegen_getcodesize(co->cg)));
+
+//     rvm_codegen_addins(co->cg, rvm_asm(RPA_EQMATCHCHR, DA, XX, XX, 'b'));
+//     rvm_codegen_addins(co->cg, rvm_asm(RPA_EQSHIFT, XX, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_EQMATCHCHR, DA, XX, XX, 'c'));
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_EQSHIFT, XX, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_EQMATCHCHR, DA, XX, XX, 'd'));
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_EQSHIFT, XX, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_EXT, XX, XX, XX, 0));
+
+
+       rvm_cpu_exec_debug(cpu, rvm_codegen_getcode(co->cg, 0), 0);
+
+       if (debuginfo) {
+               fprintf(stdout, "\nGenerated Code:\n");
+               rvm_asm_dump(rvm_codegen_getcode(co->cg, 0), rvm_codegen_getcodesize(co->cg));
+               if (rvm_codegen_getcodesize(co->cg)) {
+                       if (!compileonly) {
+                               fprintf(stdout, "\nExecution:\n");
+                               rvm_cpu_exec_debug(cpu, rvm_codegen_getcode(co->cg, 0), 0);
+                       }
+               }
+       } else {
+               if (!compileonly)
+                       rvm_cpu_exec(cpu, rvm_codegen_getcode(co->cg, 0), 0);
+       }
+
 
-       rvm_cpu_exec_debug(cpu, code, 0);
        rpa_stat_destroy((rpastat_t *)cpu->userdata1);
        rvm_cpu_destroy(cpu);