RPA Toolkit
work on RVM based parser - RPA2
author Martin Stoilov <martin@rpasearch.com>
Tue, 22 Feb 2011 07:37:19 +0000 (23:37 -0800)
committer Martin Stoilov <martin@rpasearch.com>
Tue, 22 Feb 2011 07:37:19 +0000 (23:37 -0800)
12 files changed:
build/linux/build.mk
rpa2/build/linux/rpa.mk [new file with mode: 0644]
rpa2/build/linux/x86_64/Makefile [new file with mode: 0644]
rpa2/rpacompiler.c [new file with mode: 0644]
rpa2/rpacompiler.h [new file with mode: 0644]
rpa2/rpastat.c [new file with mode: 0644]
rpa2/rpastat.h [new file with mode: 0644]
rpa2/rpavm.c [new file with mode: 0644]
rpa2/rpavm.h [new file with mode: 0644]
rvm/rvmcpu.h
tests/build/linux/robject-tests.mk
tests/rpavm-test.c [new file with mode: 0644]

index af9a7ef..0179794 100644 (file)
@@ -2,6 +2,7 @@ all:
        +make -C $(SRCDIR)/rlib/build/$(OS)/$(ARCHDIR) all
        +make -C $(SRCDIR)/rast/build/$(OS)/$(ARCHDIR) all
        +make -C $(SRCDIR)/rpa/build/$(OS)/$(ARCHDIR) all
        +make -C $(SRCDIR)/rlib/build/$(OS)/$(ARCHDIR) all
        +make -C $(SRCDIR)/rast/build/$(OS)/$(ARCHDIR) all
        +make -C $(SRCDIR)/rpa/build/$(OS)/$(ARCHDIR) all
+       +make -C $(SRCDIR)/rpa2/build/$(OS)/$(ARCHDIR) all
        +make -C $(SRCDIR)/rvm/build/$(OS)/$(ARCHDIR) all
        +make -C $(SRCDIR)/tests/build/$(OS)/$(ARCHDIR) all
        +make -C $(SRCDIR)/rgrep/unix all
        +make -C $(SRCDIR)/rvm/build/$(OS)/$(ARCHDIR) all
        +make -C $(SRCDIR)/tests/build/$(OS)/$(ARCHDIR) all
        +make -C $(SRCDIR)/rgrep/unix all
@@ -10,6 +11,7 @@ distclean: clean
        +make -C $(SRCDIR)/rlib/build/$(OS)/$(ARCHDIR) distclean
        +make -C $(SRCDIR)/rast/build/$(OS)/$(ARCHDIR) distclean
        +make -C $(SRCDIR)/rpa/build/$(OS)/$(ARCHDIR) distclean
        +make -C $(SRCDIR)/rlib/build/$(OS)/$(ARCHDIR) distclean
        +make -C $(SRCDIR)/rast/build/$(OS)/$(ARCHDIR) distclean
        +make -C $(SRCDIR)/rpa/build/$(OS)/$(ARCHDIR) distclean
+       +make -C $(SRCDIR)/rpa2/build/$(OS)/$(ARCHDIR) distclean
        +make -C $(SRCDIR)/rvm/build/$(OS)/$(ARCHDIR) distclean
        +make -C $(SRCDIR)/tests/build/$(OS)/$(ARCHDIR) distclean
        +make -C $(SRCDIR)/rgrep/unix distclean
        +make -C $(SRCDIR)/rvm/build/$(OS)/$(ARCHDIR) distclean
        +make -C $(SRCDIR)/tests/build/$(OS)/$(ARCHDIR) distclean
        +make -C $(SRCDIR)/rgrep/unix distclean
@@ -18,6 +20,7 @@ clean:
        +make -C $(SRCDIR)/rlib/build/$(OS)/$(ARCHDIR) clean
        +make -C $(SRCDIR)/rast/build/$(OS)/$(ARCHDIR) clean
        +make -C $(SRCDIR)/rpa/build/$(OS)/$(ARCHDIR) clean
        +make -C $(SRCDIR)/rlib/build/$(OS)/$(ARCHDIR) clean
        +make -C $(SRCDIR)/rast/build/$(OS)/$(ARCHDIR) clean
        +make -C $(SRCDIR)/rpa/build/$(OS)/$(ARCHDIR) clean
+       +make -C $(SRCDIR)/rpa2/build/$(OS)/$(ARCHDIR) clean
        +make -C $(SRCDIR)/rvm/build/$(OS)/$(ARCHDIR) clean
        +make -C $(SRCDIR)/tests/build/$(OS)/$(ARCHDIR) clean
        +make -C $(SRCDIR)/rgrep/unix clean
        +make -C $(SRCDIR)/rvm/build/$(OS)/$(ARCHDIR) clean
        +make -C $(SRCDIR)/tests/build/$(OS)/$(ARCHDIR) clean
        +make -C $(SRCDIR)/rgrep/unix clean
diff --git a/rpa2/build/linux/rpa.mk b/rpa2/build/linux/rpa.mk
new file mode 100644 (file)
index 0000000..9a016f9
--- /dev/null
@@ -0,0 +1,42 @@
+# Build rules for the RPA2 library (static archive, plus a shared object on
+# linux). Included from the per-architecture Makefile, which is expected to
+# define SRCDIR, OUTDIR, OS, CC, AR, CFLAGS and LDFLAGS.
+RPA_SRCDIR = $(SRCDIR)/rpa2
+# RVM_SRCDIR was referenced below without ever being defined in this build,
+# which turned the first include path into "-I/config".
+RVM_SRCDIR = $(SRCDIR)/rvm
+RPA_LIB = $(OUTDIR)/librpa2.a
+RPA_SO = $(OUTDIR)/librpa2.so.1.0
+
+CFLAGS += -I$(RVM_SRCDIR)/config -I$(SRCDIR)/rlib -I$(RVM_SRCDIR)
+
+RPA_OBJECTS = \
+       $(OUTDIR)/rpastat.o \
+       $(OUTDIR)/rpavm.o \
+       $(OUTDIR)/rpacompiler.o
+
+.PHONY: all clean distclean
+
+ifeq ($(OS), linux)
+all: $(OUTDIR) $(RPA_LIB) $(RPA_SO)
+else
+all: $(OUTDIR) $(RPA_LIB)
+endif
+
+
+# $(OUTDIR) is an order-only prerequisite so that a parallel build cannot
+# compile an object before the output directory exists.
+$(OUTDIR)/%.o: $(RPA_SRCDIR)/%.c | $(OUTDIR)
+       $(CC) $(CFLAGS) -o $(OUTDIR)/$*.o -c $(RPA_SRCDIR)/$*.c
+
+$(RPA_LIB): $(RPA_OBJECTS)
+       $(AR) -cr $@ $^
+
+$(RPA_SO): $(RPA_OBJECTS)
+       $(CC) $(LDFLAGS) -shared -Wl,-soname,librpa2.so -o $@ $^
+
+# -p: do not fail when the directory already exists.
+$(OUTDIR):
+       @mkdir -p $(OUTDIR)
+
+distclean: clean
+       @rm -f .depend
+       @rm -rf $(OUTDIR)
+
+clean:
+       @rm -f $(RPA_LIB)
+       @rm -f $(RPA_SO)
+       @rm -f $(RPA_OBJECTS)
+       @rm -f *~
+       @rm -f $(SRCDIR)/*~
+
diff --git a/rpa2/build/linux/x86_64/Makefile b/rpa2/build/linux/x86_64/Makefile
new file mode 100644 (file)
index 0000000..c72fee8
--- /dev/null
@@ -0,0 +1,25 @@
+# Per-architecture entry point for the RPA2 build: sets up paths, tools and
+# flags, then includes the shared rules from ../rpa.mk.
+SRCDIR = ../../../..
+# ARCHDIR is the name of the directory this Makefile is run from.
+ARCHDIR = $(shell basename $(shell pwd))
+OUTDIR = out
+# OS is the lower-cased output of uname.
+OS = $(shell uname | tr "[:upper:]" "[:lower:]")
+INCLUDE = -I$(SRCDIR)/arch/$(OS)/$(ARCHDIR)
+
+CC = gcc
+AR = ar
+LD = ld
+
+# BLDCFG=release selects an optimized build; anything else builds with debug
+# information and warnings.
+ifeq ($(BLDCFG), release)
+CFLAGS := -fPIC -O3
+else
+CFLAGS := -fPIC -O0 -g -Wall 
+endif
+
+# CCBLD=yes adds gcc coverage instrumentation flags.
+ifeq ($(CCBLD), yes)
+CFLAGS += -fprofile-arcs -ftest-coverage
+endif
+
+# MACH is not set in this file; presumably supplied by the caller or the
+# environment (e.g. a machine flag) -- TODO confirm.
+CFLAGS += $(MACH) $(INCLUDE)
+LDFLAGS := $(MACH)
+
+
+include ../rpa.mk
diff --git a/rpa2/rpacompiler.c b/rpa2/rpacompiler.c
new file mode 100644 (file)
index 0000000..9a69a67
--- /dev/null
@@ -0,0 +1,91 @@
+#include "rmem.h"
+#include "rpacompiler.h"
+
+
+/*
+ * Emit the "rpacompiler_mnode_nan" stub into co->cg: a label followed by a
+ * single RVM_BX through the R_WHT register.
+ */
+void rpacompiler_mnode_nan(rpa_compiler_t *co)
+{
+       rvm_codegen_addlabel_s(co->cg, "rpacompiler_mnode_nan");
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, R_WHT, XX, XX, 0));
+}
+
+
+/*
+ * Emit the "rpacompiler_mnode_opt" helper routine into co->cg.
+ *
+ * Emitted sequence: save LR, RVM_BXL through R_WHT, restore LR, compare R0
+ * with 0 and RVM_BXGRE to LR; otherwise load 0 into R0, compare R0 with
+ * itself and RVM_BX to LR.
+ *
+ * NOTE(review): presumably this wraps a match as "optional", turning a
+ * negative R0 result into a zero-length success -- confirm against the RVM
+ * opcode semantics.
+ * NOTE(review): the first CMP is built with rvm_asml and the second with
+ * rvm_asm; confirm the difference is intentional.
+ */
+void rpacompiler_mnode_opt(rpa_compiler_t *co)
+{
+       rvm_codegen_addlabel_s(co->cg, "rpacompiler_mnode_opt");
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, LR, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, LR, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, DA, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BXGRE, LR, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_CMP, R0, R0, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+}
+
+
+/*
+ * Emit the "rpacompiler_mnode_mul" helper routine into co->cg.
+ *
+ * Emitted sequence: save LR, call through R_WHT once and return (POP PC) if
+ * R0 compares below 0; otherwise loop, calling through R_WHT again and
+ * adding the previous R0 (kept on the stack across the call, popped into R1)
+ * to the new one while R0 keeps comparing >= 0. The final ADDS and POP PC
+ * return to the caller.
+ *
+ * NOTE(review): presumably the "one or more" repetition wrapper -- confirm.
+ * The branch operands (2 and -5) look like relative instruction offsets;
+ * inserting or removing an instruction here requires re-checking them.
+ */
+void rpacompiler_mnode_mul(rpa_compiler_t *co)
+{
+       rvm_codegen_addlabel_s(co->cg, "rpacompiler_mnode_mul");
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, LR, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, DA, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, 2));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, PC, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_CLR, R1, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_ADD, R0, R0, R1, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, R0, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R1, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, DA, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, -5));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_ADDS, R0, R1, DA, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, PC, XX, XX, 0));
+}
+
+
+/*
+ * Emit the "rpacompiler_mnode_mop" helper routine into co->cg.
+ *
+ * Same shape as rpacompiler_mnode_mul, except that when the first call
+ * through R_WHT leaves R0 below 0 the routine loads 0 into R0, compares R0
+ * with itself and returns, instead of returning the negative value.
+ *
+ * NOTE(review): presumably the "zero or more" repetition wrapper -- confirm.
+ * The branch operands (4 and -5) look like relative instruction offsets;
+ * inserting or removing an instruction here requires re-checking them.
+ */
+void rpacompiler_mnode_mop(rpa_compiler_t *co)
+{
+       rvm_codegen_addlabel_s(co->cg, "rpacompiler_mnode_mop");
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, LR, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, DA, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, 4));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, R0, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, PC, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_CLR, R1, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_ADD, R0, R0, R1, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, R0, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R1, XX, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, DA, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, -5));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_ADDS, R0, R1, DA, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, PC, XX, XX, 0));
+}
+
+
+/*
+ * Allocate and initialise a compiler object.
+ *
+ * Creates the code generator and the scope, then pre-emits the four
+ * rpacompiler_mnode_* helper routines into the code generator.
+ *
+ * Returns the new object, or NULL if the object itself cannot be allocated.
+ * Release it with rpa_compiler_destroy().
+ */
+rpa_compiler_t *rpa_compiler_create()
+{
+       rpa_compiler_t *co;
+
+       co = r_malloc(sizeof(*co));
+       /* Do not hand a failed allocation to r_memset() below. */
+       if (!co)
+               return NULL;
+       r_memset(co, 0, sizeof(*co));
+       co->cg = rvm_codegen_create();
+       co->scope = rvm_scope_create();
+       rpacompiler_mnode_nan(co);
+       rpacompiler_mnode_opt(co);
+       rpacompiler_mnode_mul(co);
+       rpacompiler_mnode_mop(co);
+       return co;
+}
+
+
+/*
+ * Destroy a compiler object created by rpa_compiler_create().
+ * When co is non-NULL its code generator and scope are destroyed first;
+ * co itself is then passed to r_free() in either case.
+ */
+void rpa_compiler_destroy(rpa_compiler_t *co)
+{
+       if (co) {
+               rvm_codegen_destroy(co->cg);
+               rvm_scope_destroy(co->scope);
+       }
+       r_free(co);
+}
diff --git a/rpa2/rpacompiler.h b/rpa2/rpacompiler.h
new file mode 100644 (file)
index 0000000..b6e16cf
--- /dev/null
@@ -0,0 +1,29 @@
+#ifndef _RPACOMPILER_H_
+#define _RPACOMPILER_H_
+
+#include "rvmcodegen.h"
+#include "rvmscope.h"
+#include "rpavm.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* State of one RPA2 compiler instance. */
+typedef struct rpa_compiler_s {
+       rvm_codegen_t *cg;      /* code generator that receives emitted labels/instructions */
+       rboolean optimized;     /* not referenced in rpacompiler.c; purpose TODO confirm */
+       rvm_scope_t *scope;     /* scope object created alongside the code generator */
+       rulong fpoff;           /* not referenced in rpacompiler.c; purpose TODO confirm */
+} rpa_compiler_t;
+
+
+rpa_compiler_t *rpa_compiler_create();
+void rpa_compiler_destroy(rpa_compiler_t *co);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/rpa2/rpastat.c b/rpa2/rpastat.c
new file mode 100644 (file)
index 0000000..193ebc7
--- /dev/null
@@ -0,0 +1,66 @@
+#include "rmem.h"
+#include "rarray.h"
+#include "rpastat.h"
+
+
+/*
+ * Create a parser state object together with its VM cpu and record array.
+ *
+ * stacksize is forwarded to rpavm_cpu_create(). The cpu's userdata1 is set
+ * to the new state so the SWI handlers can find it.
+ *
+ * Returns the new state, or NULL if any allocation fails; nothing is leaked
+ * on failure. Release it with rpa_stat_destroy().
+ */
+rpastat_t *rpa_stat_create(rulong stacksize)
+{
+       rpastat_t *stat = (rpastat_t *) r_zmalloc(sizeof(*stat));
+
+       if (!stat)
+               return NULL;
+       stat->cpu = rpavm_cpu_create(stacksize);
+       if (!stat->cpu) {
+               r_free(stat);
+               return NULL;
+       }
+       stat->records = r_array_create(sizeof(rparecord_t));
+       if (!stat->records) {
+               /* Undo the cpu creation so that a failed array does not leak it. */
+               rpavm_cpu_destroy(stat->cpu);
+               r_free(stat);
+               return NULL;
+       }
+       stat->cpu->userdata1 = stat;
+       return stat;
+}
+
+
+/*
+ * Destroy a parser state created by rpa_stat_create(): frees the input
+ * stack, the record array, the VM cpu and the state itself.
+ * Passing NULL is a no-op, matching rpa_compiler_destroy().
+ */
+void rpa_stat_destroy(rpastat_t *stat)
+{
+       if (!stat)
+               return;
+       if (stat->instack)
+               r_free(stat->instack);
+       if (stat->records)
+               r_object_destroy((robject_t*)stat->records);
+       if (stat->cpu)
+               rpavm_cpu_destroy(stat->cpu);
+       r_free(stat);
+}
+
+
+/*
+ * Prepare stat for parsing the buffer [start, end), beginning at input.
+ *
+ * Resets the error code, the decode cursor (stat->ip) and the record array,
+ * and makes sure the input stack is large enough for the buffer.
+ *
+ * Returns 0 on success, -1 if start > end, if input lies outside
+ * [start, end], or if the input stack cannot be grown. On failure stat is
+ * left unmodified.
+ */
+rint rpa_stat_init(rpastat_t *stat, const rchar *input, const rchar *start, const rchar *end)
+{
+       rulong size;
+       rulong needed;
+
+       if (start > end)
+               return -1;
+       if (input < start || input > end)
+               return -1;
+       size = end - start;
+       /*
+        * rpavm_swi_shift() writes the entry *after* the current top, so the
+        * stack needs one slot for the starting position, up to `size` decoded
+        * characters and one EOF marker.
+        * NOTE(review): this assumes R_TOP starts at 0 -- confirm with the caller.
+        */
+       needed = size + 2;
+       if (stat->instacksize < needed) {
+               /* Keep the old buffer reachable if the reallocation fails. */
+               rpainput_t *grown = (rpainput_t *) r_realloc(stat->instack, needed * sizeof(rpainput_t));
+
+               if (!grown)
+                       return -1;
+               /*
+                * rpavm_swi_shift() reads .eof of the current entry before it
+                * has written it; start from a zeroed buffer. Old contents are
+                * not needed because the decode cursor is reset below.
+                */
+               r_memset(grown, 0, needed * sizeof(rpainput_t));
+               stat->instack = grown;
+               stat->instacksize = needed;
+       }
+       stat->start = start;
+       stat->end = end;
+       stat->input = input;
+       stat->error = 0;
+       stat->cursize = 0;
+       stat->ip.input = input;
+       stat->ip.serial = 0;
+       r_array_setlength(stat->records, 0);
+       return 0;
+}
+
+
+/*
+ * Initialise stat for the given input via rpa_stat_init().
+ *
+ * Returns 0 on success and -1 if the initialisation fails. No VM execution
+ * is performed here yet.
+ */
+rint rpa_stat_parse(rpastat_t *stat, const rchar *input, const rchar *start, const rchar *end)
+{
+       /* The original called rpa_stat_parse() here and recursed without end. */
+       if (rpa_stat_init(stat, input, start, end) < 0)
+               return -1;
+
+       return 0;
+}
diff --git a/rpa2/rpastat.h b/rpa2/rpastat.h
new file mode 100644 (file)
index 0000000..510c07c
--- /dev/null
@@ -0,0 +1,38 @@
+#ifndef _RPASTAT_H_
+#define _RPASTAT_H_
+
+#include "rtypes.h"
+#include "rarray.h"
+#include "rvmreg.h"
+#include "rpavm.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Per-parse state shared between the rpa_stat_* API and the RPA SWI handlers. */
+typedef struct rpastat_s {
+       const rchar *input;     /* position parsing starts from; set by rpa_stat_init() */
+       const rchar *start;     /* first byte of the input buffer */
+       const rchar *end;       /* end of the input buffer; decoding stops here */
+       ruint error;            /* reset to 0 by rpa_stat_init() */
+       rarray_t *records;      /* array of rparecord_t appended by the emit handlers */
+       rpainput_t *instack;    /* decoded input entries, indexed by the R_TOP register */
+       rulong instacksize;     /* number of entries allocated in instack */
+       rulong cursize;         /* reset to 0 by rpa_stat_init(); purpose TODO confirm */
+       rpainmap_t ip;          /* decode cursor: next undecoded byte and decoded count */
+       rvmcpu_t *cpu;          /* VM cpu; its userdata1 points back at this struct */
+} rpastat_t;
+
+
+rpastat_t *rpa_stat_create(rulong stacksize);
+void rpa_stat_destroy(rpastat_t *stat);
+rint rpa_stat_init(rpastat_t *stat, const rchar *input, const rchar *start, const rchar *end);
+rint rpa_stat_parse(rpastat_t *stat, const rchar *input, const rchar *start, const rchar *end);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/rpa2/rpavm.c b/rpa2/rpavm.c
new file mode 100644 (file)
index 0000000..2d17348
--- /dev/null
@@ -0,0 +1,288 @@
+#include "rpavm.h"
+#include "rpastat.h"
+#include "rutf.h"
+
+
+/*
+ * SWI handler: advance the input position (R_TOP) by one entry.
+ *
+ * Does nothing when the current entry is flagged eof. Otherwise moves to the
+ * next entry; if that entry has not been decoded yet (its index is >=
+ * stat->ip.serial) it is filled in, either with the next UTF-8 character
+ * from stat->ip.input or, at the end of the buffer, with an eof marker.
+ *
+ * NOTE(review): there is no check of the new index against
+ * stat->instacksize; the buffer sizing in rpa_stat_init() has to guarantee
+ * the slot exists.
+ * NOTE(review): the return value of r_utf8_mbtowc() is added to the input
+ * pointer unchecked; confirm it cannot be zero or negative for malformed
+ * input, otherwise the cursor would not advance.
+ */
+static void rpavm_swi_shift(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpastat_t *stat = (rpastat_t *)cpu->userdata1;
+       rlong tp = RVM_CPUREG_GETL(cpu, R_TOP);
+       rpainput_t * ptp = &stat->instack[tp];
+
+       /* Already at end of input: the position never moves past the eof entry. */
+       if (ptp->eof)
+               return;
+       ptp++;
+       tp++;
+       /* Entries below ip.serial were decoded earlier and are reused as-is. */
+       if (tp >= (rlong)stat->ip.serial) {
+               rint inc = 0;
+               ptp->input = stat->ip.input;
+               if (ptp->input < stat->end) {
+                       inc = r_utf8_mbtowc(&ptp->wc, (const ruchar*)stat->ip.input, (const ruchar*)stat->end);
+                       stat->ip.input += inc;
+                       stat->ip.serial += 1;
+                       ptp->eof = 0;
+               } else {
+                       /* End of buffer: mark the entry; ip.serial is not advanced. */
+                       ptp->wc = (ruint32)-1;
+                       ptp->eof = 1;
+               }
+       }
+       RVM_CPUREG_SETL(cpu, R_TOP, tp);
+}
+
+
+/*
+ * Shared worker for the RPA_MATCHCHR_* handlers: match the character held in
+ * operand op1 against the current input entry.
+ *
+ * flags selects the repetition mode:
+ *   RPA_MATCH_MULTIPLE / RPA_MATCH_MULTIOPT  consume every consecutive match,
+ *   anything else                            consume at most one.
+ * For RPA_MATCH_OPTIONAL / RPA_MATCH_MULTIOPT a miss sets RVM_STATUS_Z and
+ * R0 = 0; for the other modes a miss sets RVM_STATUS_N and R0 = (rword)-1.
+ * On a hit the status is cleared and R0 holds the number of characters
+ * consumed.
+ */
+static void rpavm_matchchr_do(rvmcpu_t *cpu, rvm_asmins_t *ins, rword flags)
+{
+       rpastat_t *stat = (rpastat_t *)cpu->userdata1;
+       rword wc = RVM_CPUREG_GETU(cpu, ins->op1);
+       rword matched = 0;
+       rint repeat = (flags == RPA_MATCH_MULTIPLE || flags == RPA_MATCH_MULTIOPT);
+       rint optional = (flags == RPA_MATCH_OPTIONAL || flags == RPA_MATCH_MULTIOPT);
+
+       for (;;) {
+               rpainput_t *cur = &stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)];
+
+               if (cur->eof || cur->wc != wc)
+                       break;
+               rpavm_swi_shift(cpu, ins);
+               matched += 1;
+               if (!repeat)
+                       break;
+       }
+
+       if (optional) {
+               cpu->status = matched ? 0 : RVM_STATUS_Z;
+               RVM_CPUREG_SETU(cpu, R0, matched);
+       } else {
+               cpu->status = matched ? 0 : RVM_STATUS_N;
+               RVM_CPUREG_SETU(cpu, R0, matched ? matched : (rword)-1);
+       }
+}
+
+
+/*
+ * Shared worker for the RPA_MATCHSPCHR_* handlers: like rpavm_matchchr_do(),
+ * but the operand is first translated as a special character:
+ *   't' -> '\t', 'r' -> '\r', 'n' -> '\n', '.' -> wildcard ((rword)-1).
+ * Any other value is matched literally. The wildcard matches every entry
+ * that is not eof.
+ *
+ * flags, the status bits and R0 are handled exactly as in
+ * rpavm_matchchr_do().
+ */
+static void rpavm_matchspchr_do(rvmcpu_t *cpu, rvm_asmins_t *ins, rword flags)
+{
+       rpastat_t *stat = (rpastat_t *)cpu->userdata1;
+       rword wc = RVM_CPUREG_GETU(cpu, ins->op1);
+       rword matched = 0;
+       rint repeat = (flags == RPA_MATCH_MULTIPLE || flags == RPA_MATCH_MULTIOPT);
+       rint optional = (flags == RPA_MATCH_OPTIONAL || flags == RPA_MATCH_MULTIOPT);
+
+       if (wc == 't')
+               wc = '\t';
+       else if (wc == 'r')
+               wc = '\r';
+       else if (wc == 'n')
+               wc = '\n';
+       else if (wc == '.')
+               wc = (rword)-1;
+
+       for (;;) {
+               rpainput_t *cur = &stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)];
+
+               if (cur->eof)
+                       break;
+               if (wc != (rword)-1 && cur->wc != wc)
+                       break;
+               rpavm_swi_shift(cpu, ins);
+               matched += 1;
+               if (!repeat)
+                       break;
+       }
+
+       if (optional) {
+               cpu->status = matched ? 0 : RVM_STATUS_Z;
+               RVM_CPUREG_SETU(cpu, R0, matched);
+       } else {
+               cpu->status = matched ? 0 : RVM_STATUS_N;
+               RVM_CPUREG_SETU(cpu, R0, matched ? matched : (rword)-1);
+       }
+}
+
+
+/* SWI handler RPA_MATCHCHR_NAN: match exactly one literal character. */
+static void rpavm_swi_matchchr_nan(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpavm_matchchr_do(cpu, ins, RPA_MATCH_NONE);
+}
+
+
+/* SWI handler RPA_MATCHCHR_OPT: match at most one literal character. */
+static void rpavm_swi_matchchr_opt(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpavm_matchchr_do(cpu, ins, RPA_MATCH_OPTIONAL);
+}
+
+
+/* SWI handler RPA_MATCHCHR_MUL: match one or more consecutive literal characters. */
+static void rpavm_swi_matchchr_mul(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpavm_matchchr_do(cpu, ins, RPA_MATCH_MULTIPLE);
+}
+
+
+/* SWI handler RPA_MATCHCHR_MOP: match zero or more consecutive literal characters. */
+static void rpavm_swi_matchchr_mop(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpavm_matchchr_do(cpu, ins, RPA_MATCH_MULTIOPT);
+}
+
+
+
+
+/* SWI handler RPA_MATCHSPCHR_NAN: match exactly one special character. */
+static void rpavm_swi_matchspchr_nan(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpavm_matchspchr_do(cpu, ins, RPA_MATCH_NONE);
+}
+
+
+/* SWI handler RPA_MATCHSPCHR_OPT: match at most one special character. */
+static void rpavm_swi_matchspchr_opt(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpavm_matchspchr_do(cpu, ins, RPA_MATCH_OPTIONAL);
+}
+
+
+/* SWI handler RPA_MATCHSPCHR_MUL: match one or more consecutive special characters. */
+static void rpavm_swi_matchspchr_mul(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpavm_matchspchr_do(cpu, ins, RPA_MATCH_MULTIPLE);
+}
+
+
+/* SWI handler RPA_MATCHSPCHR_MOP: match zero or more consecutive special characters. */
+static void rpavm_swi_matchspchr_mop(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpavm_matchspchr_do(cpu, ins, RPA_MATCH_MULTIOPT);
+}
+
+
+
+
+/*
+ * SWI handler RPA_MATCHANY_NAN: consume one input entry of any value.
+ * RVM_STATUS_N is updated with 1 when the current entry is eof and with 0
+ * otherwise; the position is shifted only when N ends up clear.
+ * Unlike the matchchr handlers, R0 is not written here.
+ */
+static void rpavm_swi_matchany_nan(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpastat_t *stat = (rpastat_t *)cpu->userdata1;
+       rint ateof = stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].eof ? 1 : 0;
+
+       RVM_STATUS_UPDATE(cpu, RVM_STATUS_N, ateof);
+       if (cpu->status & RVM_STATUS_N)
+               return;
+       rpavm_swi_shift(cpu, ins);
+}
+
+
+/*
+ * SWI handler RPA_MATCHEOL_NAN: consume one end-of-line character.
+ * RVM_STATUS_N is updated with 0 when the current entry is not eof and holds
+ * '\r' or '\n', and with 1 otherwise; the position is shifted only when N
+ * ends up clear. R0 is not written here.
+ */
+static void rpavm_swi_matcheol_nan(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpastat_t *stat = (rpastat_t *)cpu->userdata1;
+       rpainput_t *cur = &stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)];
+       /*
+        * Compare the decoded code point directly. A strchr-style search of
+        * "\r\n" also "finds" wc == 0 (the string terminator) and narrows wide
+        * code points to a char before comparing.
+        */
+       rint iseol = !cur->eof && (cur->wc == '\r' || cur->wc == '\n');
+
+       RVM_STATUS_UPDATE(cpu, RVM_STATUS_N, iseol ? 0 : 1);
+       if (!(cpu->status & RVM_STATUS_N))
+               rpavm_swi_shift(cpu, ins);
+}
+
+
+/*
+ * SWI handler RPA_EMITSTART: append a START record for a rule.
+ * The rule name is read from the string operand op1 and the input position
+ * from op2. Only rule, top and type are written on the new record.
+ * NOTE(review): size, head and lnk keep whatever r_array_add() left in the
+ * slot -- confirm that it zero-fills new slots.
+ */
+static void rpavm_swi_emitstart(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpastat_t *stat = (rpastat_t *)cpu->userdata1;
+       rparecord_t *rec;
+       rlong index;
+       rword tp = RVM_CPUREG_GETU(cpu, ins->op2);
+       rstr_t name = {RVM_CPUREG_GETSTR(cpu, ins->op1), RVM_CPUREG_GETSIZE(cpu, ins->op1)};
+
+       index = r_array_add(stat->records, NULL);
+       rec = (rparecord_t *)r_array_slot(stat->records, index);
+       rec->rule = name.str;
+       rec->top = tp;
+       rec->type = RPA_RECORD_START;
+//     r_printf("START: %s(%ld)\n", name.str, (rulong)tp);
+}
+
+
+/*
+ * SWI handler RPA_EMITEND: append an END record for a rule.
+ * The rule name is read from the string operand op1, the input position
+ * from op2 and the matched length from op3. The record type is
+ * RPA_RECORD_END, with RPA_RECORD_MATCH added when the length is non-zero.
+ */
+static void rpavm_swi_emitend(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rpastat_t *stat = (rpastat_t *)cpu->userdata1;
+       rparecord_t *rec;
+       rlong index;
+       rword tp = RVM_CPUREG_GETU(cpu, ins->op2);
+       rword tplen = RVM_CPUREG_GETU(cpu, ins->op3);
+       rstr_t name = {RVM_CPUREG_GETSTR(cpu, ins->op1), RVM_CPUREG_GETSIZE(cpu, ins->op1)};
+
+       index = r_array_add(stat->records, NULL);
+       rec = (rparecord_t *)r_array_slot(stat->records, index);
+       rec->rule = name.str;
+       rec->top = tp;
+       rec->size = tplen;
+
+       /* The type was briefly set to RPA_RECORD_START before; that store was dead. */
+       if (tplen) {
+               rec->type = RPA_RECORD_END | RPA_RECORD_MATCH;
+//             r_printf("MATCHED: %s(%ld, %ld): %p(%d)\n", name.str, (rulong)tp, (rulong)tplen, name.str, name.size);
+       } else {
+               rec->type = RPA_RECORD_END;
+//             r_printf("MATCHED: %s(%ld, %ld)\n", name.str, (rulong)tp, (rulong)tplen);
+       }
+}
+
+
+/*
+ * SWI handler RPA_BXLWHT: branch-and-link with an R_WHT argument.
+ * Copies operand op2 into R_WHT, stores the current PC in LR and then sets
+ * PC to the address held in operand op1.
+ */
+static void rpavm_swi_bxlwht(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+       rword wht = RVM_CPUREG_GETU(cpu, ins->op2);
+
+       RVM_CPUREG_SETU(cpu, R_WHT, wht);
+       RVM_CPUREG_SETIP(cpu, LR, RVM_CPUREG_GETIP(cpu, PC));
+       RVM_CPUREG_SETIP(cpu, PC, RVM_CPUREG_GETIP(cpu, ins->op1));
+}
+
+
+/*
+ * SWI table registered by rpavm_cpu_create(), terminated by a NULL entry.
+ * The entry order mirrors the numbering of the RPA_* opcode macros in
+ * rpavm.h (RPA_MATCHCHR_NAN = 0 ... RPA_BXLWHT = 13); presumably the second
+ * argument of RVM_SWI_ID indexes this table, so keep both lists in sync.
+ */
+static rvm_switable_t rpavm_swi_table[] = {
+               {"RPA_MATCHCHR_NAN", rpavm_swi_matchchr_nan},
+               {"RPA_MATCHCHR_OPT", rpavm_swi_matchchr_opt},
+               {"RPA_MATCHCHR_MUL", rpavm_swi_matchchr_mul},
+               {"RPA_MATCHCHR_MOP", rpavm_swi_matchchr_mop},
+               {"RPA_MATCHSPCHR_NAN", rpavm_swi_matchspchr_nan},
+               {"RPA_MATCHSPCHR_OPT", rpavm_swi_matchspchr_opt},
+               {"RPA_MATCHSPCHR_MUL", rpavm_swi_matchspchr_mul},
+               {"RPA_MATCHSPCHR_MOP", rpavm_swi_matchspchr_mop},
+               {"RPA_SHIFT", rpavm_swi_shift},
+               {"RPA_EMITSTART", rpavm_swi_emitstart},
+               {"RPA_EMITEND", rpavm_swi_emitend},
+               {"RPA_MATCHANY_NAN", rpavm_swi_matchany_nan},
+               {"RPA_MATCHEOL_NAN", rpavm_swi_matcheol_nan},
+               {"RPA_BXLWHT", rpavm_swi_bxlwht},
+               {NULL, NULL},
+};
+
+
+/*
+ * Create a VM cpu with the RPA SWI table installed.
+ *
+ * stacksize is forwarded to rvm_cpu_create(). The RPA_* opcode macros are
+ * built with RPAVM_SWI_TABLEID, so the table must be registered under
+ * exactly that id.
+ *
+ * Returns the cpu, or NULL if the cpu cannot be created or the table is
+ * registered under a different id. Release it with rpavm_cpu_destroy().
+ */
+rvmcpu_t *rpavm_cpu_create(rulong stacksize)
+{
+       rvmcpu_t *cpu = rvm_cpu_create(stacksize);
+       rint tableid;
+
+       /* Do not register a table on a cpu that failed to allocate. */
+       if (!cpu)
+               return NULL;
+       tableid = rvm_cpu_addswitable(cpu, rpavm_swi_table);
+       if (tableid != RPAVM_SWI_TABLEID) {
+               rpavm_cpu_destroy(cpu);
+               return NULL;
+       }
+       return cpu;
+}
+
+
+/* Destroy a cpu created by rpavm_cpu_create(); forwards to rvm_cpu_destroy(). */
+void rpavm_cpu_destroy(rvmcpu_t * cpu)
+{
+       rvm_cpu_destroy(cpu);
+}
+
+
diff --git a/rpa2/rpavm.h b/rpa2/rpavm.h
new file mode 100644 (file)
index 0000000..4053af0
--- /dev/null
@@ -0,0 +1,76 @@
+#ifndef _RPAVM_H_
+#define _RPAVM_H_
+
+#include "rvmcpu.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Bit flags stored in rparecord_t.type by the emit handlers. */
+#define RPA_RECORD_NONE (0)
+#define RPA_RECORD_START (1 << 0)
+#define RPA_RECORD_END (1 << 1)
+#define RPA_RECORD_MATCH (1 << 2)
+
+/* Repetition modes passed to the match workers in rpavm.c. */
+#define RPA_MATCH_NONE 0
+#define RPA_MATCH_MULTIPLE (1 << 0)
+#define RPA_MATCH_OPTIONAL (1 << 1)
+#define RPA_MATCH_MULTIOPT (RPA_MATCH_MULTIPLE | RPA_MATCH_OPTIONAL)
+/* Register aliases used by RPA code; R_WHT and R_TOP reuse the VM's FP and TP. */
+#define R_MNODE_NAN R4
+#define R_MNODE_MUL R5
+#define R_MNODE_OPT R6
+#define R_MNODE_MOP R7
+#define R_ARG R8
+#define R_WHT FP
+#define R_TOP TP
+/* Id the RPA SWI table must be registered under; checked in rpavm_cpu_create(). */
+#define RPAVM_SWI_TABLEID 0
+
+/* RPA opcodes; the numbering follows the order of rpavm_swi_table in rpavm.c. */
+#define RPA_MATCHCHR_NAN       RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 0))
+#define RPA_MATCHCHR_OPT       RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 1))
+#define RPA_MATCHCHR_MUL       RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 2))
+#define RPA_MATCHCHR_MOP       RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 3))
+#define RPA_MATCHSPCHR_NAN     RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 4))
+#define RPA_MATCHSPCHR_OPT     RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 5))
+#define RPA_MATCHSPCHR_MUL     RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 6))
+#define RPA_MATCHSPCHR_MOP     RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 7))
+#define RPA_SHIFT                      RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 8))
+#define RPA_EMITSTART          RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 9))
+#define RPA_EMITEND                    RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 10))
+#define RPA_MATCHANY_NAN       RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 11))
+#define RPA_MATCHEOL_NAN       RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 12))
+#define RPA_BXLWHT                     RVM_OPSWI(RVM_SWI_ID(RPAVM_SWI_TABLEID, 13))
+
+
+/* One parse event appended to rpastat_t.records by the emit handlers. */
+typedef struct rparecord_s {
+       rlist_t head;           /* not written by the emit handlers; purpose TODO confirm */
+       rlink_t lnk;            /* not written by the emit handlers; purpose TODO confirm */
+       const char *rule;       /* rule name string taken from the instruction operand */
+       rword top;              /* input position (R_TOP value) the record refers to */
+       rword size;             /* matched length; written by RPA_EMITEND only */
+       rword type;             /* combination of RPA_RECORD_* flags */
+} rparecord_t;
+
+
+/* One decoded input position on the input stack (rpastat_t.instack). */
+typedef struct rpainput_s {
+       const rchar *input;     /* where in the input buffer this entry was decoded from */
+       ruint32 wc;             /* decoded character, or (ruint32)-1 on the eof entry */
+       ruchar eof;             /* 1 when this entry marks the end of input, else 0 */
+} rpainput_t;
+
+
+/* Decode cursor kept in rpastat_t.ip and advanced by rpavm_swi_shift(). */
+typedef struct rpainmap_s {
+       const rchar *input;     /* next byte of the input buffer that has not been decoded */
+       rulong serial;          /* number of characters decoded so far */
+} rpainmap_t;
+
+
+rvmcpu_t *rpavm_cpu_create(rulong stacksize);
+void rpavm_cpu_destroy(rvmcpu_t * vm);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
index c76fc05..350f072 100644 (file)
@@ -337,7 +337,7 @@ rint rvm_cpu_getswi(rvmcpu_t *cpu, const rchar *swiname, rsize_t size);
 rint rvm_cpu_getswi_s(rvmcpu_t *cpu, const rchar *swiname);
 rvmreg_t *rvm_cpu_alloc_global(rvmcpu_t *cpu);
 int rvm_cpu_setreg(rvmcpu_t *cpu, rword regnum, const rvmreg_t *src);
 rint rvm_cpu_getswi_s(rvmcpu_t *cpu, const rchar *swiname);
 rvmreg_t *rvm_cpu_alloc_global(rvmcpu_t *cpu);
 int rvm_cpu_setreg(rvmcpu_t *cpu, rword regnum, const rvmreg_t *src);
-rvmreg_t * rvm_cpu_getreg(rvmcpu_t *cpu, rword regnum);
+rvmreg_t *rvm_cpu_getreg(rvmcpu_t *cpu, rword regnum);
 rvm_asmins_t rvm_asm(rword opcode, rword op1, rword op2, rword op3, rword data);
 rvm_asmins_t rvm_asma(rword opcode, rword op1, rword op2, rword op3, rchar *data, rulong size);
 rvm_asmins_t rvm_asml(rword opcode, rword op1, rword op2, rword op3, rlong data);
 rvm_asmins_t rvm_asm(rword opcode, rword op1, rword op2, rword op3, rword data);
 rvm_asmins_t rvm_asma(rword opcode, rword op1, rword op2, rword op3, rchar *data, rulong size);
 rvm_asmins_t rvm_asml(rword opcode, rword op1, rword op2, rword op3, rlong data);
index ce83023..7cca56a 100644 (file)
@@ -2,17 +2,23 @@ ROBJECT_SRCDIR = $(SRCDIR)/robject
 RLIB_SRCDIR = $(SRCDIR)/rlib
 RVM_SRCDIR = $(SRCDIR)/rvm
 RPA_SRCDIR = $(SRCDIR)/rpa
 RLIB_SRCDIR = $(SRCDIR)/rlib
 RVM_SRCDIR = $(SRCDIR)/rvm
 RPA_SRCDIR = $(SRCDIR)/rpa
+RPA2_SRCDIR = $(SRCDIR)/rpa2
 RAST_SRCDIR = $(SRCDIR)/rast
 TESTS_SRCDIR = $(SRCDIR)/tests
 RAST_SRCDIR = $(SRCDIR)/rast
 TESTS_SRCDIR = $(SRCDIR)/tests
-INCLUDE = -I$(SRCDIR)/arch/$(OS)/$(ARCHDIR) -I$(ROBJECT_SRCDIR) -I$(RLIB_SRCDIR) -I$(RVM_SRCDIR) -I$(RPA_SRCDIR) -I$(RAST_SRCDIR)
+INCLUDE = -I$(SRCDIR)/arch/$(OS)/$(ARCHDIR) -I$(ROBJECT_SRCDIR) -I$(RLIB_SRCDIR) -I$(RVM_SRCDIR) -I$(RAST_SRCDIR)
+INCLUDE_RPA = -I$(RPA_SRCDIR)
+INCLUDE_RPA2 = -I$(RPA2_SRCDIR)
+
 LIBS =  -L$(ROBJECT_SRCDIR)/build/$(OS)/$(ARCHDIR)/out 
 LIBS += -L$(RLIB_SRCDIR)/build/$(OS)/$(ARCHDIR)/out 
 LIBS += -L$(RVM_SRCDIR)/build/$(OS)/$(ARCHDIR)/out 
 LIBS += -L$(RPA_SRCDIR)/build/$(OS)/$(ARCHDIR)/out 
 LIBS += -L$(RAST_SRCDIR)/build/$(OS)/$(ARCHDIR)/out 
 LIBS =  -L$(ROBJECT_SRCDIR)/build/$(OS)/$(ARCHDIR)/out 
 LIBS += -L$(RLIB_SRCDIR)/build/$(OS)/$(ARCHDIR)/out 
 LIBS += -L$(RVM_SRCDIR)/build/$(OS)/$(ARCHDIR)/out 
 LIBS += -L$(RPA_SRCDIR)/build/$(OS)/$(ARCHDIR)/out 
 LIBS += -L$(RAST_SRCDIR)/build/$(OS)/$(ARCHDIR)/out 
-LIBS += -lrvm -lrpa -lrpasx -lrast -lrlib -lpthread -lm --static
+LIBS += -L$(RPA2_SRCDIR)/build/$(OS)/$(ARCHDIR)/out 
+LIBS += -lrvm -lrast -lrlib -lpthread -lm --static
 
 
 
 
+TESTS  += $(OUTDIR)/rpavm-test
 TESTS  += $(OUTDIR)/regex-test
 TESTS  += $(OUTDIR)/rast-test
 TESTS  += $(OUTDIR)/rpagen-test
 TESTS  += $(OUTDIR)/regex-test
 TESTS  += $(OUTDIR)/rast-test
 TESTS  += $(OUTDIR)/rpagen-test
@@ -55,17 +61,20 @@ TESTS   += $(OUTDIR)/asm-eadd
 all : $(OUTDIR) $(TESTS)
 
 $(OUTDIR)/%.o: $(TESTS_SRCDIR)/%.c
 all : $(OUTDIR) $(TESTS)
 
 $(OUTDIR)/%.o: $(TESTS_SRCDIR)/%.c
-       + $(CC) $(CFLAGS) -c -o $(OUTDIR)/$*.o $(TESTS_SRCDIR)/$*.c $(INCLUDE)
+       + $(CC) $(CFLAGS) -c -o $(OUTDIR)/$*.o $(TESTS_SRCDIR)/$*.c $(INCLUDE) $(INCLUDE_RPA)
 
 $(OUTDIR)/rpagen-test : $(OUTDIR)/ecma262.o $(OUTDIR)/rpagen-test.o
 
 $(OUTDIR)/rpagen-test : $(OUTDIR)/ecma262.o $(OUTDIR)/rpagen-test.o
-       $(CC) $(CFLAGS)  -o $@ $^ $(LIBS)
+       $(CC) $(CFLAGS)  -o $@ $^ -lrpa -lrpasx $(LIBS)
 
 $(OUTDIR)/rast-test : $(OUTDIR)/astecma262.o $(OUTDIR)/rast-test.o
 
 $(OUTDIR)/rast-test : $(OUTDIR)/astecma262.o $(OUTDIR)/rast-test.o
-       $(CC) $(CFLAGS)  -o $@ $^ $(LIBS)
+       $(CC) $(CFLAGS)  -o $@ $^ -lrpa -lrpasx $(LIBS)
+
+$(OUTDIR)/calc-test : $(OUTDIR)/calc-test.o
+       $(CC) $(CFLAGS)  -o $@ $^ -lrpa -lrpasx $(LIBS)
 
 
 $(OUTDIR)/%: $(TESTS_SRCDIR)/%.c
 
 
 $(OUTDIR)/%: $(TESTS_SRCDIR)/%.c
-       + $(CC) $(CFLAGS) -o $(OUTDIR)/$* $(TESTS_SRCDIR)/$*.c $(LIBS) $(INCLUDE)
+       + $(CC) $(CFLAGS) -o $(OUTDIR)/$* $(TESTS_SRCDIR)/$*.c  -lrpa2 $(LIBS) $(INCLUDE) $(INCLUDE_RPA2)
 
 
 $(OUTDIR)/%.o: $(TESTS_SRCDIR)/%.rpa
 
 
 $(OUTDIR)/%.o: $(TESTS_SRCDIR)/%.rpa
diff --git a/tests/rpavm-test.c b/tests/rpavm-test.c
new file mode 100644 (file)
index 0000000..3384309
--- /dev/null
@@ -0,0 +1,342 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include "rmem.h"
+#include "rpacompiler.h"
+#include "rpastat.h"
+
+
+static int debuginfo = 0;      /* set to 1 by the -d command-line option */
+static int parseinfo = 0;      /* set to 1 by the -p command-line option */
+static int compileonly = 0;    /* set to 1 by the -c command-line option; main() then skips execution */
+
+
+
+/*
+ * Generate the instructions for the rule labelled "rpa_match_aorb".
+ *
+ * Emitted sequence: EMITSTART, save R_TOP/R_WHT/LR, then MATCHCHR_NAN 'a'
+ * and MATCHCHR_MOP 'b', each followed by a BGRE branch to the
+ * "rpa_match_aorb_end" label. Falling through both branches restores the
+ * saved registers and returns with R0 = -1. The end label pops the saved
+ * R_TOP value into R1, sets R0 with SUBS from R_TOP and R1, emits EMITEND
+ * and returns.
+ */
+void codegen_rpa_match_aorb(rpa_compiler_t *co)
+{
+       const rchar *name = "rpa_match_aorb";
+       const rchar *endlabel = "rpa_match_aorb_end";
+       rulong nameidx = rvm_codegen_addstring_s(co->cg, NULL, name);
+
+       /* Rule entry: label, start record, save registers. */
+       rvm_codegen_addlabel_s(co->cg, name);
+       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, nameidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
+
+       /* First alternative: 'a'. */
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, 'a'));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
+
+       /* Second alternative: 'b'. */
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_MOP, DA, XX, XX, 'b'));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
+
+       /* Fall-through path: restore the saved registers and return R0 = -1. */
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+
+       /* Branch target: saved R_TOP goes to R1, R0 is computed, end record emitted. */
+       rvm_codegen_addlabel_s(co->cg, endlabel);
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R1)|BIT(R_WHT)|BIT(LR)));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
+       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, nameidx, rvm_asm(RPA_EMITEND, DA, R1, R0, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+}
+
+
+/*
+ * Generate the instructions for the rule labelled "rpa_match_xyz".
+ *
+ * Emitted sequence: EMITSTART, save R_TOP/R_WHT/LR, a BXLWHT call to
+ * "rpa_match_squared" through R_MNODE_MOP, then MATCHCHR_NAN 'x',
+ * MATCHCHR_MOP 'y' and MATCHCHR_NAN 'z'. Every step is followed by a BLES
+ * branch to "rpa_match_xyz_end". Falling through all steps pops the saved
+ * R_TOP value into R1, sets R0 with SUBS from R_TOP and R1, emits EMITEND
+ * and returns. The end label restores the saved registers and returns
+ * with R0 = -1.
+ */
+void codegen_rpa_match_xyz(rpa_compiler_t *co)
+{
+       const rchar *name = "rpa_match_xyz";
+       const rchar *endlabel = "rpa_match_xyz_end";
+       rulong nameidx = rvm_codegen_addstring_s(co->cg, NULL, name);
+
+       /* Rule entry: label, start record, save registers. */
+       rvm_codegen_addlabel_s(co->cg, name);
+       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, nameidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
+
+       /* Step 1: call the "rpa_match_squared" rule. */
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_squared", rvm_asm(RPA_BXLWHT, R_MNODE_MOP, DA, XX, 0));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Step 2: 'x'. */
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, 'x'));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Step 3: 'y'. */
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_MOP, DA, XX, XX, 'y'));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Step 4: 'z'. */
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, 'z'));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Fall-through path: saved R_TOP goes to R1, R0 is computed, end record emitted. */
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R1)|BIT(R_WHT)|BIT(LR)));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
+       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, nameidx, rvm_asm(RPA_EMITEND, DA, R1, R0, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+
+       /* Branch target: restore the saved registers and return R0 = -1. */
+       rvm_codegen_addlabel_s(co->cg, endlabel);
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+}
+
+
+/*
+ * Generate the instructions for the rule labelled "rpa_match_abc".
+ *
+ * Emitted sequence: EMITSTART, save R_TOP/R_WHT/LR, then MATCHCHR_NAN 'a',
+ * MATCHCHR_MOP 'b' and MATCHCHR_NAN 'c', each followed by a BLES branch to
+ * "rpa_match_abc_end". Falling through all steps pops the saved R_TOP value
+ * into R1, sets R0 with SUBS from R_TOP and R1, emits EMITEND and returns.
+ * The end label restores the saved registers and returns with R0 = -1.
+ */
+void codegen_rpa_match_abc(rpa_compiler_t *co)
+{
+       const rchar *name = "rpa_match_abc";
+       const rchar *endlabel = "rpa_match_abc_end";
+       rulong nameidx = rvm_codegen_addstring_s(co->cg, NULL, name);
+
+       /* Rule entry: label, start record, save registers. */
+       rvm_codegen_addlabel_s(co->cg, name);
+       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, nameidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
+
+       /* Step 1: 'a'. */
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, 'a'));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Step 2: 'b'. */
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_MOP, DA, XX, XX, 'b'));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Step 3: 'c'. */
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, 'c'));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Fall-through path: saved R_TOP goes to R1, R0 is computed, end record emitted. */
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R1)|BIT(R_WHT)|BIT(LR)));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
+       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, nameidx, rvm_asm(RPA_EMITEND, DA, R1, R0, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+
+       /* Branch target: restore the saved registers and return R0 = -1. */
+       rvm_codegen_addlabel_s(co->cg, endlabel);
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+}
+
+
+/*
+ * Generate the instructions for the rule labelled "rpa_match_xyzorabc".
+ *
+ * Emitted sequence: EMITSTART, save R_TOP/R_WHT/LR, then BXLWHT calls to
+ * "rpa_match_xyz" and "rpa_match_abc" (both through R_MNODE_MOP), each
+ * followed by a BGRE branch to "rpa_match_xyzorabc_end". Falling through
+ * both branches restores the saved registers and returns with R0 = -1.
+ * The end label pops the saved R_TOP value into R1, sets R0 with SUBS from
+ * R_TOP and R1, emits EMITEND and returns.
+ */
+void codegen_rpa_match_xyzorabc(rpa_compiler_t *co)
+{
+       const rchar *name = "rpa_match_xyzorabc";
+       const rchar *endlabel = "rpa_match_xyzorabc_end";
+       rulong nameidx = rvm_codegen_addstring_s(co->cg, NULL, name);
+
+       /* Rule entry: label, start record, save registers. */
+       rvm_codegen_addlabel_s(co->cg, name);
+       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, nameidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
+
+       /* First alternative: the "rpa_match_xyz" rule. */
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_xyz", rvm_asm(RPA_BXLWHT, R_MNODE_MOP, DA, XX, 0));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
+
+       /* Second alternative: the "rpa_match_abc" rule. */
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_abc", rvm_asm(RPA_BXLWHT, R_MNODE_MOP, DA, XX, 0));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
+
+       /* Fall-through path: restore the saved registers and return R0 = -1. */
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+
+       /* Branch target: saved R_TOP goes to R1, R0 is computed, end record emitted. */
+       rvm_codegen_addlabel_s(co->cg, endlabel);
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R1)|BIT(R_WHT)|BIT(LR)));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
+       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, nameidx, rvm_asm(RPA_EMITEND, DA, R1, R0, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+}
+
+
+
+/*
+ * Generate the instructions for the rule labelled "rpa_match_squared".
+ *
+ * Emitted sequence: EMITSTART, save R_TOP/R_WHT/LR, then MATCHCHR_NAN '[',
+ * BXLWHT calls to "rpa_match_xyzorabc" and "rpa_match_aorb" (both through
+ * R_MNODE_MOP), MATCHCHR_NAN ']' and MATCHSPCHR_MOP 'n'. Every step is
+ * followed by a BLES branch to "rpa_match_squared_end". Falling through all
+ * steps pops the saved R_TOP value into R1, sets R0 with SUBS from R_TOP
+ * and R1, emits EMITEND and returns. The end label restores the saved
+ * registers and returns with R0 = -1.
+ */
+void codegen_rpa_match_squared(rpa_compiler_t *co)
+{
+       const rchar *name = "rpa_match_squared";
+       const rchar *endlabel = "rpa_match_squared_end";
+       rulong nameidx = rvm_codegen_addstring_s(co->cg, NULL, name);
+
+       /* Rule entry: label, start record, save registers. */
+       rvm_codegen_addlabel_s(co->cg, name);
+       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, nameidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
+
+       /* Step 1: '['. */
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, '['));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Step 2: call the "rpa_match_xyzorabc" rule. */
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_xyzorabc", rvm_asm(RPA_BXLWHT, R_MNODE_MOP, DA, XX, 0));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Step 3: call the "rpa_match_aorb" rule. */
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_aorb", rvm_asm(RPA_BXLWHT, R_MNODE_MOP, DA, XX, 0));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Step 4: ']'. */
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, ']'));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Step 5: special character 'n'. */
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHSPCHR_MOP, DA, XX, XX, 'n'));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, endlabel, rvm_asm(RVM_BLES, DA, XX, XX, 0));
+
+       /* Fall-through path: saved R_TOP goes to R1, R0 is computed, end record emitted. */
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R1)|BIT(R_WHT)|BIT(LR)));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
+       rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, nameidx, rvm_asm(RPA_EMITEND, DA, R1, R0, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+
+       /* Branch target: restore the saved registers and return R0 = -1. */
+       rvm_codegen_addlabel_s(co->cg, endlabel);
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
+}
+
+
+
+
+
+
+/*
+ * Unmap the memory region described by buf (buf->str, buf->size) and free
+ * the descriptor itself. A NULL buf is ignored.
+ */
+void codegen_unmap_file(rstr_t *buf)
+{
+       if (!buf)
+               return;
+
+       munmap(buf->str, buf->size);
+       r_free(buf);
+}
+
+
+/*
+ * Map the whole of `filename` read-only into memory.
+ *
+ * Returns a newly allocated rstr_t whose `str` points at the mapping and
+ * whose `size` is the file size reported by fstat(), or NULL when the file
+ * cannot be opened, stat'ed or mapped, or when the descriptor cannot be
+ * allocated. Release the result with codegen_unmap_file().
+ */
+rstr_t *codegen_map_file(const char *filename)
+{
+       struct stat st;
+       rstr_t *str;
+       char *buffer;
+       int fd;
+
+       fd = open(filename, O_RDONLY);
+       if (fd < 0)
+               return NULL;
+       if (fstat(fd, &st) < 0) {
+               close(fd);
+               return NULL;
+       }
+       buffer = (char*)mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+       /* The mapping does not depend on the descriptor, so close it once here. */
+       close(fd);
+       if (buffer == MAP_FAILED)
+               return NULL;
+
+       str = (rstr_t *)r_malloc(sizeof(*str));
+       if (!str) {
+               /* Do not leak the mapping when the descriptor allocation fails. */
+               munmap(buffer, st.st_size);
+               return NULL;
+       }
+       r_memset(str, 0, sizeof(*str));
+       str->str = buffer;
+       str->size = st.st_size;
+       return str;
+}
+
+
+/*
+ * Test driver for the RVM based parser.
+ *
+ * Builds a small hand-written program (a bootstrap sequence plus the
+ * rpa_match_* rules generated by the codegen_rpa_match_* functions),
+ * relocates it and, unless -c is given, runs it on the supplied input.
+ *
+ * Options:
+ *   -d        dump the generated code and run with rvm_cpu_exec_debug()
+ *   -c        compile only, do not execute
+ *   -p        print the match records collected during execution
+ *   -e TEXT   use TEXT as the input
+ *   -f FILE   map FILE and use its content as the input (first -f only)
+ *   -L        accepted and ignored
+ *
+ * Returns 0 on success, 1 when an object cannot be created, the input file
+ * cannot be mapped or a symbol cannot be resolved.
+ */
+int main(int argc, char *argv[])
+{
+       rstr_t *script = NULL, *unmapscript = NULL;
+       rvm_codelabel_t *err;
+       rpa_compiler_t *co;
+       rpastat_t *stat;
+       ruint mainoff;
+       rint i;
+       int executed = 0;
+       int ret = 0;
+
+       co = rpa_compiler_create();
+       stat = rpa_stat_create(4096);
+       if (!co || !stat) {
+               fprintf(stderr, "Failed to create the compiler or the stat object\n");
+               if (stat)
+                       rpa_stat_destroy(stat);
+               if (co)
+                       rpa_compiler_destroy(co);
+               return 1;
+       }
+
+       for (i = 1; i < argc; i++) {
+               if (r_strcmp(argv[i], "-L") == 0) {
+               } else if (r_strcmp(argv[i], "-d") == 0) {
+                       debuginfo = 1;
+               } else if (r_strcmp(argv[i], "-c") == 0) {
+                       compileonly = 1;
+               } else if (r_strcmp(argv[i], "-p") == 0) {
+                       parseinfo = 1;
+               }
+       }
+
+       for (i = 1; i < argc; i++) {
+               if (r_strcmp(argv[i], "-e") == 0) {
+                       if (++i < argc) {
+                               rstr_t bnfexpr = { argv[i], r_strlen(argv[i]) };
+                               rpa_stat_init(stat, bnfexpr.str, bnfexpr.str, bnfexpr.str + bnfexpr.size);
+                       }
+               }
+       }
+
+       /* Only the first -f is honoured. */
+       for (i = 1; i < argc; i++) {
+               if (r_strcmp(argv[i], "-f") == 0) {
+                       if (++i < argc) {
+                               script = codegen_map_file(argv[i]);
+                               if (!script) {
+                                       /* Do not run the program against input that could not be loaded. */
+                                       fprintf(stderr, "Failed to map file: %s\n", argv[i]);
+                                       ret = 1;
+                                       goto end;
+                               }
+                               rpa_stat_init(stat, script->str, script->str, script->str + script->size);
+                               unmapscript = script;
+                       }
+                       break;
+               }
+       }
+
+       /* Bootstrap code: register setup, then the call into "rpa_match_squared". */
+       mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, FP, DA, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, SP, DA, XX, 0));
+
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpacompiler_mnode_nan", rvm_asm(RVM_MOV, R_MNODE_NAN, DA, XX, 0));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpacompiler_mnode_mul", rvm_asm(RVM_MOV, R_MNODE_MUL, DA, XX, 0));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpacompiler_mnode_opt", rvm_asm(RVM_MOV, R_MNODE_OPT, DA, XX, 0));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpacompiler_mnode_mop", rvm_asm(RVM_MOV, R_MNODE_MOP, DA, XX, 0));
+
+       rvm_codegen_addins(co->cg, rvm_asm(RPA_SHIFT, XX, XX, XX, 0));
+       rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_squared", rvm_asm(RPA_BXLWHT, R_MNODE_MUL, DA, XX, 0));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_NOP, XX, XX, XX, 0xabc));
+       rvm_codegen_addins(co->cg, rvm_asm(RVM_EXT, XX, XX, XX, 0));
+
+       /* The rule bodies referenced by the relocations above and by each other. */
+       codegen_rpa_match_abc(co);
+       codegen_rpa_match_xyz(co);
+       codegen_rpa_match_xyzorabc(co);
+       codegen_rpa_match_aorb(co);
+       codegen_rpa_match_squared(co);
+
+       if (rvm_codegen_relocate(co->cg, &err) < 0) {
+               r_printf("Unresolved symbol: %s\n", err->name->str);
+               ret = 1;
+               goto end;
+       }
+
+       if (debuginfo) {
+               fprintf(stdout, "\nGenerated Code:\n");
+               rvm_asm_dump(rvm_codegen_getcode(co->cg, 0), rvm_codegen_getcodesize(co->cg));
+               if (rvm_codegen_getcodesize(co->cg) && !compileonly) {
+                       fprintf(stdout, "\nExecution:\n");
+                       rvm_cpu_exec_debug(stat->cpu, rvm_codegen_getcode(co->cg, 0), mainoff);
+                       executed = 1;
+               }
+       } else if (!compileonly) {
+               rvm_cpu_exec(stat->cpu, rvm_codegen_getcode(co->cg, 0), mainoff);
+               executed = 1;
+       }
+
+       /*
+        * R0 is only meaningful after the program has run. The rules store -1
+        * in it on their failure paths, so print it as a signed value.
+        */
+       if (executed)
+               r_printf("Matched: %ld\n", (long)RVM_CPUREG_GETU(stat->cpu, R0));
+
+end:
+       /*
+        * NOTE(review): this dump used to be hard-disabled with `0 &&`. It is
+        * now enabled by -p, which had no effect before - confirm that this
+        * is the intended use of the option.
+        */
+       if (executed && parseinfo) {
+               for (i = 0; i < r_array_length(stat->records); i++) {
+                       rparecord_t *rec = (rparecord_t *)r_array_slot(stat->records, i);
+                       if (rec->type & RPA_RECORD_MATCH) {
+                               r_printf("%d: rule: %s(%d, %d)\n", i, rec->rule, (rint)rec->top, (rint)rec->size);
+                       }
+               }
+       }
+
+       rpa_stat_destroy(stat);
+       rpa_compiler_destroy(co);
+       if (unmapscript)
+               codegen_unmap_file(unmapscript);
+
+       /* The memory statistics are always printed, as before. */
+       r_printf("Max alloc mem: %ld\n", (long)r_debug_get_maxmem());
+       r_printf("Leaked mem: %ld\n", (long)r_debug_get_allocmem());
+
+       return ret;
+}