RLIB_OBJECTS += $(OUTDIR)/rhash.o
RLIB_OBJECTS += $(OUTDIR)/rstring.o
RLIB_OBJECTS += $(OUTDIR)/rlist.o
+RLIB_OBJECTS += $(OUTDIR)/rutf.o
ifeq ($(OS), linux)
--- /dev/null
+/*
+ * Regular Pattern Analyzer (RPA)
+ * Copyright (c) 2009-2010 Martin Stoilov
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Martin Stoilov <martin@rpasearch.com>
+ */
+
+#include "rutf.h"
+
+
+/*
+ * Decode one UTF-8 encoded character from the byte range [input, end).
+ *
+ * pwc   - receives the decoded code point
+ * input - first byte of the sequence to decode
+ * end   - one past the last readable byte
+ *
+ * Returns the number of bytes consumed: 0 when the range is empty (*pwc is
+ * set to 0), otherwise 1..4. A malformed, truncated, overlong or out-of-range
+ * sequence is not fatal: the lead byte itself is stored in *pwc and 1 is
+ * returned, so the caller can continue with the next byte.
+ *
+ * Surrogate code points (0xD800..0xDFFF) are still accepted in 3-byte form,
+ * matching what r_utf8_wctomb() is able to produce.
+ */
+rint r_utf8_mbtowc(ruint32 *pwc, const ruchar *input, const ruchar *end)
+{
+	ruint32 wc;
+	ruchar c;
+
+	if (input >= end) {
+		*pwc = (ruint32)0;
+		return 0;
+	}
+
+	if ((c = input[0]) < 0x80) {
+		*pwc = c;
+		return 1;
+	}
+	/* 0xC0/0xC1 can only start overlong 2-byte forms; 0xF5 and above never start a valid sequence. */
+	if (c == 0xC0 || c == 0xC1 || (c >= 0xF5 && c <= 0xFF))
+		goto error;
+	/*
+	 * The remaining length is compared as a pointer difference rather than
+	 * being narrowed to rint first, so very large buffers cannot wrap.
+	 */
+	if ((c >> 5) == 6) {
+		if ((end - input) < 2 || (input[1] >> 6 != 0x02))
+			goto error;
+		*pwc = ((ruint32) (c & 0x1f) << 6) | (ruint32) (input[1] ^ 0x80);
+		return 2;
+	} else if ((c >> 4) == 0x0E) {
+		if ((end - input) < 3 || (input[1] >> 6 != 0x02) || (input[2] >> 6 != 0x02))
+			goto error;
+		wc = ((ruint32) (c & 0x0f) << 12)
+			| ((ruint32) (input[1] ^ 0x80) << 6)
+			| (ruint32) (input[2] ^ 0x80);
+		/* Anything below 0x800 fits in two bytes: overlong encoding. */
+		if (wc < 0x800)
+			goto error;
+		*pwc = wc;
+		return 3;
+	} else if ((c >> 3) == 0x1E) {
+		if ((end - input) < 4 || (input[1] >> 6 != 0x02) || (input[2] >> 6 != 0x02) || (input[3] >> 6 != 0x02))
+			goto error;
+		wc = ((ruint32) (c & 0x07) << 18)
+			| ((ruint32) (input[1] ^ 0x80) << 12)
+			| ((ruint32) (input[2] ^ 0x80) << 6)
+			| (ruint32) (input[3] ^ 0x80);
+		/* Reject overlong forms (< 0x10000) and values past the Unicode range. */
+		if (wc < 0x10000 || wc > 0x10FFFF)
+			goto error;
+		*pwc = wc;
+		return 4;
+	}
+
+error:
+	/* Hand the offending byte back unchanged and consume just that byte. */
+	*pwc = c;
+	return 1;
+}
+
+
+/*
+ * Encode the code point wc as UTF-8 into output[0..size).
+ *
+ * Returns the number of bytes written (1..4), or 0 when wc is greater than
+ * 0x10FFFF or when size is too small for the encoded form. Nothing is
+ * written when 0 is returned.
+ */
+rint r_utf8_wctomb(ruint32 wc, ruchar *output, ruint32 size)
+{
+	ruint32 need;
+
+	if (wc > 0x10FFFF)
+		return 0;
+	if (wc <= 0x007F)
+		need = 1;
+	else if (wc <= 0x07FF)
+		need = 2;
+	else if (wc <= 0xFFFF)
+		need = 3;
+	else
+		need = 4;
+
+	if (size < need)
+		return 0;
+
+	if (need == 1) {
+		output[0] = wc;
+	} else if (need == 2) {
+		output[0] = 0xc0 | (wc >> 6);
+		output[1] = 0x80 | (wc & 0x3f);
+	} else if (need == 3) {
+		output[0] = 0xe0 | (wc >> 12);
+		output[1] = 0x80 | ((wc >> 6) & 0x3f);
+		output[2] = 0x80 | (wc & 0x3f);
+	} else {
+		output[0] = 0xf0 | (wc >> 18);
+		output[1] = 0x80 | ((wc >> 12) & 0x3f);
+		output[2] = 0x80 | ((wc >> 6) & 0x3f);
+		output[3] = 0x80 | (wc & 0x3f);
+	}
+	return need;
+}
+
+
+/*
+ * Decode one character from a byte stream of 16-bit units stored low byte
+ * first, reading from [s, end).
+ *
+ * Returns the number of bytes consumed: 0 when the range is empty (*pwc is
+ * set to 0), 2 for a single unit, 4 for a surrogate pair. On a truncated
+ * unit, a lone low surrogate, or a high surrogate without a valid low
+ * surrogate after it, the first byte is stored in *pwc and 1 is returned.
+ */
+rint r_utf16_mbtowc(ruint32 *pwc, const ruchar *s, const ruchar *end)
+{
+	rint avail = (rint)(end - s);
+	ruint32 lead, trail;
+
+	if (s >= end) {
+		*pwc = (ruint32)0;
+		return 0;
+	}
+
+	if (avail >= 2) {
+		lead = s[0] + (s[1] << 8);
+		if (lead < 0xd800 || lead >= 0xe000) {
+			/* Not a surrogate: the unit is the code point. */
+			*pwc = lead;
+			return 2;
+		}
+		if (lead < 0xdc00 && avail >= 4) {
+			/* High surrogate: it must be followed by a low surrogate. */
+			trail = s[2] + (s[3] << 8);
+			if (trail >= 0xdc00 && trail < 0xe000) {
+				*pwc = 0x10000 + ((lead - 0xd800) << 10) + (trail - 0xdc00);
+				return 4;
+			}
+		}
+	}
+
+	/* Malformed or truncated input: return the raw byte and consume one byte. */
+	*pwc = s[0];
+	return 1;
+}
+
+
+/*
+ * Encode the code point wc as 16-bit units, low byte first, into
+ * output[0..size).
+ *
+ * Returns 2 for a single unit, 4 for a surrogate pair, or 0 when wc is a
+ * surrogate value (0xD800..0xDFFF), is greater than 0x10FFFF, or does not
+ * fit in size bytes.
+ */
+rint r_utf16_wctomb(ruint32 wc, ruchar *output, ruint32 size)
+{
+	ruint32 hi, lo;
+
+	if (wc > 0x10FFFF || (wc >= 0xD800 && wc < 0xE000))
+		return 0;
+
+	if (wc < 0x10000) {
+		if (size < 2)
+			return 0;
+		output[0] = (ruchar) wc;
+		output[1] = (ruchar) (wc >> 8);
+		return 2;
+	}
+
+	if (size < 4)
+		return 0;
+	/* Split the 20-bit offset above 0x10000 into a high/low surrogate pair. */
+	wc -= 0x10000;
+	hi = 0xd800 + (wc >> 10);
+	lo = 0xdc00 + (wc & 0x3ff);
+	output[0] = (ruchar) hi;
+	output[1] = (ruchar) (hi >> 8);
+	output[2] = (ruchar) lo;
+	output[3] = (ruchar) (lo >> 8);
+	return 4;
+}
--- /dev/null
+/*
+ * Copyright (C) 1999-2001, 2004 Free Software Foundation, Inc.
+ * This file is part of the GNU LIBICONV Library.
+ *
+ * The GNU LIBICONV Library is free software; you can redistribute it
+ * and/or modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * The GNU LIBICONV Library is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
+ * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
+ * Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef _RUTF_H_
+#define _RUTF_H_
+
+#include "rtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * Decode one UTF-8 character from [input, end) into *pwc. Returns the number
+ * of bytes consumed; 0 (with *pwc = 0) when input >= end. On a malformed or
+ * truncated sequence the first byte is stored in *pwc and 1 is returned.
+ */
+rint r_utf8_mbtowc(ruint32 *pwc, const ruchar *input, const ruchar *end);
+/*
+ * Encode wc as UTF-8 into output. Returns the number of bytes written (1..4),
+ * or 0 when wc > 0x10FFFF or size is smaller than the encoded length.
+ */
+rint r_utf8_wctomb(ruint32 wc, ruchar *output, ruint32 size);
+/*
+ * Decode one character from 16-bit units stored low byte first in [s, end).
+ * Returns 2 or 4 bytes consumed; 0 (with *pwc = 0) when s >= end. On invalid
+ * or truncated input the first byte is stored in *pwc and 1 is returned.
+ */
+rint r_utf16_mbtowc(ruint32 *pwc, const ruchar *s, const ruchar *end);
+/*
+ * Encode wc as 16-bit units, low byte first, into output. Returns 2 or 4
+ * bytes written, or 0 when wc is a surrogate value, is > 0x10FFFF, or does
+ * not fit in size bytes.
+ */
+rint r_utf16_wctomb(ruint32 wc, ruchar *output, ruint32 size);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
}
+/*
+ * Build an instruction whose data field is the pair (p1, p2).
+ *
+ * The instruction is zero-filled first; the opcode and swi fields are both
+ * derived from `opcode`, the three operands are stored as 8-bit values and
+ * the data type is set to RVM_DTYPE_PAIR. The `da` flag is set when any of
+ * the operands equals DA.
+ */
+rvm_asmins_t rvm_asm2(rword opcode, rword op1, rword op2, rword op3, ruint32 p1, ruint32 p2)
+{
+	rvm_asmins_t ins;
+	const ruint8 r1 = (ruint8)op1;
+	const ruint8 r2 = (ruint8)op2;
+	const ruint8 r3 = (ruint8)op3;
+
+	r_memset(&ins, 0, sizeof(ins));
+	ins.opcode = (ruint32) RVM_ASMINS_OPCODE(opcode);
+	ins.swi = (ruint32) RVM_ASMINS_SWI(opcode);
+	ins.op1 = r1;
+	ins.op2 = r2;
+	ins.op3 = r3;
+	ins.data.p.p1 = p1;
+	ins.data.p.p2 = p2;
+	ins.type = RVM_DTYPE_PAIR;
+	if (r1 == DA || r2 == DA || r3 == DA)
+		ins.da = 1;
+	return ins;
+}
+
+
+
rvm_asmins_t rvm_asmr(rword opcode, rword op1, rword op2, rword op3, rpointer pReloc)
{
rvm_asmins_t a;
rcarray_t *data;
struct rvm_opmap_s *opmap;
rvmreg_t *thisptr;
- void *userdata;
rvm_gc_t *gc;
+ void *userdata1;
+ void *userdata2;
+ void *userdata3;
+ void *userdata4;
+ void *userdata5;
+ void *userdata6;
+ void *userdata7;
};
rvm_asmins_t rvm_asmb(rword opcode, rword op1, rword op2, rword op3, rword data);
rvm_asmins_t rvm_asmd(rword opcode, rword op1, rword op2, rword op3, rdouble data);
rvm_asmins_t rvm_asmp(rword opcode, rword op1, rword op2, rword op3, rpointer data);
+rvm_asmins_t rvm_asm2(rword opcode, rword op1, rword op2, rword op3, ruint32 p1, ruint32 p2);
rvm_asmins_t rvm_asmr(rword opcode, rword op1, rword op2, rword op3, rpointer pReloc);
rvm_asmins_t rvm_asmx(rword opcode, rword op1, rword op2, rword op3, rpointer pReloc);
void rvm_asm_dump(rvm_asmins_t *pi, ruint count);
#define RVM_DTYPE_LONG 1
#define RVM_DTYPE_POINTER 2 /* Generic pointer, it can point to any memory object */
#define RVM_DTYPE_BOOLEAN 3
-#define RVM_DTYPE_STRING 4
-#define RVM_DTYPE_ARRAY 5
-#define RVM_DTYPE_HARRAY 6
-#define RVM_DTYPE_DOUBLE 7
-#define RVM_DTYPE_NAN 8
-#define RVM_DTYPE_UNDEF 9
-#define RVM_DTYPE_JSOBJECT 10
-#define RVM_DTYPE_FUNCTION 11
+#define RVM_DTYPE_DOUBLE 4
+#define RVM_DTYPE_PAIR 5
+#define RVM_DTYPE_STRING 6
+#define RVM_DTYPE_ARRAY 7
+#define RVM_DTYPE_HARRAY 8
+#define RVM_DTYPE_NAN 9
+#define RVM_DTYPE_UNDEF 10
+#define RVM_DTYPE_JSOBJECT 11
+#define RVM_DTYPE_FUNCTION 12
#define RVM_DTYPE_SWIID 13 /* SWI ID */
#define RVM_DTYPE_RELOCPTR 14 /* Relocation, using pointers */
#define RVM_DTYPE_RELOCINDEX 15 /* Relocation, using offsets */
LIBS += -lrvm -lrpa -lrpasx -lrast -lrlib -lpthread -lm --static
+TESTS += $(OUTDIR)/regex-test
TESTS += $(OUTDIR)/rast-test
TESTS += $(OUTDIR)/rpagen-test
TESTS += $(OUTDIR)/calc-test
--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include "rvmcpu.h"
+#include "rmem.h"
+
+
+/*
+ * Value returned by rvm_cpu_addswitable() for switable[]; it is assigned in
+ * main() and must be set before any of the RPA_* opcode macros below is
+ * evaluated, because each of them reads it.
+ */
+static ruint regextable;
+
+/*
+ * Opcodes for the handlers registered in switable[]. The second argument of
+ * RVM_SWI_ID is presumably the entry's index within that table (the numbers
+ * follow the order of the switable[] entries) - TODO confirm against
+ * RVM_SWI_ID's definition.
+ */
+#define RPA_MATCHCHR RVM_OPSWI(RVM_SWI_ID(regextable, 0))
+#define RPA_MATCHCHR_OPT RVM_OPSWI(RVM_SWI_ID(regextable, 1))
+#define RPA_MATCHCHR_MUL RVM_OPSWI(RVM_SWI_ID(regextable, 2))
+#define RPA_MATCHCHR_MOP RVM_OPSWI(RVM_SWI_ID(regextable, 3))
+#define RPA_MATCHRNG RVM_OPSWI(RVM_SWI_ID(regextable, 4))
+#define RPA_MATCHRNG_OPT RVM_OPSWI(RVM_SWI_ID(regextable, 5))
+#define RPA_MATCHRNG_MUL RVM_OPSWI(RVM_SWI_ID(regextable, 6))
+#define RPA_MATCHRNG_MOP RVM_OPSWI(RVM_SWI_ID(regextable, 7))
+
+
+/* Element type of the rpastat_t instack array. */
+typedef struct rpainput_s {
+ /* presumably a decoded character value - TODO confirm once the matchers use it */
+ ruint32 wc;
+ /* presumably the position in the input that wc was read from - TODO confirm */
+ const rchar *input;
+} rpainput_t;
+
+
+/*
+ * Matching state. Allocated zero-filled by rpa_stat_create(), set up by
+ * rpa_stat_init() and released by rpa_stat_destroy().
+ */
+typedef struct rpastat_s {
+ /* presumably the current position within [start, end] - TODO confirm */
+ const rchar *input;
+ /* lower bound of the input range passed to rpa_stat_init() */
+ const rchar *start;
+ /* upper bound of the input range passed to rpa_stat_init() */
+ const rchar *end;
+ /* reset to 0 by rpa_stat_init() */
+ ruint error;
+ /* array of rpainput_t entries; freed by rpa_stat_destroy() */
+ rpainput_t *instack;
+ /* number of rpainput_t entries allocated in instack */
+ rulong instacksize;
+} rpastat_t;
+
+
+/*
+ * Allocate a new rpastat_t with r_zmalloc() and return it. The result is
+ * returned as-is, so it is whatever r_zmalloc() yields on failure.
+ * Release it with rpa_stat_destroy().
+ */
+rpastat_t *rpa_stat_create()
+{
+	return (rpastat_t *) r_zmalloc(sizeof(rpastat_t));
+}
+
+
+/*
+ * Prepare `stat` for working on the input range [start, end], with `input`
+ * as the initial position inside that range.
+ *
+ * stat  - state object obtained from rpa_stat_create()
+ * input - initial position; must satisfy start <= input <= end
+ * start - beginning of the input range
+ * end   - end of the input range; must not be before start
+ *
+ * The instack array is grown to hold (end - start) entries when the current
+ * allocation is smaller; an allocation that is already large enough is kept.
+ *
+ * Returns 0 on success, or -1 when the pointers are inconsistent or the
+ * stack could not be grown. On failure `stat` is left unmodified.
+ */
+int rpa_stat_init(rpastat_t *stat, const rchar *input, const rchar *start, const rchar *end)
+{
+	rulong size;
+	rpainput_t *grown;
+
+	if (start > end)
+		return -1;
+	if (input < start || input > end)
+		return -1;
+
+	size = end - start;
+	if (size > stat->instacksize) {
+		/*
+		 * Keep the old pointer until the new block is known to be good.
+		 * Assumes r_realloc() follows realloc() semantics (NULL on
+		 * failure, old block untouched) - TODO confirm.
+		 */
+		grown = (rpainput_t *) r_realloc(stat->instack, size * sizeof(rpainput_t));
+		if (!grown)
+			return -1;
+		stat->instack = grown;
+		stat->instacksize = size;
+	}
+	stat->start = start;
+	stat->end = end;
+	stat->input = input;
+	stat->error = 0;
+	return 0;
+}
+
+
+/*
+ * Release a state object: frees the instack array when one is attached,
+ * then frees `stat` itself. `stat` is dereferenced unconditionally.
+ */
+void rpa_stat_destroy(rpastat_t *stat)
+{
+	rpainput_t *stack = stat->instack;
+
+	if (stack)
+		r_free(stack);
+	r_free(stat);
+}
+
+
+/*
+ * SWI handler registered as "RPA_MATCHCHR". Placeholder: reads R0 and R1 and
+ * stores the value read from R0 back into R0; `ins` is not used.
+ */
+static void rpa_matchchr(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+	rword first = RVM_CPUREG_GETU(cpu, R0);
+	rword second = RVM_CPUREG_GETU(cpu, R1);	/* read but not used yet */
+
+	(void)second;
+	RVM_CPUREG_SETU(cpu, R0, first);
+}
+
+
+/*
+ * SWI handler registered as "RPA_MATCHCHR_OPT". Placeholder: reads R0 and R1
+ * and stores the value read from R0 back into R0; `ins` is not used.
+ */
+static void rpa_matchchr_opt(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+	rword first = RVM_CPUREG_GETU(cpu, R0);
+	rword second = RVM_CPUREG_GETU(cpu, R1);	/* read but not used yet */
+
+	(void)second;
+	RVM_CPUREG_SETU(cpu, R0, first);
+}
+
+
+/*
+ * SWI handler registered as "RPA_MATCHCHR_MUL". Placeholder: reads R0 and R1
+ * and stores the value read from R0 back into R0; `ins` is not used.
+ */
+static void rpa_matchchr_mul(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+	rword first = RVM_CPUREG_GETU(cpu, R0);
+	rword second = RVM_CPUREG_GETU(cpu, R1);	/* read but not used yet */
+
+	(void)second;
+	RVM_CPUREG_SETU(cpu, R0, first);
+}
+
+
+/*
+ * SWI handler registered as "RPA_MATCHCHR_MOP". Placeholder: reads R0 and R1
+ * and stores the value read from R0 back into R0; `ins` is not used.
+ */
+static void rpa_matchchr_mop(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+	rword first = RVM_CPUREG_GETU(cpu, R0);
+	rword second = RVM_CPUREG_GETU(cpu, R1);	/* read but not used yet */
+
+	(void)second;
+	RVM_CPUREG_SETU(cpu, R0, first);
+}
+
+
+/*
+ * SWI handler registered as "RPA_MATCHRNG". Placeholder: reads R0 and R1 and
+ * stores the value read from R0 back into R0; `ins` is not used.
+ */
+static void rpa_matchrng(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+	rword first = RVM_CPUREG_GETU(cpu, R0);
+	rword second = RVM_CPUREG_GETU(cpu, R1);	/* read but not used yet */
+
+	(void)second;
+	RVM_CPUREG_SETU(cpu, R0, first);
+}
+
+
+/*
+ * SWI handler registered as "RPA_MATCHRNG_OPT". Placeholder: reads R0 and R1
+ * and stores the value read from R0 back into R0; `ins` is not used.
+ */
+static void rpa_matchrng_opt(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+	rword first = RVM_CPUREG_GETU(cpu, R0);
+	rword second = RVM_CPUREG_GETU(cpu, R1);	/* read but not used yet */
+
+	(void)second;
+	RVM_CPUREG_SETU(cpu, R0, first);
+}
+
+
+/*
+ * SWI handler registered as "RPA_MATCHRNG_MUL". Placeholder: reads R0 and R1
+ * and stores the value read from R0 back into R0; `ins` is not used.
+ */
+static void rpa_matchrng_mul(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+	rword first = RVM_CPUREG_GETU(cpu, R0);
+	rword second = RVM_CPUREG_GETU(cpu, R1);	/* read but not used yet */
+
+	(void)second;
+	RVM_CPUREG_SETU(cpu, R0, first);
+}
+
+
+/*
+ * SWI handler registered as "RPA_MATCHRNG_MOP". Placeholder: reads R0 and R1
+ * and stores the value read from R0 back into R0; `ins` is not used.
+ */
+static void rpa_matchrng_mop(rvmcpu_t *cpu, rvm_asmins_t *ins)
+{
+	rword first = RVM_CPUREG_GETU(cpu, R0);
+	rword second = RVM_CPUREG_GETU(cpu, R1);	/* read but not used yet */
+
+	(void)second;
+	RVM_CPUREG_SETU(cpu, R0, first);
+}
+
+
+
+
+/*
+ * Name/handler pairs passed to rvm_cpu_addswitable() in main(). The list ends
+ * with a {NULL, NULL} entry. The entry order matches the numbering used by
+ * the RPA_* opcode macros (0 = RPA_MATCHCHR ... 7 = RPA_MATCHRNG_MOP), so the
+ * two must be kept in step.
+ */
+static rvm_switable_t switable[] = {
+ {"RPA_MATCHCHR", rpa_matchchr},
+ {"RPA_MATCHCHR_OPT", rpa_matchchr_opt},
+ {"RPA_MATCHCHR_MUL", rpa_matchchr_mul},
+ {"RPA_MATCHCHR_MOP", rpa_matchchr_mop},
+ {"RPA_MATCHRNG", rpa_matchrng},
+ {"RPA_MATCHRNG_OPT", rpa_matchrng_opt},
+ {"RPA_MATCHRNG_MUL", rpa_matchrng_mul},
+ {"RPA_MATCHRNG_MOP", rpa_matchrng_mop},
+ {NULL, NULL},
+};
+
+
+/*
+ * Test driver: creates a default CPU, registers the switable[] handlers,
+ * assembles a six-instruction program into a local buffer, runs it with
+ * rvm_cpu_exec_debug() and prints "It works!". Always returns 0; argc and
+ * argv are not used.
+ */
+int main(int argc, char *argv[])
+{
+ rvmcpu_t *cpu;
+ rvm_asmins_t code[1024];
+ ruint off = 0;
+
+ cpu = rvm_cpu_create_default();
+ /* Must happen before the RPA_* macros are used below: they read regextable. */
+ regextable = rvm_cpu_addswitable(cpu, switable);
+
+ /* presumably R0 = 1, R1 = 2, R2 = R0 + R1 - TODO confirm operand order */
+ code[off++] = rvm_asm(RVM_MOV, R0, DA, XX, 1);
+ code[off++] = rvm_asm(RVM_MOV, R1, DA, XX, 2);
+ code[off++] = rvm_asm(RVM_ADD, R2, R0, R1, 0);
+ /* Two invocations of the RPA_MATCHCHR handler from switable[]. */
+ code[off++] = rvm_asm(RPA_MATCHCHR, DA, XX, XX, 0);
+ code[off++] = rvm_asm(RPA_MATCHCHR, DA, XX, XX, 0);
+ /* presumably terminates execution - TODO confirm RVM_EXT semantics */
+ code[off++] = rvm_asm(RVM_EXT, XX, XX, XX, 0);
+ rvm_cpu_exec_debug(cpu, code, 0);
+ rvm_cpu_destroy(cpu);
+
+
+ fprintf(stdout, "It works!\n");
+ return 0;
+}
static void test_swi_cat(rvmcpu_t *cpu, rvm_asmins_t *ins)
{
- rvm_testctx_t *ctx = (rvm_testctx_t *)cpu->userdata;
+ rvm_testctx_t *ctx = (rvm_testctx_t *)cpu->userdata1;
rvm_opmap_invoke_binary_handler(ctx->opmap, RVM_OPID_CAT, cpu, RVM_CPUREG_PTR(cpu, ins->op1), RVM_CPUREG_PTR(cpu, ins->op2), RVM_CPUREG_PTR(cpu, ins->op3));
}
ctx.opmap = opmap = rvm_opmap_create();
cpu = rvm_cpu_create_default();
- cpu->userdata = &ctx;
+ cpu->userdata1 = &ctx;
cg = rvm_codegen_create();