9 #include "rvmcodegen.h"
/* Id of the SWI table registered in main() via rvm_cpu_addswitable(); the RPA_* opcode macros are built from it. */
16 static ruint regextable;
/*
 * Option flags.
 * NOTE(review): the statements that set and test these flags are not visible
 * in this listing; presumably they correspond to the -d, -p and -c switches
 * parsed in main() -- confirm.
 */
17 static int debuginfo = 0;
18 static int parseinfo = 0;
19 static int compileonly = 0;
/* Bit flags stored in rparecord_t.type by rpa_emitstart() / rpa_emitend(). */
21 #define RPA_RECORD_NONE (0)
22 #define RPA_RECORD_START (1 << 0)
23 #define RPA_RECORD_END (1 << 1)
24 #define RPA_RECORD_MATCH (1 << 2)
/* Quantifier flags: passed to rpa_matchchr_do() and tested against R_FLG by the generated code. */
26 #define RPA_MATCH_NONE 0
27 #define RPA_MATCH_MULTIPLE (1 << 0)
28 #define RPA_MATCH_OPTIONAL (1 << 1)
29 #define RPA_MATCH_MULTIOPT (RPA_MATCH_MULTIPLE | RPA_MATCH_OPTIONAL)
/* Register aliases. NOTE(review): no use of these four names is visible in this listing. */
37 #define R_MNODE_NAN R4
38 #define R_MNODE_MUL R5
39 #define R_MNODE_OPT R6
40 #define R_MNODE_MOP R7
/*
 * SWI opcodes. The second argument of RVM_SWI_ID is the position of the
 * handler inside switable[], so the numbering here must stay in step with the
 * order of that table.
 * NOTE(review): RPA_BXLMOP, RPA_BXLMUL and RPA_SETBXLNAN/MUL/OPT/MOP are used
 * further down but have no definition among these lines, and switable[] has no
 * matching entries -- confirm where they are meant to come from.
 */
43 #define RPA_MATCHCHR_NAN RVM_OPSWI(RVM_SWI_ID(regextable, 0))
44 #define RPA_MATCHCHR_OPT RVM_OPSWI(RVM_SWI_ID(regextable, 1))
45 #define RPA_MATCHCHR_MUL RVM_OPSWI(RVM_SWI_ID(regextable, 2))
46 #define RPA_MATCHCHR_MOP RVM_OPSWI(RVM_SWI_ID(regextable, 3))
47 #define RPA_SHIFT RVM_OPSWI(RVM_SWI_ID(regextable, 4))
48 #define RPA_EQSHIFT RVM_OPSWI(RVM_SWI_ID(regextable, 5))
49 #define RPA_NEQSHIFT RVM_OPSWI(RVM_SWI_ID(regextable, 6))
50 #define RPA_EMITSTART RVM_OPSWI(RVM_SWI_ID(regextable, 7))
51 #define RPA_EMITEND RVM_OPSWI(RVM_SWI_ID(regextable, 8))
52 #define RPA_MATCHANY_NAN RVM_OPSWI(RVM_SWI_ID(regextable, 9))
53 #define RPA_MATCHEOL_NAN RVM_OPSWI(RVM_SWI_ID(regextable, 10))
54 #define RPA_BXLWHT RVM_OPSWI(RVM_SWI_ID(regextable, 11))
/*
 * Structure declarations. NOTE(review): the member lists are not visible in
 * this listing; the notes below only name members that other code in this file
 * dereferences.
 */
/* Compiler state; code in this file uses cg, scope and fpoff. */
57 typedef struct rpa_compiler_s {
/* One emitted parse record; code in this file uses rule, type, top and size. */
65 typedef struct rparecord_s {
/* One input-stack slot; code in this file uses input, wc and eof on such slots. */
75 typedef struct rpainput_s {
/* NOTE(review): no use of this structure is visible in this listing. */
82 typedef struct rpainmap_s {
/* Matcher run-time state; code in this file uses records, instack, instacksize, ip and end. */
88 typedef struct rpastat_s {
/*
 * Allocate a compiler object, zero-fill it and attach a new code generator
 * (co->cg) and a new scope (co->scope).
 * NOTE(review): the declaration of co and the return statement are not visible
 * in this listing; presumably the function returns co.
 */
101 rpa_compiler_t *rpa_compiler_create()
105 co = r_malloc(sizeof(*co));
106 r_memset(co, 0, sizeof(*co));
107 co->cg = rvm_codegen_create();
108 co->scope = rvm_scope_create();
/*
 * Destroy the code generator and the scope owned by co.
 * NOTE(review): any NULL guard and the release of co itself are not visible in
 * this listing -- confirm.
 */
113 void rpa_compiler_destroy(rpa_compiler_t *co)
116 rvm_codegen_destroy(co->cg);
117 rvm_scope_destroy(co->scope);
/*
 * Allocate matcher state with r_zmalloc() and create its records array, whose
 * elements are rparecord_t.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the function returns stat.
 */
122 rpastat_t *rpa_stat_create()
124 rpastat_t *stat = (rpastat_t *) r_zmalloc(sizeof(*stat));
125 stat->records = r_array_create(sizeof(rparecord_t));
/*
 * Prepare stat for matching the buffer [start, end), beginning at input.
 * NOTE(review): several lines of this body (including the computation of
 * `size` and all return statements) are not visible in this listing.
 */
130 int rpa_stat_init(rpastat_t *stat, const rchar *input, const rchar *start, const rchar *end)
/* input must lie inside [start, end]; the action taken otherwise is not visible here. */
138 if (input < start || input > end) {
/* Grow the input stack when it holds fewer than `size` slots; one extra slot is allocated. */
148 if (stat->instacksize < size) {
/* NOTE(review): the r_realloc() result overwrites instack directly; if r_realloc() can return NULL the old buffer would be lost -- confirm its contract. */
149 stat->instack = r_realloc(stat->instack, (size + 1) * sizeof(rpainput_t));
150 stat->instacksize = size + 1;
152 stat->ip.input = input;
/* Drop any records left over from a previous run. */
154 r_array_setlength(stat->records, 0);
/*
 * Free the input stack and destroy the records array of stat.
 * NOTE(review): any NULL guard and the release of stat itself are not visible
 * in this listing -- confirm.
 */
159 void rpa_stat_destroy(rpastat_t *stat)
162 r_free(stat->instack);
163 r_object_destroy((robject_t*)stat->records);
/*
 * SWI handler for RPA_SHIFT. Reads the stack index from R_TOP, fills an input
 * stack slot from the input stream when needed, and writes the index back to
 * R_TOP.
 * NOTE(review): the lines between the declarations and the `if` (presumably
 * the eof check and the increment of tp/ptp) are not visible in this listing.
 * main() starts execution with R_TOP = -1, so ptp initially points at
 * instack[-1]; confirm that the hidden lines never dereference that slot.
 */
168 static void rpa_shift(rvmcpu_t *cpu, rvm_asmins_t *ins)
170 rpastat_t *stat = (rpastat_t *)cpu->userdata1;
171 rlong tp = RVM_CPUREG_GETL(cpu, R_TOP);
172 rpainput_t * ptp = &stat->instack[tp];
/* Only read from the input stream when tp has reached the number of characters consumed so far. */
178 if (tp >= (rlong)stat->ip.serial) {
180 ptp->input = stat->ip.input;
181 if (ptp->input < stat->end) {
/* Decode one UTF-8 sequence into the slot and advance the stream by the returned byte count. */
182 inc = r_utf8_mbtowc(&ptp->wc, (const ruchar*)stat->ip.input, (const ruchar*)stat->end);
183 stat->ip.input += inc;
184 stat->ip.serial += 1;
/* Presumably the end-of-input branch: the slot gets an all-ones code point. */
187 ptp->wc = (ruint32)-1;
191 RVM_CPUREG_SETL(cpu, R_TOP, tp);
/*
 * SWI handler for RPA_EQSHIFT: acts only when the Z status flag is set.
 * NOTE(review): the guarded statement is not visible in this listing;
 * presumably it is a call to rpa_shift(cpu, ins).
 */
195 static void rpa_eqshift(rvmcpu_t *cpu, rvm_asmins_t *ins)
197 if (cpu->status & RVM_STATUS_Z)
/*
 * SWI handler for RPA_NEQSHIFT: acts only when the Z status flag is clear.
 * NOTE(review): the guarded statement is not visible in this listing;
 * presumably it is a call to rpa_shift(cpu, ins).
 */
202 static void rpa_neqshift(rvmcpu_t *cpu, rvm_asmins_t *ins)
204 if (!(cpu->status & RVM_STATUS_Z))
/*
 * Shared implementation of the four RPA_MATCHCHR_* handlers. Compares the code
 * point in the input slot at R_TOP with the value read through ins->op1, using
 * the quantifier given in flags, then reports the result in cpu->status and R0.
 *
 *   flags                 no match: status / R0      match: status / R0
 *   RPA_MATCH_OPTIONAL    RVM_STATUS_Z / 0           0 / matched
 *   RPA_MATCH_MULTIPLE    RVM_STATUS_N / (rword)-1   0 / matched
 *   RPA_MATCH_MULTIOPT    RVM_STATUS_Z / 0           0 / matched
 *   anything else         RVM_STATUS_N / (rword)-1   0 / matched
 *
 * NOTE(review): the declaration of `matched` and the bodies of the if/while
 * statements are not visible in this listing; presumably each successful test
 * shifts the input and increments matched.
 */
209 static void rpa_matchchr_do(rvmcpu_t *cpu, rvm_asmins_t *ins, rword flags)
211 rpastat_t *stat = (rpastat_t *)cpu->userdata1;
212 rword wc = RVM_CPUREG_GETU(cpu, ins->op1);
/* Optional: a single test; failure is reported through Z, not N. */
215 if (flags == RPA_MATCH_OPTIONAL) {
216 if (!stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].eof && stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].wc == wc) {
220 cpu->status = matched ? 0 : RVM_STATUS_Z;
221 RVM_CPUREG_SETU(cpu, R0, matched);
/* One or more: repeated test; zero matches is a failure (N, R0 = -1). */
222 } else if (flags == RPA_MATCH_MULTIPLE) {
223 while (!stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].eof && stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].wc == wc) {
227 cpu->status = matched ? 0 : RVM_STATUS_N;
228 RVM_CPUREG_SETU(cpu, R0, matched ? matched : (rword)-1);
/* Zero or more: repeated test; zero matches is reported through Z. */
229 } else if (flags == RPA_MATCH_MULTIOPT) {
230 while (!stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].eof && stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].wc == wc) {
234 cpu->status = matched ? 0 : RVM_STATUS_Z;
235 RVM_CPUREG_SETU(cpu, R0, matched );
/* Remaining case (presumably the final else, used with RPA_MATCH_NONE): a single mandatory test. */
237 if (!stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].eof && stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].wc == wc) {
241 cpu->status = matched ? 0 : RVM_STATUS_N;
242 RVM_CPUREG_SETU(cpu, R0, matched ? matched : (rword)-1);
/*
 * SWI handler for RPA_MATCHANY_NAN: sets the N status flag when the input slot
 * at R_TOP is at end of input and clears it otherwise.
 * NOTE(review): the statement guarded by the final `if` is not visible in this
 * listing; presumably it is a call to rpa_shift(cpu, ins).
 */
248 static void rpa_matchany_nan(rvmcpu_t *cpu, rvm_asmins_t *ins)
250 rpastat_t *stat = (rpastat_t *)cpu->userdata1;
252 RVM_STATUS_UPDATE(cpu, RVM_STATUS_N, (!stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].eof) ? 0 : 1);
253 if (!(cpu->status & RVM_STATUS_N))
/*
 * SWI handler for RPA_MATCHEOL_NAN: clears the N status flag when the input
 * slot at R_TOP holds a carriage return or a line feed, and sets it otherwise
 * (including at end of input).
 * NOTE(review): the statement guarded by the final `if` is not visible in this
 * listing; presumably it is a call to rpa_shift(cpu, ins).
 */
258 static void rpa_matcheol_nan(rvmcpu_t *cpu, rvm_asmins_t *ins)
260 rpastat_t *stat = (rpastat_t *)cpu->userdata1;
/*
 * The code point is compared directly instead of being searched for in "\r\n".
 * A strchr-style search narrows its argument to char and also matches the
 * string terminator, so a NUL, or a code point whose low byte is 0x0A or 0x0D,
 * would be taken for a line end (assuming r_strchr behaves like strchr).
 */
262 RVM_STATUS_UPDATE(cpu, RVM_STATUS_N, (!stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].eof && (stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].wc == '\r' || stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].wc == '\n')) ? 0 : 1);
263 if (!(cpu->status & RVM_STATUS_N))
/*
 * SWI handler for RPA_MATCHCHR_NAN: a single mandatory character match,
 * delegated to rpa_matchchr_do() with RPA_MATCH_NONE. The commented-out lines
 * are an earlier inline implementation that is no longer compiled.
 */
268 static void rpa_matchchr_nan(rvmcpu_t *cpu, rvm_asmins_t *ins)
270 // rpastat_t *stat = (rpastat_t *)cpu->userdata1;
271 // rword wc = RVM_CPUREG_GETU(cpu, ins->op1);
273 // RVM_STATUS_UPDATE(cpu, RVM_STATUS_N, (!stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].eof && stat->instack[RVM_CPUREG_GETL(cpu, R_TOP)].wc == wc) ? 0 : 1);
274 // if (!(cpu->status & RVM_STATUS_N))
275 // rpa_shift(cpu, ins);
277 rpa_matchchr_do(cpu, ins, RPA_MATCH_NONE);
/* SWI handler for RPA_MATCHCHR_OPT: optional character match, delegated to rpa_matchchr_do(). */
281 static void rpa_matchchr_opt(rvmcpu_t *cpu, rvm_asmins_t *ins)
283 rpa_matchchr_do(cpu, ins, RPA_MATCH_OPTIONAL);
/* SWI handler for RPA_MATCHCHR_MUL: one-or-more character match, delegated to rpa_matchchr_do(). */
287 static void rpa_matchchr_mul(rvmcpu_t *cpu, rvm_asmins_t *ins)
289 rpa_matchchr_do(cpu, ins, RPA_MATCH_MULTIPLE);
/* SWI handler for RPA_MATCHCHR_MOP: zero-or-more character match, delegated to rpa_matchchr_do(). */
293 static void rpa_matchchr_mop(rvmcpu_t *cpu, rvm_asmins_t *ins)
295 rpa_matchchr_do(cpu, ins, RPA_MATCH_MULTIOPT);
/*
 * SWI handler for RPA_EMITSTART: appends a record of type RPA_RECORD_START to
 * stat->records. The rule name (string pointer and size) is read through
 * ins->op1 and the input position through ins->op2.
 * NOTE(review): the declarations of rec/index and any further field
 * assignments (for example how tp is stored) are not visible in this listing.
 */
299 static void rpa_emitstart(rvmcpu_t *cpu, rvm_asmins_t *ins)
301 rpastat_t *stat = (rpastat_t *)cpu->userdata1;
304 rword tp = RVM_CPUREG_GETU(cpu, ins->op2);
305 rstr_t name = {RVM_CPUREG_GETSTR(cpu, ins->op1), RVM_CPUREG_GETSIZE(cpu, ins->op1)};
/* Append an empty slot, then fill it in place. */
307 index = r_array_add(stat->records, NULL);
308 rec = (rparecord_t *)r_array_slot(stat->records, index);
309 rec->rule = name.str;
311 rec->type = RPA_RECORD_START;
312 // r_printf("START: %s(%ld)\n", name.str, (rulong)tp);
/*
 * SWI handler for RPA_EMITEND: appends an end record to stat->records. The rule
 * name is read through ins->op1, the start position through ins->op2 and the
 * matched length through ins->op3. The record type ends up as either
 * RPA_RECORD_END | RPA_RECORD_MATCH or plain RPA_RECORD_END.
 * NOTE(review): the declarations of rec/index, the condition that selects
 * between the two END assignments, and any other field assignments are not
 * visible in this listing.
 */
316 static void rpa_emitend(rvmcpu_t *cpu, rvm_asmins_t *ins)
318 rpastat_t *stat = (rpastat_t *)cpu->userdata1;
321 rword tp = RVM_CPUREG_GETU(cpu, ins->op2);
322 rword tplen = RVM_CPUREG_GETU(cpu, ins->op3);
323 rstr_t name = {RVM_CPUREG_GETSTR(cpu, ins->op1), RVM_CPUREG_GETSIZE(cpu, ins->op1)};
/* Append an empty slot, then fill it in place. */
325 index = r_array_add(stat->records, NULL);
326 rec = (rparecord_t *)r_array_slot(stat->records, index);
327 rec->rule = name.str;
/* NOTE(review): if the two assignments below form an if/else, this START value is always overwritten -- looks like a leftover from rpa_emitstart(); confirm. */
330 rec->type = RPA_RECORD_START;
333 rec->type = RPA_RECORD_END | RPA_RECORD_MATCH;
334 // r_printf("MATCHED: %s(%ld, %ld): %p(%d)\n", name.str, (rulong)tp, (rulong)tplen, name.str, name.size);
336 rec->type = RPA_RECORD_END;
337 // r_printf("MATCHED: %s(%ld, %ld)\n", name.str, (rulong)tp, (rulong)tplen);
/*
 * SWI handler for RPA_BXLWHT: loads R_WHT with the value read through ins->op2,
 * then performs a branch-with-link to the address read through ins->op1.
 */
342 static void rpa_bxlwht(rvmcpu_t *cpu, rvm_asmins_t *ins)
344 rword wht = RVM_CPUREG_GETU(cpu, ins->op2);
346 RVM_CPUREG_SETU(cpu, R_WHT, wht);
/* Save the current PC in LR before redirecting PC to the target. */
347 RVM_CPUREG_SETIP(cpu, LR, RVM_CPUREG_GETIP(cpu, PC));
348 RVM_CPUREG_SETIP(cpu, PC, RVM_CPUREG_GETIP(cpu, ins->op1));
/*
 * SWI handler table registered in main() with rvm_cpu_addswitable(). The
 * position of each entry must equal the index used by the matching RPA_* opcode
 * macro (RPA_MATCHCHR_NAN = 0 ... RPA_BXLWHT = 11).
 * NOTE(review): the table terminator and closing brace are not visible in this
 * listing.
 */
352 static rvm_switable_t switable[] = {
353 {"RPA_MATCHCHR_NAN", rpa_matchchr_nan},
354 {"RPA_MATCHCHR_OPT", rpa_matchchr_opt},
355 {"RPA_MATCHCHR_MUL", rpa_matchchr_mul},
356 {"RPA_MATCHCHR_MOP", rpa_matchchr_mop},
357 {"RPA_SHIFT", rpa_shift},
358 {"RPA_EQSHIFT", rpa_eqshift},
359 {"RPA_NEQSHIFT", rpa_neqshift},
360 {"RPA_EMITSTART", rpa_emitstart},
361 {"RPA_EMITEND", rpa_emitend},
362 {"RPA_MATCHANY_NAN", rpa_matchany_nan},
363 {"RPA_MATCHEOL_NAN", rpa_matcheol_nan},
364 {"RPA_BXLWHT", rpa_bxlwht},
/*
 * Emit the routine labelled "rpa_match" into co->cg. The emitted code saves
 * the registers selected by BITS(R_TOP,LR), calls through R_WHT, restores the
 * saved registers, and then adds R0 to R_TOP unless R0 compares equal to 0.
 * The instruction order matters: the BEQ uses a relative offset of 2
 * (presumably to step over the ADD).
 */
369 void codegen_rpa_match(rpa_compiler_t *co)
371 rvm_codegen_addlabel_s(co->cg, "rpa_match");
372 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BITS(R_TOP,LR)));
373 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, FP, SP, XX, 0));
374 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
375 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, SP, FP, XX, 0));
376 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BITS(R_TOP,LR)));
377 rvm_codegen_addins(co->cg, rvm_asm(RVM_CMP, R0, DA, XX, 0));
378 rvm_codegen_addins(co->cg, rvm_asm(RVM_BEQ, DA, XX, XX, 2));
379 rvm_codegen_addins(co->cg, rvm_asm(RVM_ADD, R_TOP, R_TOP, R0, 0));
380 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Emit the routine labelled "rpa_match_mul" into co->cg. R1 starts at 0 and
 * accumulates R0 across repeated calls through R_WHT; each iteration that does
 * not compare R0 equal to 0 advances R_TOP by R0 and branches back to
 * "rpa_match_mul_again". On exit the accumulated total is moved into R0.
 * The instruction order matters: the BEQ uses a relative offset of 4
 * (presumably to step over the two ADDs and the backward branch).
 */
384 void codegen_rpa_match_mul(rpa_compiler_t *co)
386 rvm_codegen_addlabel_s(co->cg, "rpa_match_mul");
387 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R1, DA, XX, 0));
388 rvm_codegen_addlabel_s(co->cg, "rpa_match_mul_again");
/* R1 is pushed and popped around the call so the running total survives it. */
389 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, R1, XX, XX, 0)); // Ret
390 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BITS(R_TOP,LR)));
391 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, FP, SP, XX, 0));
392 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
393 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, SP, FP, XX, 0));
394 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BITS(R_TOP,LR)));
395 rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R1, XX, XX, 0)); // Ret
396 rvm_codegen_addins(co->cg, rvm_asm(RVM_CMP, R0, DA, XX, 0));
397 rvm_codegen_addins(co->cg, rvm_asm(RVM_BEQ, DA, XX, XX, 4));
398 rvm_codegen_addins(co->cg, rvm_asm(RVM_ADD, R_TOP, R_TOP, R0, 0));
399 rvm_codegen_addins(co->cg, rvm_asm(RVM_ADD, R1, R1, R0, 0));
400 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, "rpa_match_mul_again", rvm_asm(RVM_B, DA, XX, XX, 0));
401 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, R1, XX, 0));
402 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Emit an inline match of the character wc with quantifier q:
 *   '+'  -> RPA_MATCHCHR_MUL followed by a conditional return (BXNEQ LR)
 *   '*'  -> RPA_MATCHCHR_MOP
 *   first branch -> RPA_MATCHCHR_OPT
 *   last branch  -> RPA_MATCHCHR_NAN followed by a conditional return
 * NOTE(review): the opening `if` and the final `else` are not visible in this
 * listing; presumably the first branch tests q == '?' and the last branch is
 * the default for any other q.
 */
406 void codegen_rpa_match_char(rpa_compiler_t *co, rword wc, rchar q)
409 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_OPT, DA, XX, XX, wc));
410 } else if (q == '+') {
411 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_MUL, DA, XX, XX, wc));
412 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXNEQ, LR, XX, XX, 0));
413 } else if (q == '*') {
414 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_MOP, DA, XX, XX, wc));
416 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, wc));
417 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXNEQ, LR, XX, XX, 0));
/*
 * Emit the routine labelled "rpa_match_mnode" into co->cg. The emitted code
 * calls through R_WHT, repeats the call while the result is positive and the
 * RPA_MATCH_MULTIPLE bit is found by the TST, then restores the saved
 * registers and leaves in R0 the difference between the new and the saved
 * R_TOP. When that difference is zero, the RPA_MATCH_OPTIONAL test decides
 * whether R0 stays 0 or becomes -1.
 * The instruction order matters: all branches use relative offsets.
 */
422 void codegen_rpa_match_mnode(rpa_compiler_t *co)
424 rvm_codegen_addlabel_s(co->cg, "rpa_match_mnode");
425 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BITS(R_TOP,LR)));
426 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, FP, SP, XX, 0));
427 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
428 rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, DA, XX, 0));
429 rvm_codegen_addins(co->cg, rvm_asm(RVM_BLES, DA, XX, XX, 3));
430 rvm_codegen_addins(co->cg, rvm_asm(RVM_TST, R0, R_FLG, DA, RPA_MATCH_MULTIPLE));
431 rvm_codegen_addins(co->cg, rvm_asm(RVM_BNEQ, DA, XX, XX, -4));
432 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R1, R_TOP, XX, 0));
433 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, SP, FP, XX, 0));
434 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BITS(R_TOP,LR)));
435 rvm_codegen_addins(co->cg, rvm_asm(RVM_SWP, R1, R_TOP, XX, 0));
436 rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
437 rvm_codegen_addins(co->cg, rvm_asm(RVM_BEQ, DA, XX, XX, 2));
438 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/* NOTE(review): this TST uses the operand layout (R_FLG, DA, XX); the TST above and both TSTs in rpa_match_char() use (R0, R_FLG, DA). Confirm which layout the VM expects. */
439 rvm_codegen_addins(co->cg, rvm_asm(RVM_TST, R_FLG, DA, XX, RPA_MATCH_OPTIONAL));
440 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXNEQ, LR, XX, XX, 0));
441 rvm_codegen_addins(co->cg, rvm_asm(RVM_ADDS, R0, R0, DA, -1));
442 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Emit the routine labelled "rpa_mnode_nan": a single BX through R_WHT, without
 * link, so the target returns to whoever called this routine.
 */
446 void codegen_rpa_mnode_nan(rpa_compiler_t *co)
448 rvm_codegen_addlabel_s(co->cg, "rpa_mnode_nan");
449 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, R_WHT, XX, XX, 0));
/*
 * Emit the routine labelled "rpa_mnode_opt": call through R_WHT with LR
 * preserved on the stack. BXGRE returns right after comparing R0 with 0
 * (presumably when R0 is greater, i.e. the node matched). Otherwise R0 is
 * forced to 0 and compared with itself (presumably to leave "equal" status
 * flags) before returning.
 */
453 void codegen_rpa_mnode_opt(rpa_compiler_t *co)
455 rvm_codegen_addlabel_s(co->cg, "rpa_mnode_opt");
456 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, LR, XX, XX, 0));
457 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
458 rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, LR, XX, XX, 0));
459 rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, DA, XX, 0));
460 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXGRE, LR, XX, XX, 0));
461 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, 0));
462 rvm_codegen_addins(co->cg, rvm_asm(RVM_CMP, R0, R0, XX, 0));
463 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Emit the routine labelled "rpa_mnode_mul": call through R_WHT once and return
 * immediately (POP PC) unless the BGRE after comparing R0 with 0 is taken.
 * Otherwise keep calling through R_WHT, carrying a running total on the stack
 * between calls, until that BGRE is no longer taken; the total is then placed
 * in R0 with ADDS before returning.
 * The instruction order matters: BGRE uses relative offsets 2 and -5.
 */
467 void codegen_rpa_mnode_mul(rpa_compiler_t *co)
469 rvm_codegen_addlabel_s(co->cg, "rpa_mnode_mul");
470 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, LR, XX, XX, 0));
471 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
472 rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, DA, XX, 0));
473 rvm_codegen_addins(co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, 2));
474 rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, PC, XX, XX, 0));
475 rvm_codegen_addins(co->cg, rvm_asm(RVM_CLR, R1, XX, XX, 0));
476 rvm_codegen_addins(co->cg, rvm_asm(RVM_ADD, R0, R0, R1, 0));
477 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, R0, XX, XX, 0));
478 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
479 rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R1, XX, XX, 0));
480 rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, DA, XX, 0));
481 rvm_codegen_addins(co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, -5));
482 rvm_codegen_addins(co->cg, rvm_asm(RVM_ADDS, R0, R1, DA, 0));
483 rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, PC, XX, XX, 0));
/*
 * Emit the routine labelled "rpa_mnode_mop": same looping scheme as
 * "rpa_mnode_mul", except that when the BGRE after the first call is not
 * taken, R0 is forced to 0 and compared with itself before returning instead
 * of returning the result of that call unchanged.
 * The instruction order matters: BGRE uses relative offsets 4 and -5.
 * NOTE(review): ruleidx is assigned but no use of it is visible in this
 * listing.
 */
487 void codegen_rpa_mnode_mop(rpa_compiler_t *co)
490 const rchar *rule = "rpa_mnode_mop";
492 ruleidx = rvm_codegen_addstring_s(co->cg, NULL, rule);
494 rvm_codegen_addlabel_s(co->cg, "rpa_mnode_mop");
495 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, LR, XX, XX, 0));
496 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
497 rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, DA, XX, 0));
498 rvm_codegen_addins(co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, 4));
499 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, 0));
500 rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, R0, XX, 0));
501 rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, PC, XX, XX, 0));
502 rvm_codegen_addins(co->cg, rvm_asm(RVM_CLR, R1, XX, XX, 0));
503 rvm_codegen_addins(co->cg, rvm_asm(RVM_ADD, R0, R0, R1, 0));
504 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSH, R0, XX, XX, 0));
505 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXL, R_WHT, XX, XX, 0));
506 rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, R1, XX, XX, 0));
507 rvm_codegen_addins(co->cg, rvm_asml(RVM_CMP, R0, DA, XX, 0));
508 rvm_codegen_addins(co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, -5));
509 rvm_codegen_addins(co->cg, rvm_asm(RVM_ADDS, R0, R1, DA, 0));
510 rvm_codegen_addins(co->cg, rvm_asm(RVM_POP, PC, XX, XX, 0));
/*
 * Emit the rule "rpa_match_aorb": an alternation that tries 'a' (mandatory)
 * and then 'b' (zero or more), branching to "rpa_match_aorb_end" on the first
 * BGRE that is taken. If neither branch is taken, the saved registers are
 * restored and R0 is set to -1. At the end label the saved R_TOP is popped
 * into R1, R0 becomes R_TOP - R1, and an end record is emitted for the rule.
 * NOTE(review): the declaration of ruleidx is not visible in this listing.
 */
514 void codegen_rpa_match_aorb(rpa_compiler_t *co)
517 const rchar *rule = "rpa_match_aorb";
518 const rchar *ruleend = "rpa_match_aorb_end";
520 ruleidx = rvm_codegen_addstring_s(co->cg, NULL, rule);
521 rvm_codegen_addlabel_s(co->cg, rule);
523 rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, ruleidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
524 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
526 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, 'a'));
527 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
529 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_MOP, DA, XX, XX, 'b'));
530 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
/* Fall-through path: no alternative branched to the end label. */
532 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
533 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
534 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
535 rvm_codegen_addlabel_s(co->cg, ruleend);
/* The value pushed from R_TOP is popped into R1 here, so R_TOP keeps its advanced value. */
536 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R1)|BIT(R_WHT)|BIT(LR)));
537 rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
538 rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, ruleidx, rvm_asm(RPA_EMITEND, DA, R1, R0, 0));
539 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Emit the rule "rpa_match_xyz": a sequence made of a call to
 * "rpa_match_squared" (through RPA_BXLMOP), then 'x', zero or more 'y', and
 * 'z'. Each step is followed by a BLES to "rpa_match_xyz_end", which is the
 * failure exit: it restores the saved registers and sets R0 to -1. On success
 * the saved R_TOP is popped into R1, R0 becomes R_TOP - R1, and an end record
 * is emitted for the rule.
 * NOTE(review): RPA_BXLMOP has no definition among the visible lines of this
 * file and switable[] has no entry for it -- confirm. The declaration of
 * ruleidx is not visible in this listing either.
 */
543 void codegen_rpa_match_xyz(rpa_compiler_t *co)
546 const rchar *rule = "rpa_match_xyz";
547 const rchar *ruleend = "rpa_match_xyz_end";
549 ruleidx = rvm_codegen_addstring_s(co->cg, NULL, rule);
550 rvm_codegen_addlabel_s(co->cg, rule);
552 rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, ruleidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
553 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
555 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_squared", rvm_asm(RPA_BXLMOP, DA, XX, XX, 0));
556 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
558 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, 'x'));
559 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
561 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_MOP, DA, XX, XX, 'y'));
562 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
564 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, 'z'));
565 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
/* Success path: every step fell through. */
567 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R1)|BIT(R_WHT)|BIT(LR)));
568 rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
569 rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, ruleidx, rvm_asm(RPA_EMITEND, DA, R1, R0, 0));
570 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
571 rvm_codegen_addlabel_s(co->cg, ruleend);
572 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
573 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
574 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Emit the rule "rpa_match_abc": the sequence 'a', zero or more 'b', 'c'. Each
 * step is followed by a BLES to "rpa_match_abc_end", which is the failure
 * exit: it restores the saved registers and sets R0 to -1. On success the
 * saved R_TOP is popped into R1, R0 becomes R_TOP - R1, and an end record is
 * emitted for the rule.
 * NOTE(review): the declaration of ruleidx is not visible in this listing.
 */
578 void codegen_rpa_match_abc(rpa_compiler_t *co)
581 const rchar *rule = "rpa_match_abc";
582 const rchar *ruleend = "rpa_match_abc_end";
584 ruleidx = rvm_codegen_addstring_s(co->cg, NULL, rule);
585 rvm_codegen_addlabel_s(co->cg, rule);
587 rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, ruleidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
588 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
590 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, 'a'));
591 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
593 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_MOP, DA, XX, XX, 'b'));
594 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
596 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, 'c'));
597 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
/* Success path: every step fell through. */
600 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R1)|BIT(R_WHT)|BIT(LR)));
601 rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
602 rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, ruleidx, rvm_asm(RPA_EMITEND, DA, R1, R0, 0));
603 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
604 rvm_codegen_addlabel_s(co->cg, ruleend);
605 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
606 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
607 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Emit the rule "rpa_match_xyzorabc": an alternation that calls
 * "rpa_match_xyz" and then "rpa_match_abc" (both through RPA_BXLMOP),
 * branching to "rpa_match_xyzorabc_end" on the first BGRE that is taken. If
 * neither branch is taken, the saved registers are restored and R0 is set to
 * -1. At the end label the saved R_TOP is popped into R1, R0 becomes
 * R_TOP - R1, and an end record is emitted for the rule.
 * NOTE(review): RPA_BXLMOP has no definition among the visible lines of this
 * file and switable[] has no entry for it -- confirm. The declaration of
 * ruleidx is not visible in this listing either.
 */
611 void codegen_rpa_match_xyzorabc(rpa_compiler_t *co)
614 const rchar *rule = "rpa_match_xyzorabc";
615 const rchar *ruleend = "rpa_match_xyzorabc_end";
617 ruleidx = rvm_codegen_addstring_s(co->cg, NULL, rule);
618 rvm_codegen_addlabel_s(co->cg, rule);
619 rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, ruleidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
620 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
623 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_xyz", rvm_asm(RPA_BXLMOP, DA, XX, XX, 0));
624 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
626 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_abc", rvm_asm(RPA_BXLMOP, DA, XX, XX, 0));
627 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
/* Fall-through path: no alternative branched to the end label. */
629 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
630 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
631 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
632 rvm_codegen_addlabel_s(co->cg, ruleend);
633 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R1)|BIT(R_WHT)|BIT(LR)));
634 rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
635 rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, ruleidx, rvm_asm(RPA_EMITEND, DA, R1, R0, 0));
636 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Emit the rule "rpa_match_squared": the sequence '[', a call to
 * "rpa_match_xyzorabc", a call to "rpa_match_aorb" (both through RPA_BXLMOP),
 * ']', and finally "rpa_matcheol_char" run through "rpa_mnode_mop" (R_WHT is
 * loaded with the matcher address, then BL to the mnode routine). Each step is
 * followed by a BLES to "rpa_match_squared_end", which is the failure exit: it
 * restores the saved registers and sets R0 to -1. On success the saved R_TOP
 * is popped into R1, R0 becomes R_TOP - R1, and an end record is emitted.
 * NOTE(review): RPA_BXLMOP has no definition among the visible lines of this
 * file and switable[] has no entry for it -- confirm. The declaration of
 * ruleidx is not visible in this listing either.
 */
641 void codegen_rpa_match_squared(rpa_compiler_t *co)
644 const rchar *rule = "rpa_match_squared";
645 const rchar *ruleend = "rpa_match_squared_end";
647 ruleidx = rvm_codegen_addstring_s(co->cg, NULL, rule);
648 rvm_codegen_addlabel_s(co->cg, rule);
649 rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, ruleidx, rvm_asm(RPA_EMITSTART, DA, R_TOP, XX, 0));
650 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
652 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, '['));
653 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
655 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_xyzorabc", rvm_asm(RPA_BXLMOP, DA, XX, XX, 0));
656 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
658 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_aorb", rvm_asm(RPA_BXLMOP, DA, XX, XX, 0));
659 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
661 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, ']'));
662 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
/* Explicit two-instruction form: load R_WHT with the matcher address, then branch-and-link to the quantifier routine. */
664 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_matcheol_char", rvm_asm(RVM_MOV, R_WHT, DA, XX, 0));
665 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, "rpa_mnode_mop", rvm_asm(RVM_BL, DA, XX, XX, 0));
666 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, ruleend, rvm_asm(RVM_BLES, DA, XX, XX, 0));
/* Success path: every step fell through. */
668 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R1)|BIT(R_WHT)|BIT(LR)));
669 rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
670 rvm_codegen_index_addrelocins(co->cg, RVM_RELOC_STRING, ruleidx, rvm_asm(RPA_EMITEND, DA, R1, R0, 0));
671 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
672 rvm_codegen_addlabel_s(co->cg, ruleend);
673 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BIT(R_TOP)|BIT(R_WHT)|BIT(LR)));
674 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
675 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
686 * R0 = 0 Didn't match, but it was optional
687 * R0 > 0 matched R0 TPs
/*
 * Emit the routine labelled "rpa_match_char": matches the character held in
 * R_ARG with RPA_MATCHCHR_NAN, repeating while the match succeeds and the
 * RPA_MATCH_MULTIPLE bit is found by the TST. It then restores the saved
 * registers and leaves in R0 the difference between the new and the saved
 * R_TOP. When that difference is zero, the RPA_MATCH_OPTIONAL test decides
 * whether R0 stays 0 or becomes -1.
 * The instruction order matters: BNEQ uses relative offsets 3 and -3, and BEQ
 * uses 2.
 */
690 void rpa_match_char(rpa_compiler_t *co)
692 rvm_codegen_addlabel_s(co->cg, "rpa_match_char");
693 rvm_codegen_addins(co->cg, rvm_asm(RVM_PUSHM, DA, XX, XX, BITS(R_TOP,LR)));
694 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, FP, SP, XX, 0));
695 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, R_ARG, XX, XX, 0));
696 rvm_codegen_addins(co->cg, rvm_asm(RVM_BNEQ, DA, XX, XX, 3));
697 rvm_codegen_addins(co->cg, rvm_asm(RVM_TST, R0, R_FLG, DA, RPA_MATCH_MULTIPLE));
698 rvm_codegen_addins(co->cg, rvm_asm(RVM_BNEQ, DA, XX, XX, -3));
699 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R1, R_TOP, XX, 0));
700 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, SP, FP, XX, 0));
701 rvm_codegen_addins(co->cg, rvm_asm(RVM_POPM, DA, XX, XX, BITS(R_TOP,LR)));
702 rvm_codegen_addins(co->cg, rvm_asm(RVM_SWP, R1, R_TOP, XX, 0));
703 rvm_codegen_addins(co->cg, rvm_asm(RVM_SUBS, R0, R_TOP, R1, 0));
704 rvm_codegen_addins(co->cg, rvm_asm(RVM_BEQ, DA, XX, XX, 2));
705 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
706 rvm_codegen_addins(co->cg, rvm_asm(RVM_TST, R0, R_FLG, DA, RPA_MATCH_OPTIONAL));
707 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXNEQ, LR, XX, XX, 0));
708 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
709 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Emit the routine labelled "rpa_matchonly_char": preloads R0 with -1, runs
 * RPA_MATCHCHR_NAN on the character held in R_ARG, returns early through
 * BXLES (presumably on failure, leaving a negative R0), otherwise sets R0 to 1
 * and returns.
 */
713 void rpa_matchonly_char(rpa_compiler_t *co)
715 rvm_codegen_addlabel_s(co->cg, "rpa_matchonly_char");
716 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
717 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHCHR_NAN, R_ARG, XX, XX, 0));
718 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXLES, LR, XX, XX, 0));
719 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, 1));
720 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Emit the routine labelled "rpa_matchany_char": preloads R0 with -1, runs
 * RPA_MATCHANY_NAN, returns early through BXLES (presumably on failure,
 * leaving R0 at -1), otherwise sets R0 to 1 and returns.
 */
724 void rpa_matchany_char(rpa_compiler_t *co)
726 rvm_codegen_addlabel_s(co->cg, "rpa_matchany_char");
727 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
728 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHANY_NAN, R_ARG, XX, XX, 0));
729 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXLES, LR, XX, XX, 0));
730 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, 1));
731 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Emit the routine labelled "rpa_matcheol_char": preloads R0 with -1, runs
 * RPA_MATCHEOL_NAN, returns early through BXLES (presumably on failure,
 * leaving R0 at -1), otherwise sets R0 to 1 and returns.
 */
735 void rpa_matcheol_char(rpa_compiler_t *co)
737 rvm_codegen_addlabel_s(co->cg, "rpa_matcheol_char");
738 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, -1));
739 rvm_codegen_addins(co->cg, rvm_asm(RPA_MATCHEOL_NAN, R_ARG, XX, XX, 0));
740 rvm_codegen_addins(co->cg, rvm_asm(RVM_BXLES, LR, XX, XX, 0));
741 rvm_codegen_addins(co->cg, rvm_asm(RVM_MOV, R0, DA, XX, 1));
742 rvm_codegen_addins(co->cg, rvm_asm(RVM_BX, LR, XX, XX, 0));
/*
 * Unmap the file mapping described by buf (buf->str, buf->size).
 * NOTE(review): any NULL guard and the release of buf itself are not visible in
 * this listing. main() passes a pointer that stays NULL when no -f option was
 * given, so confirm that a guard exists.
 */
746 void codegen_unmap_file(rstr_t *buf)
749 munmap(buf->str, buf->size);
/*
 * Map filename read-only into memory and describe the mapping with a newly
 * allocated rstr_t whose size is the file size reported by fstat().
 * NOTE(review): the local declarations, the checks on open() and r_malloc(),
 * the assignment of str->str, the return statements and any close(fd) are not
 * visible in this listing. Presumably the final munmap() belongs to an error
 * path -- confirm, and confirm that fd is closed on every path.
 */
755 rstr_t *codegen_map_file(const char *filename)
762 int fd = open(filename, O_RDONLY);
766 if (fstat(fd, &st) < 0) {
770 buffer = (char*)mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
/* mmap() reports failure with MAP_FAILED, not NULL; use the POSIX name rather than a hand-written (void*)-1. */
771 if (buffer == MAP_FAILED) {
775 str = (rstr_t *)r_malloc(sizeof(*str));
778 r_memset(str, 0, sizeof(*str));
780 str->size = st.st_size;
785 munmap(buffer, st.st_size);
/*
 * Test driver. Creates the compiler, the CPU and the matcher state, registers
 * the SWI table, takes the input either from the argument after -e or from the
 * file named after -f, emits a small bootstrap program followed by all the
 * rule and helper routines, relocates and runs the code, prints the value left
 * in R0, and releases everything.
 * NOTE(review): local declarations, option bodies, several conditionals and
 * the return statements are not visible in this listing. RPA_SETBXLNAN/MUL/
 * OPT/MOP and RPA_BXLMUL have no definition among the visible lines of this
 * file and switable[] has no entries for them -- confirm.
 */
791 int main(int argc, char *argv[])
793 rstr_t *script = NULL, *unmapscript = NULL;
795 rvm_codelabel_t *err;
800 co = rpa_compiler_create();
801 cpu = rvm_cpu_create_default();
802 cpu->userdata1 = stat = rpa_stat_create();
/* The table must be registered on the CPU that executes the generated code, i.e. cpu (this line previously named `vm`). */
803 regextable = rvm_cpu_addswitable(cpu, "switable", switable);
/* First pass over argv: option flags. */
805 for (i = 1; i < argc; i++) {
806 if (r_strcmp(argv[i], "-L") == 0) {
807 } else if (r_strcmp(argv[i], "-d") == 0) {
809 } else if (r_strcmp(argv[i], "-c") == 0) {
811 } else if (r_strcmp(argv[i], "-p") == 0) {
/* Second pass: -e takes the input text from the command line. */
816 for (i = 1; i < argc; i++) {
817 if (r_strcmp(argv[i], "-e") == 0) {
819 rstr_t bnfexpr = { argv[i], r_strlen(argv[i]) };
820 rpa_stat_init((rpastat_t *)cpu->userdata1, bnfexpr.str, bnfexpr.str, bnfexpr.str + bnfexpr.size);
/* Third pass: -f maps a file and uses its contents as input; the mapping is remembered for cleanup. */
825 for (i = 1; i < argc; i++) {
826 if (r_strcmp(argv[i], "-f") == 0) {
828 script = codegen_map_file(argv[i]);
830 rpa_stat_init((rpastat_t *)cpu->userdata1, script->str, script->str, script->str + script->size);
831 unmapscript = script;
/* Bootstrap code: R_TOP = -1, FP = 0, SP = co->fpoff. */
841 rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
842 rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, FP, DA, XX, 0));
843 rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, SP, DA, XX, co->fpoff));
845 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_mnode_nan", rvm_asm(RPA_SETBXLNAN, DA, XX, XX, 0));
846 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_mnode_mul", rvm_asm(RPA_SETBXLMUL, DA, XX, XX, 0));
847 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_mnode_opt", rvm_asm(RPA_SETBXLOPT, DA, XX, XX, 0));
848 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_mnode_mop", rvm_asm(RPA_SETBXLMOP, DA, XX, XX, 0));
/* Load the first input character before the top-level rule runs. */
850 rvm_codegen_addins(co->cg, rvm_asm(RPA_SHIFT, XX, XX, XX, 0));
853 // rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_FLG, DA, XX, RPA_MATCH_NONE));
854 // rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_squared", rvm_asm(RVM_MOV, R_WHT, DA, XX, 0));
855 // rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, "rpa_mnode_mul", rvm_asm(RVM_BL, DA, XX, XX, 0));
/* Top-level rule: "rpa_match_squared", invoked through RPA_BXLMUL. */
857 rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_squared", rvm_asm(RPA_BXLMUL, DA, XX, XX, 0));
860 // rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_JUMP, "rpa_match_xyz_p", rvm_asm(RVM_MOV, R_WHT, DA, XX, 0));
861 // rvm_codegen_addrelocins_s(co->cg, RVM_RELOC_BRANCH, "rpa_match_mnode", rvm_asm(RVM_BL, DA, XX, XX, 0));
863 rvm_codegen_addins(co->cg, rvm_asm(RVM_NOP, XX, XX, XX, 0xabc));
864 rvm_codegen_addins(co->cg, rvm_asm(RVM_EXT, XX, XX, XX, 0));
/* Emit every routine referenced by label from the code above. */
867 rpa_matchonly_char(co);
868 codegen_rpa_match_abc(co);
869 codegen_rpa_match_xyz(co);
870 codegen_rpa_match_xyzorabc(co);
871 codegen_rpa_match_aorb(co);
872 codegen_rpa_match_squared(co);
873 codegen_rpa_match_mnode(co);
874 codegen_rpa_match(co);
875 codegen_rpa_match_mul(co);
876 rpa_matcheol_char(co);
877 rpa_matchany_char(co);
879 codegen_rpa_mnode_nan(co);
880 codegen_rpa_mnode_opt(co);
881 codegen_rpa_mnode_mul(co);
882 codegen_rpa_mnode_mop(co);
/* Resolve label references; on failure err names the unresolved symbol. */
885 if (rvm_codegen_relocate(co->cg, &err) < 0) {
886 r_printf("Unresolved symbol: %s\n", err->name->str);
891 fprintf(stdout, "\nGenerated Code:\n");
892 rvm_asm_dump(rvm_codegen_getcode(co->cg, 0), rvm_codegen_getcodesize(co->cg));
893 if (rvm_codegen_getcodesize(co->cg)) {
895 fprintf(stdout, "\nExecution:\n");
896 rvm_cpu_exec_debug(cpu, rvm_codegen_getcode(co->cg, 0), 0);
901 rvm_cpu_exec(cpu, rvm_codegen_getcode(co->cg, 0), 0);
/* Cast explicitly so the argument type matches the conversion specifier, as the other prints in this file do. */
904 r_printf("Matched: %ld\n", (rlong)RVM_CPUREG_GETU(cpu, R0));
/* The leading `0 &&` disables this record dump. */
907 for (i = 0; 0 && i < r_array_length(stat->records); i++) {
908 rparecord_t *rec = (rparecord_t *)r_array_slot(stat->records, i);
909 if (rec->type & RPA_RECORD_MATCH) {
910 r_printf("%d: rule: %s(%d, %d)\n", i, rec->rule, (rint)rec->top, (rint)rec->size);
914 rpa_stat_destroy((rpastat_t *)cpu->userdata1);
915 rvm_cpu_destroy(cpu);
916 rpa_compiler_destroy(co);
/* NOTE(review): unmapscript stays NULL when no -f option was given; confirm that codegen_unmap_file() or a condition not visible here guards that case. */
918 codegen_unmap_file(unmapscript);
922 r_printf("Max alloc mem: %ld\n", r_debug_get_maxmem());
923 r_printf("Leaked mem: %ld\n", r_debug_get_allocmem());