2 * Regular Pattern Analyzer (RPA)
3 * Copyright (c) 2009-2010 Martin Stoilov
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 * Martin Stoilov <martin@rpasearch.com>
26 #include "rlib/rmem.h"
27 #include "rlib/rutf.h"
28 #include "rpa/rpabitmap.h"
29 #include "rpa/rpadbexpriv.h"
30 #include "rpa/rpadbex.h"
31 #include "rpa/rpastatpriv.h"
/*
 * Error-reporting helpers. Each macro stores one field of (__d__)->err
 * (code, offset, line or name) and sets the matching RPA_ERRINFO_* bit in
 * err.mask, presumably so a consumer can tell which fields were filled in.
 *
 * RPA_DBEX_SETERRINFO_NAME zeroes err.name first and then copies at most
 * sizeof(err.name) - 1 bytes of __n__, leaving room for a terminating NUL
 * (assuming r_strncpy never copies more than the given count - confirm).
 */
34 #define RPA_DBEX_SETERRINFO_CODE(__d__, __e__) do { (__d__)->err.code = __e__; (__d__)->err.mask |= RPA_ERRINFO_CODE; } while (0)
35 #define RPA_DBEX_SETERRINFO_OFFSET(__d__, __o__) do { (__d__)->err.offset = __o__; (__d__)->err.mask |= RPA_ERRINFO_OFFSET; } while (0)
36 #define RPA_DBEX_SETERRINFO_LINE(__d__, __l__) do { (__d__)->err.line = __l__; (__d__)->err.mask |= RPA_ERRINFO_LINE; } while (0)
37 #define RPA_DBEX_SETERRINFO_NAME(__d__, __n__, __s__) do { \
38 (__d__)->err.mask |= RPA_ERRINFO_NAME; \
39 r_memset((__d__)->err.name, 0, sizeof((__d__)->err.name)); \
40 r_strncpy((__d__)->err.name, __n__, R_MIN(__s__, (sizeof((__d__)->err.name) - 1))); } while (0)
/*
 * Forward declarations of file-local helpers, so the record handlers below
 * can reference them ahead of their definitions.
 */
44 static rparecord_t *rpa_dbex_rulerecord(rpadbex_t *dbex, rparule_t rid);
45 static rparecord_t *rpa_dbex_record(rpadbex_t *dbex, long rec);
46 static int rpa_dbex_rulename(rpadbex_t *dbex, long rec, const char **name, rsize_t *namesize);
47 static int rpa_parseinfo_loopdetect(rpadbex_t *dbex, long parent, long loopto);
48 static long rpa_dbex_firstinlined(rpadbex_t *dbex);
49 static int rpa_dbex_findinlined(rpadbex_t *dbex, long startrec);
50 static int rpa_dbex_playchildrecords(rpadbex_t *dbex, long rec);
51 static int rpa_dbex_playreversechildrecords(rpadbex_t *dbex, long rec);
52 static int rpa_dbex_playrecord(rpadbex_t *dbex, long rec);
53 static int rpa_dbex_rh_default(rpadbex_t *dbex, long rec);
/*
 * Debug hook called by the record handlers ahead of their code generation.
 * Saves the current generated-code size in dbex->headoff and dumps the record
 * when it is flagged RPA_RECORD_START.
 * NOTE(review): several short lines of this body are missing from the listing;
 * the work may be gated by a debug condition that is not shown - confirm.
 */
56 void rpa_dbex_debug_recordhead(rpadbex_t *dbex, long rec)
59 rarray_t *records = dbex->records;
60 rparecord_t *prec = (rparecord_t *) r_array_slot(records, rec);
61 dbex->headoff = rvm_codegen_getcodesize(dbex->co->cg);
62 if (prec->type & RPA_RECORD_START) {
63 rpa_record_dump(records, rec);
/*
 * Debug hook called by the record handlers after their code generation.
 * Hands rvm_asm_dump the instructions added since dbex->headoff (the offset
 * saved by rpa_dbex_debug_recordhead) and dumps the record when it is flagged
 * RPA_RECORD_END.
 * NOTE(review): several short lines of this body are missing from the listing;
 * the work may be gated by a debug condition that is not shown - confirm.
 */
70 void rpa_dbex_debug_recordtail(rpadbex_t *dbex, long rec)
73 rarray_t *records = dbex->records;
74 rparecord_t *prec = (rparecord_t *) r_array_slot(records, rec);
75 rvm_asm_dump(rvm_codegen_getcode(dbex->co->cg, dbex->headoff), rvm_codegen_getcodesize(dbex->co->cg) - dbex->headoff);
76 if (prec->type & RPA_RECORD_END) {
77 rpa_record_dump(records, rec);
/*
 * Default record handler (also the fallback used by rpa_dbex_playrecord).
 * Generates no instructions of its own: it runs the debug hooks on the START
 * record, plays the child records, then runs the debug hooks on the END record.
 * A negative result from rpa_dbex_playchildrecords is tested; the statement
 * executed on failure is not shown in this listing.
 */
83 static int rpa_dbex_rh_default(rpadbex_t *dbex, long rec)
85 rarray_t *records = dbex->records;
/* Normalise rec to the START record of the node before processing it. */
88 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
89 prec = rpa_dbex_record(dbex, rec);
91 rpa_dbex_debug_recordhead(dbex, rec);
92 rpa_dbex_debug_recordtail(dbex, rec);
93 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
/* Switch to the matching END record for the closing half. */
95 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
96 prec = rpa_dbex_record(dbex, rec);
98 rpa_dbex_debug_recordhead(dbex, rec);
99 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Dispatch one record to its handler.
 * The record's ruleuid indexes dbex->handlers; when the uid lies in
 * [0, RPA_PRODUCTION_COUNT) and a handler is registered for it, that handler's
 * result is returned. Otherwise the record is handled by rpa_dbex_rh_default.
 */
105 static int rpa_dbex_playrecord(rpadbex_t *dbex, long rec)
107 rarray_t *records = dbex->records;
108 rparecord_t *prec = (rparecord_t *)r_array_slot(records, rec);
110 if (prec->ruleuid >= 0 && prec->ruleuid < RPA_PRODUCTION_COUNT && dbex->handlers[prec->ruleuid]) {
111 return dbex->handlers[prec->ruleuid](dbex, rec);
113 return rpa_dbex_rh_default(dbex, rec);
/*
 * Play every child of rec in forward order: start at the first child's START
 * record and advance with rpa_recordtree_next until it returns a negative
 * index. Each child goes through rpa_dbex_playrecord and a negative result is
 * tested; the statement executed on failure is not shown in this listing.
 */
117 static int rpa_dbex_playchildrecords(rpadbex_t *dbex, long rec)
120 rarray_t *records = dbex->records;
122 for (child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_START); child >= 0; child = rpa_recordtree_next(records, child, RPA_RECORD_START)) {
123 if (rpa_dbex_playrecord(dbex, child) < 0)
/*
 * Play every child of rec in reverse order: start at the last child's START
 * record and step back with rpa_recordtree_prev until it returns a negative
 * index. Each child goes through rpa_dbex_playrecord and a negative result is
 * tested; the statement executed on failure is not shown in this listing.
 */
130 static int rpa_dbex_playreversechildrecords(rpadbex_t *dbex, long rec)
133 rarray_t *records = dbex->records;
135 for (child = rpa_recordtree_lastchild(records, rec, RPA_RECORD_START); child >= 0; child = rpa_recordtree_prev(records, child, RPA_RECORD_START)) {
136 if (rpa_dbex_playrecord(dbex, child) < 0)
/*
 * Select the character-match opcode for a match type.
 * Only the bits under RPA_MATCH_MASK are considered:
 *   RPA_MATCH_MULTIPLE -> RPA_MATCHCHR_MUL
 *   RPA_MATCH_OPTIONAL -> RPA_MATCHCHR_OPT
 *   RPA_MATCH_MULTIOPT -> RPA_MATCHCHR_MOP
 * RPA_MATCHCHR_NAN is returned by the remaining branch and by the final
 * fall-back return (the labels of that branch are missing from this listing).
 */
144 static long rpa_dbex_getmatchchr(unsigned long matchtype)
146 switch (matchtype & RPA_MATCH_MASK) {
149 return RPA_MATCHCHR_NAN;
151 case RPA_MATCH_MULTIPLE:
152 return RPA_MATCHCHR_MUL;
154 case RPA_MATCH_OPTIONAL:
155 return RPA_MATCHCHR_OPT;
157 case RPA_MATCH_MULTIOPT:
158 return RPA_MATCHCHR_MOP;
161 return RPA_MATCHCHR_NAN;
/*
 * Select the special-character-match opcode for a match type.
 * Only the bits under RPA_MATCH_MASK are considered:
 *   RPA_MATCH_MULTIPLE -> RPA_MATCHSPCHR_MUL
 *   RPA_MATCH_OPTIONAL -> RPA_MATCHSPCHR_OPT
 *   RPA_MATCH_MULTIOPT -> RPA_MATCHSPCHR_MOP
 * RPA_MATCHSPCHR_NAN is returned by the remaining branch and by the final
 * fall-back return (the labels of that branch are missing from this listing).
 */
165 static long rpa_dbex_getmatchspecialchr(unsigned long matchtype)
167 switch (matchtype & RPA_MATCH_MASK) {
170 return RPA_MATCHSPCHR_NAN;
172 case RPA_MATCH_MULTIPLE:
173 return RPA_MATCHSPCHR_MUL;
175 case RPA_MATCH_OPTIONAL:
176 return RPA_MATCHSPCHR_OPT;
178 case RPA_MATCH_MULTIOPT:
179 return RPA_MATCHSPCHR_MOP;
182 return RPA_MATCHSPCHR_NAN;
/*
 * Convert the text covered by a numeric record into a 32-bit unsigned value.
 *
 * prec - record whose input/inputsiz delimit the digits; its ruleuid selects
 *        the base (RPA_PRODUCTION_HEX -> 16, RPA_PRODUCTION_DEC -> 10).
 * num  - receives the converted value.
 *
 * The guard rejects NULL arguments, empty input and input that would not fit
 * in the local buffer together with a terminating NUL. Callers test the
 * result with "< 0" to detect failure.
 *
 * The record text is length-delimited, so it is first copied into a zeroed
 * local buffer to obtain a NUL-terminated string. The conversion must read
 * from that copy: parsing prec->input directly lets r_strtoul run past the
 * end of the record and left the copy unused.
 */
186 int rpa_record2long(rparecord_t *prec, ruint32 *num)
191 if (!prec || !num || prec->inputsiz == 0 || prec->inputsiz >= sizeof(buffer))
193 r_memset(buffer, 0, sizeof(buffer));
194 r_memcpy(buffer, prec->input, prec->inputsiz);
195 if (prec->ruleuid == RPA_PRODUCTION_HEX) {
/* Parse the bounded, NUL-terminated copy rather than the raw input. */
196 *num = (ruint32)r_strtoul(buffer, &endptr, 16);
197 } else if (prec->ruleuid == RPA_PRODUCTION_DEC) {
198 *num = (ruint32)r_strtoul(buffer, &endptr, 10);
/*
 * Handler registered for RPA_PRODUCTION_DIRECTIVEEMITID.
 * Resolves the rule name attached to the record, takes the END record of the
 * last child as the numeric argument, converts it with rpa_record2long and
 * stores the result as the rule's uid preference; the rule is also flagged
 * RPA_RFLAG_EMITRECORD. The error code is set to RPA_E_SYNTAXERROR on the
 * failure paths (missing name, and the number checks). Child records are then
 * played as for any other record.
 * NOTE(review): the condition guarding the second error assignment and the
 * statements that follow each error are missing from this listing.
 */
206 static int rpa_dbex_rh_uid(rpadbex_t *dbex, long rec)
208 const char *name = NULL;
211 rparecord_t *pnumrec;
212 rarray_t *records = dbex->records;
215 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
216 prec = rpa_dbex_record(dbex, rec);
218 rpa_dbex_debug_recordhead(dbex, rec);
219 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
220 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAXERROR);
/* The uid literal is the last child of the directive record. */
223 pnumrec = rpa_dbex_record(dbex, rpa_recordtree_lastchild(dbex->records, rec, RPA_RECORD_END));
225 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAXERROR);
228 if (rpa_record2long(pnumrec, &uid) < 0) {
229 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAXERROR);
232 rpa_compiler_rulepref_set_ruleuid(dbex->co, name, namesize, uid);
233 rpa_compiler_rulepref_set_flag(dbex->co, name, namesize, RPA_RFLAG_EMITRECORD);
234 rpa_dbex_debug_recordtail(dbex, rec);
235 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
237 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
238 prec = rpa_dbex_record(dbex, rec);
240 rpa_dbex_debug_recordhead(dbex, rec);
241 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_DIRECTIVEABORT.
 * Resolves the rule name attached to the record and sets the
 * RPA_RFLAG_ABORTONFAIL preference flag for that rule, then plays the child
 * records. The statements executed when the name lookup or the child playback
 * fails are not shown in this listing.
 */
246 static int rpa_dbex_rh_abort(rpadbex_t *dbex, long rec)
248 const char *name = NULL;
250 rarray_t *records = dbex->records;
253 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
254 prec = rpa_dbex_record(dbex, rec);
256 rpa_dbex_debug_recordhead(dbex, rec);
257 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
260 rpa_compiler_rulepref_set_flag(dbex->co, name, namesize, RPA_RFLAG_ABORTONFAIL);
261 rpa_dbex_debug_recordtail(dbex, rec);
262 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
264 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
265 prec = rpa_dbex_record(dbex, rec);
267 rpa_dbex_debug_recordhead(dbex, rec);
268 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_DIRECTIVEEMIT.
 * Resolves the rule name attached to the record and sets the
 * RPA_RFLAG_EMITRECORD preference flag for that rule, then plays the child
 * records. The statements executed when the name lookup or the child playback
 * fails are not shown in this listing.
 */
273 static int rpa_dbex_rh_emit(rpadbex_t *dbex, long rec)
275 const char *name = NULL;
277 rarray_t *records = dbex->records;
280 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
281 prec = rpa_dbex_record(dbex, rec);
283 rpa_dbex_debug_recordhead(dbex, rec);
284 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
287 rpa_compiler_rulepref_set_flag(dbex->co, name, namesize, RPA_RFLAG_EMITRECORD);
288 rpa_dbex_debug_recordtail(dbex, rec);
289 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
291 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
292 prec = rpa_dbex_record(dbex, rec);
294 rpa_dbex_debug_recordhead(dbex, rec);
295 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_DIRECTIVENOEMIT.
 * Resolves the rule name attached to the record and clears the
 * RPA_RFLAG_EMITRECORD preference flag for that rule, then plays the child
 * records. The statements executed when the name lookup or the child playback
 * fails are not shown in this listing.
 */
300 static int rpa_dbex_rh_noemit(rpadbex_t *dbex, long rec)
302 const char *name = NULL;
304 rarray_t *records = dbex->records;
307 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
308 prec = rpa_dbex_record(dbex, rec);
310 rpa_dbex_debug_recordhead(dbex, rec);
311 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
314 rpa_compiler_rulepref_clear_flag(dbex->co, name, namesize, RPA_RFLAG_EMITRECORD);
315 rpa_dbex_debug_recordtail(dbex, rec);
316 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
318 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
319 prec = rpa_dbex_record(dbex, rec);
321 rpa_dbex_debug_recordhead(dbex, rec);
322 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Apply the RPA_RFLAG_EMITRECORD preference to every named rule.
 * Walks dbex->rules by index; entries whose type is RPA_RULEINFO_NAMEDRULE
 * get the flag set or cleared through the compiler's rule preferences.
 * NOTE(review): the condition choosing between set and clear is missing from
 * this listing; presumably it is the `emit` argument - confirm.
 */
327 static int rpa_dbex_setemit(rpadbex_t *dbex, rboolean emit)
330 rpa_ruleinfo_t *info;
332 for (i = 0; i < r_array_length(dbex->rules->names); i++) {
/* names[i] and the hashed entry i are read as a pair: rule name + rule info. */
333 rstr_t *name = r_array_index(dbex->rules->names, i, rstr_t*);
334 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
335 if (info->type == RPA_RULEINFO_NAMEDRULE) {
337 rpa_compiler_rulepref_set_flag(dbex->co, name->str, name->size, RPA_RFLAG_EMITRECORD);
339 rpa_compiler_rulepref_clear_flag(dbex->co, name->str, name->size, RPA_RFLAG_EMITRECORD);
/*
 * Handler registered for RPA_PRODUCTION_DIRECTIVEEMITALL.
 * Calls rpa_dbex_setemit(dbex, TRUE) and then plays the child records.
 * The return value of rpa_dbex_setemit is not examined here.
 */
347 static int rpa_dbex_rh_emitall(rpadbex_t *dbex, long rec)
349 rarray_t *records = dbex->records;
352 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
353 prec = rpa_dbex_record(dbex, rec);
355 rpa_dbex_debug_recordhead(dbex, rec);
356 rpa_dbex_setemit(dbex, TRUE);
357 rpa_dbex_debug_recordtail(dbex, rec);
358 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
360 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
361 prec = rpa_dbex_record(dbex, rec);
363 rpa_dbex_debug_recordhead(dbex, rec);
364 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_DIRECTIVEEMITNONE.
 * Calls rpa_dbex_setemit(dbex, FALSE) and then plays the child records.
 * The return value of rpa_dbex_setemit is not examined here.
 */
369 static int rpa_dbex_rh_emitnone(rpadbex_t *dbex, long rec)
371 rarray_t *records = dbex->records;
374 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
375 prec = rpa_dbex_record(dbex, rec);
377 rpa_dbex_debug_recordhead(dbex, rec);
378 rpa_dbex_setemit(dbex, FALSE);
379 rpa_dbex_debug_recordtail(dbex, rec);
380 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
382 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
383 prec = rpa_dbex_record(dbex, rec);
385 rpa_dbex_debug_recordhead(dbex, rec);
386 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_NAMEDRULE.
 *
 * Opening half: resolves the rule name, then chooses how to open the rule
 * based on dbex->inlinestack. With a non-empty stack the rule is opened with
 * rpa_compiler_inlinerule_begin. The other visible sequence emits RPA_SHIFT,
 * an optional RPA_EXITONBITMAP (only when the record's bitmap value is
 * non-zero), RVM_BL, RPA_EMITTAIL and RVM_EXT, and then opens either a loop
 * (record flagged RPA_LOOP_PATH) or a regular rule carrying the bitmap.
 * The record index is pushed on inlinestack while the children are played.
 *
 * Closing half: pops inlinestack and closes with the matching
 * inlinerule_end / loop_end / rule_end call.
 *
 * NOTE(review): the `else` and closing-brace lines are missing from this
 * listing, so the exact nesting of the branches described above must be
 * confirmed against the complete source.
 */
391 static int rpa_dbex_rh_namedrule(rpadbex_t *dbex, long rec)
393 const char *name = NULL;
395 rarray_t *records = dbex->records;
396 rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
398 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
399 prec = rpa_dbex_record(dbex, rec);
401 rpa_dbex_debug_recordhead(dbex, rec);
402 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
406 if (!r_array_empty(dbex->inlinestack)) {
407 rpa_compiler_inlinerule_begin(dbex->co, name, namesize, 0);
409 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_SHIFT, XX, XX, XX, 0));
410 if (RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(prec))) {
411 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EXITONBITMAP, DA, XX, XX, RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(prec))));
413 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_BL, DA, XX, XX, 3));
414 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EMITTAIL, XX, XX, XX, 0));
415 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_EXT, XX, XX, XX, 0));
417 if ((prec->usertype & RPA_LOOP_PATH)) {
418 rpa_compiler_loop_begin(dbex->co, name, namesize);
420 rpa_compiler_rule_begin(dbex->co, name, namesize, RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(prec)));
/* Mark this rule as being compiled so nested references can detect it. */
423 r_array_add(dbex->inlinestack, &rec);
424 rpa_dbex_debug_recordtail(dbex, rec);
425 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
427 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
428 prec = rpa_dbex_record(dbex, rec);
430 rpa_dbex_debug_recordhead(dbex, rec);
431 r_array_removelast(dbex->inlinestack);
432 if (!r_array_empty(dbex->inlinestack)) {
433 rpa_compiler_inlinerule_end(dbex->co);
435 if ((prec->usertype & RPA_LOOP_PATH)) {
436 rpa_compiler_loop_end(dbex->co);
438 rpa_compiler_rule_end(dbex->co);
441 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_ANONYMOUSRULE.
 * Emits RPA_SHIFT, then RPA_EXITONBITMAP when the record's bitmap value is
 * non-zero, and opens an expression with RPA_MATCH_NONE and that bitmap.
 * After the children are played the expression is closed and RPA_EMITTAIL
 * followed by RVM_EXT are emitted.
 * The statement executed when child playback fails is not shown in this
 * listing.
 */
446 static int rpa_dbex_rh_anonymousrule(rpadbex_t *dbex, long rec)
448 rarray_t *records = dbex->records;
449 rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
451 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
452 prec = rpa_dbex_record(dbex, rec);
454 rpa_dbex_debug_recordhead(dbex, rec);
455 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_SHIFT, XX, XX, XX, 0));
456 if (RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(prec))) {
457 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EXITONBITMAP, DA, XX, XX, RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(prec))));
459 rpa_compiler_exp_begin(dbex->co, RPA_MATCH_NONE, RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(prec)));
460 rpa_dbex_debug_recordtail(dbex, rec);
461 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
463 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
464 prec = rpa_dbex_record(dbex, rec);
466 rpa_dbex_debug_recordhead(dbex, rec);
467 rpa_compiler_exp_end(dbex->co);
468 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EMITTAIL, XX, XX, XX, 0));
469 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_EXT, XX, XX, XX, 0));
470 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_CHAR.
 * After playing the children it decodes one UTF-8 character from the END
 * record's input (r_utf8_mbtowc over input .. input + inputsiz) and emits the
 * character-match instruction chosen by rpa_dbex_getmatchchr from the
 * record's match type, with the decoded code point as its operand. A
 * relocated RVM_BLES branch to the current expression's end index follows.
 * The statements executed when decoding or child playback fails are not shown
 * in this listing.
 */
476 static int rpa_dbex_rh_char(rpadbex_t *dbex, long rec)
479 rarray_t *records = dbex->records;
482 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
483 prec = rpa_dbex_record(dbex, rec);
485 rpa_dbex_debug_recordhead(dbex, rec);
486 rpa_dbex_debug_recordtail(dbex, rec);
487 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
489 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
490 prec = rpa_dbex_record(dbex, rec);
492 rpa_dbex_debug_recordhead(dbex, rec);
493 if (r_utf8_mbtowc(&wc, (const unsigned char*) prec->input, (const unsigned char*)prec->input + prec->inputsiz) < 0) {
497 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
498 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
499 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_SPECIALCHAR.
 * Same shape as the plain character handler, but the instruction is chosen by
 * rpa_dbex_getmatchspecialchr: the children are played, one UTF-8 character
 * is decoded from the END record's input, the special-character match
 * instruction is emitted with that code point, and a relocated RVM_BLES
 * branch to the current expression's end index follows.
 * The statements executed when decoding or child playback fails are not shown
 * in this listing.
 */
504 static int rpa_dbex_rh_specialchar(rpadbex_t *dbex, long rec)
507 rarray_t *records = dbex->records;
510 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
511 prec = rpa_dbex_record(dbex, rec);
513 rpa_dbex_debug_recordhead(dbex, rec);
514 rpa_dbex_debug_recordtail(dbex, rec);
515 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
517 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
518 prec = rpa_dbex_record(dbex, rec);
520 rpa_dbex_debug_recordhead(dbex, rec);
521 if (r_utf8_mbtowc(&wc, (const unsigned char*) prec->input, (const unsigned char*)prec->input + prec->inputsiz) < 0) {
525 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchspecialchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
526 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
527 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_CLS.
 * Opens a compiler class with the record's match type, plays the children,
 * closes the class and then adds a relocated RVM_BLES branch to the end index
 * of the expression that is current after the class has been closed.
 * The statement executed when child playback fails is not shown in this
 * listing.
 */
532 static int rpa_dbex_rh_cls(rpadbex_t *dbex, long rec)
534 rarray_t *records = dbex->records;
537 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
538 prec = rpa_dbex_record(dbex, rec);
540 rpa_dbex_debug_recordhead(dbex, rec);
541 rpa_compiler_class_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
542 rpa_dbex_debug_recordtail(dbex, rec);
543 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
545 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
546 prec = rpa_dbex_record(dbex, rec);
548 rpa_dbex_debug_recordhead(dbex, rec);
549 rpa_compiler_class_end(dbex->co);
550 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
551 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_CLSCHAR.
 * Decodes one UTF-8 character from the END record's input and always emits
 * RPA_MATCHCHR_NAN for it (the record's match type is not consulted). The
 * relocated branch to the current expression's end index uses RVM_BGRE here,
 * unlike the RVM_BLES used by the plain character handler.
 * The statements executed when decoding or child playback fails are not shown
 * in this listing.
 */
556 static int rpa_dbex_rh_clschar(rpadbex_t *dbex, long rec)
559 rarray_t *records = dbex->records;
562 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
563 prec = rpa_dbex_record(dbex, rec);
565 rpa_dbex_debug_recordhead(dbex, rec);
566 rpa_dbex_debug_recordtail(dbex, rec);
567 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
569 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
570 prec = rpa_dbex_record(dbex, rec);
572 rpa_dbex_debug_recordhead(dbex, rec);
573 if (r_utf8_mbtowc(&wc, (const unsigned char*) prec->input, (const unsigned char*)prec->input + prec->inputsiz) < 0) {
577 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, wc));
578 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
579 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_MINOP.
 * Opens an expression with the record's match type and bitmap value and plays
 * the children in REVERSE order (rpa_dbex_playreversechildrecords), which is
 * what distinguishes it from the other expression handlers. The expression is
 * then closed and a relocated RVM_BLES branch to the enclosing expression's
 * end index is added.
 */
584 static int rpa_dbex_rh_minexp(rpadbex_t *dbex, long rec)
586 rarray_t *records = dbex->records;
589 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
590 prec = rpa_dbex_record(dbex, rec);
592 rpa_dbex_debug_recordhead(dbex, rec);
593 rpa_compiler_exp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK, RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(prec)));
594 rpa_dbex_debug_recordtail(dbex, rec);
595 if (rpa_dbex_playreversechildrecords(dbex, rec) < 0)
597 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
598 prec = rpa_dbex_record(dbex, rec);
600 rpa_dbex_debug_recordhead(dbex, rec);
601 rpa_compiler_exp_end(dbex->co);
602 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
603 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_BRACKETEXP and RPA_PRODUCTION_REQOP.
 * Opens an expression with the record's match type and a bitmap of 0, plays
 * the children in order, closes the expression and adds a relocated RVM_BLES
 * branch to the enclosing expression's end index.
 */
608 static int rpa_dbex_rh_exp(rpadbex_t *dbex, long rec)
610 rarray_t *records = dbex->records;
613 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
614 prec = rpa_dbex_record(dbex, rec);
616 rpa_dbex_debug_recordhead(dbex, rec);
617 rpa_compiler_exp_begin(dbex->co, (prec->usertype & RPA_MATCH_MASK), 0);
618 rpa_dbex_debug_recordtail(dbex, rec);
619 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
621 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
622 prec = rpa_dbex_record(dbex, rec);
624 rpa_dbex_debug_recordhead(dbex, rec);
625 rpa_compiler_exp_end(dbex->co);
626 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
627 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_OROP.
 * Opens an alternation expression (rpa_compiler_altexp_begin) with the
 * record's match type and bitmap value, plays the children, closes the
 * alternation and adds a relocated RVM_BLES branch to the enclosing
 * expression's end index.
 */
632 static int rpa_dbex_rh_orop(rpadbex_t *dbex, long rec)
634 rarray_t *records = dbex->records;
637 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
638 prec = rpa_dbex_record(dbex, rec);
640 rpa_dbex_debug_recordhead(dbex, rec);
641 rpa_compiler_altexp_begin(dbex->co, (prec->usertype & RPA_MATCH_MASK), RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(prec)));
642 rpa_dbex_debug_recordtail(dbex, rec);
643 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
645 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
646 prec = rpa_dbex_record(dbex, rec);
648 rpa_dbex_debug_recordhead(dbex, rec);
649 rpa_compiler_altexp_end(dbex->co);
650 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
651 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_NOROP.
 * Opens an alternation expression with the record's match type and a bitmap
 * of 0, plays the children and closes the alternation. The relocated branch
 * to the enclosing expression's end index uses RVM_BGRE here, whereas the
 * OROP handler uses RVM_BLES.
 */
656 static int rpa_dbex_rh_norop(rpadbex_t *dbex, long rec)
658 rarray_t *records = dbex->records;
661 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
662 prec = rpa_dbex_record(dbex, rec);
664 rpa_dbex_debug_recordhead(dbex, rec);
665 rpa_compiler_altexp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK, 0);
666 rpa_dbex_debug_recordtail(dbex, rec);
667 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
669 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
670 prec = rpa_dbex_record(dbex, rec);
672 rpa_dbex_debug_recordhead(dbex, rec);
673 rpa_compiler_altexp_end(dbex->co);
674 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
675 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_NOTOP.
 * Opens a negated expression (rpa_compiler_notexp_begin) with the record's
 * match type, plays the children, closes it and adds a relocated RVM_BLES
 * branch to the enclosing expression's end index.
 */
680 static int rpa_dbex_rh_notop(rpadbex_t *dbex, long rec)
682 rarray_t *records = dbex->records;
685 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
686 prec = rpa_dbex_record(dbex, rec);
688 rpa_dbex_debug_recordhead(dbex, rec);
689 rpa_compiler_notexp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
690 rpa_dbex_debug_recordtail(dbex, rec);
691 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
693 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
694 prec = rpa_dbex_record(dbex, rec);
696 rpa_dbex_debug_recordhead(dbex, rec);
697 rpa_compiler_notexp_end(dbex->co);
698 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
699 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_CHARRNG.
 * Resets dbex->co->currange to (0, 0) and plays the children; the
 * BEGINCHAR / ENDCHAR handlers store the decoded bounds into currange.p1 and
 * currange.p2. Afterwards one RPA_MATCHRNG_NAN instruction is emitted with
 * the bounds ordered low-to-high (they are swapped when p1 is not below p2),
 * followed by a relocated RVM_BGRE branch to the current expression's end
 * index.
 * NOTE(review): the `else` between the two emit lines is missing from this
 * listing; only one of them is expected to execute - confirm.
 */
704 static int rpa_dbex_rh_range(rpadbex_t *dbex, long rec)
706 rarray_t *records = dbex->records;
709 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
710 prec = rpa_dbex_record(dbex, rec);
712 rpa_dbex_debug_recordhead(dbex, rec);
713 dbex->co->currange.p1 = 0;
714 dbex->co->currange.p2 = 0;
715 rpa_dbex_debug_recordtail(dbex, rec);
716 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
718 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
719 prec = rpa_dbex_record(dbex, rec);
721 rpa_dbex_debug_recordhead(dbex, rec);
722 if (dbex->co->currange.p1 < dbex->co->currange.p2)
723 rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p1, dbex->co->currange.p2));
725 rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p2, dbex->co->currange.p1));
726 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
727 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_NUMRNG.
 * Unlike the character-range handler, the bounds are read here directly from
 * the children: the END record of the first child is converted into
 * currange.p1 and the END record of the last child into currange.p2, both
 * through rpa_record2long. The children are then played, and one
 * RPA_MATCHRNG_NAN instruction is emitted with the bounds ordered
 * low-to-high, followed by a relocated RVM_BGRE branch to the current
 * expression's end index.
 * NOTE(review): the statements executed when a conversion fails, and the
 * `else` between the two emit lines, are missing from this listing.
 */
732 static int rpa_dbex_rh_numrange(rpadbex_t *dbex, long rec)
734 rarray_t *records = dbex->records;
738 * Fix me: probably we don't need to access the children from here. There should be a way just to
739 * play them a regular records!
742 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
743 prec = rpa_dbex_record(dbex, rec);
745 rpa_dbex_debug_recordhead(dbex, rec);
746 child = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_END));
747 if (rpa_record2long(child, &dbex->co->currange.p1) < 0)
749 child = rpa_dbex_record(dbex, rpa_recordtree_lastchild(dbex->records, rec, RPA_RECORD_END));
750 if (rpa_record2long(child, &dbex->co->currange.p2) < 0)
752 rpa_dbex_debug_recordtail(dbex, rec);
753 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
755 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
756 prec = rpa_dbex_record(dbex, rec);
758 rpa_dbex_debug_recordhead(dbex, rec);
759 if (dbex->co->currange.p1 < dbex->co->currange.p2)
760 rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p1, dbex->co->currange.p2));
762 rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p2, dbex->co->currange.p1));
763 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
764 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_CLSNUM.
 * After playing the children, the END record of the first child is converted
 * to a number with rpa_record2long and used as the operand of the
 * character-match instruction chosen by rpa_dbex_getmatchchr. A relocated
 * RVM_BGRE branch to the current expression's end index follows.
 * Note that the first-child lookup is made after `rec` has already been
 * moved to the END record of this node.
 * The statements executed on conversion or playback failure are not shown in
 * this listing.
 */
769 static int rpa_dbex_rh_clsnum(rpadbex_t *dbex, long rec)
771 rarray_t *records = dbex->records;
776 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
777 prec = rpa_dbex_record(dbex, rec);
779 rpa_dbex_debug_recordhead(dbex, rec);
780 rpa_dbex_debug_recordtail(dbex, rec);
781 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
783 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
784 prec = rpa_dbex_record(dbex, rec);
786 rpa_dbex_debug_recordhead(dbex, rec);
787 child = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_END));
788 if (rpa_record2long(child, &wc) < 0)
790 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
791 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
792 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_BEGINCHAR.
 * Emits no instructions: after playing the children it decodes one UTF-8
 * character from the END record's input and stores it in
 * dbex->co->currange.p1 for the enclosing range handler to use.
 * The statements executed when decoding or child playback fails are not shown
 * in this listing.
 */
797 static int rpa_dbex_rh_beginchar(rpadbex_t *dbex, long rec)
799 rarray_t *records = dbex->records;
803 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
804 prec = rpa_dbex_record(dbex, rec);
806 rpa_dbex_debug_recordhead(dbex, rec);
807 rpa_dbex_debug_recordtail(dbex, rec);
808 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
810 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
811 prec = rpa_dbex_record(dbex, rec);
813 rpa_dbex_debug_recordhead(dbex, rec);
814 if (r_utf8_mbtowc(&wc, (const unsigned char*) prec->input, (const unsigned char*)prec->input + prec->inputsiz) < 0) {
818 dbex->co->currange.p1 = wc;
819 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_ENDCHAR.
 * Emits no instructions: after playing the children it decodes one UTF-8
 * character from the END record's input and stores it in
 * dbex->co->currange.p2 for the enclosing range handler to use.
 * The statements executed when decoding or child playback fails are not shown
 * in this listing.
 */
824 static int rpa_dbex_rh_endchar(rpadbex_t *dbex, long rec)
826 rarray_t *records = dbex->records;
830 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
831 prec = rpa_dbex_record(dbex, rec);
833 rpa_dbex_debug_recordhead(dbex, rec);
834 rpa_dbex_debug_recordtail(dbex, rec);
835 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
837 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
838 prec = rpa_dbex_record(dbex, rec);
840 rpa_dbex_debug_recordhead(dbex, rec);
841 if (r_utf8_mbtowc(&wc, (const unsigned char*) prec->input, (const unsigned char*)prec->input + prec->inputsiz) < 0) {
845 dbex->co->currange.p2 = wc;
846 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Handler registered for RPA_PRODUCTION_ALTBRANCH and RPA_PRODUCTION_NEGBRANCH.
 * Records flagged RPA_NONLOOP_PATH are opened and closed with the
 * nonloopybranch_begin / nonloopybranch_end pair; the other visible calls are
 * branch_begin (with the record's bitmap value) and branch_end. The children
 * are played in between. The same flag is tested on the START and on the END
 * record to pick the matching closing call.
 * NOTE(review): the `else` lines are missing from this listing; the two
 * variants are expected to be mutually exclusive - confirm.
 */
851 static int rpa_dbex_rh_branch(rpadbex_t *dbex, long rec)
853 rarray_t *records = dbex->records;
856 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
857 prec = rpa_dbex_record(dbex, rec);
859 rpa_dbex_debug_recordhead(dbex, rec);
860 if (prec->usertype & RPA_NONLOOP_PATH) {
861 rpa_compiler_nonloopybranch_begin(dbex->co, (prec->usertype & RPA_MATCH_MASK));
863 rpa_compiler_branch_begin(dbex->co, (prec->usertype & RPA_MATCH_MASK), RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(prec)));
865 rpa_dbex_debug_recordtail(dbex, rec);
866 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
868 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
869 prec = rpa_dbex_record(dbex, rec);
871 rpa_dbex_debug_recordhead(dbex, rec);
872 if (prec->usertype & RPA_NONLOOP_PATH) {
873 rpa_compiler_nonloopybranch_end(dbex->co);
875 rpa_compiler_branch_end(dbex->co);
877 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Emit the code for a reference back into a rule that is currently being
 * compiled as a loop (called from the reference handler).
 * Only the RPA_MATCH_OPTIONAL bit of the record's usertype is passed to
 * rpa_compiler_exp_begin. Inside the expression the emitted sequence is:
 *   RVM_CMP on R_LOO with immediate 0, RVM_BGRE with offset 3,
 *   RVM_MOVS R0 <- -1, relocated RVM_BLES to the expression end,
 *   RVM_ADD R_TOP <- R_TOP + R_LOO, RVM_MOVS R0 <- R_LOO,
 *   relocated RVM_BLES to the expression end.
 * NOTE(review): this reads as "fail with -1 unless R_LOO is positive,
 * otherwise advance R_TOP by R_LOO and return R_LOO"; the register semantics
 * are not defined in this file - confirm against the VM documentation.
 */
882 static void rpa_dbex_rh_loopref(rpadbex_t *dbex, rparecord_t *prec)
885 * We ignore, it doesn't make sense for loops:
888 rpa_compiler_exp_begin(dbex->co, (prec->usertype & RPA_MATCH_OPTIONAL), 0);
889 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_CMP, R_LOO, DA, XX, 0));
890 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, 3));
891 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_MOVS, R0, DA, XX, -1));
892 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
893 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_ADD, R_TOP, R_TOP, R_LOO, 0));
894 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_MOVS, R0, R_LOO, XX, 0));
895 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
896 rpa_compiler_exp_end(dbex->co);
/*
 * Handler registered for RPA_PRODUCTION_AREF and RPA_PRODUCTION_CREF
 * (references to another rule by name).
 *
 * When the record is flagged RPA_LOOP_PATH and rpa_parseinfo_loopdetect
 * reports a loop back to the first inlined rule, the referenced rule is
 * looked up by name. The visible error path reports RPA_E_UNRESOLVEDSYMBOL
 * together with the rule name. If the referenced rule is already on the
 * inline stack (rpa_dbex_findinlined) a loop reference is emitted; otherwise
 * the rule's own records are played in place, wrapped in an
 * RPA_MATCH_OPTIONAL expression when the reference carries that bit.
 * The other visible path emits an ordinary rpa_compiler_reference call.
 * A relocated RVM_BLES branch to the current expression's end index is then
 * added and the children are played.
 *
 * The results of the in-place rpa_dbex_playrecord calls are not examined on
 * the visible lines.
 * NOTE(review): the `else`, closing-brace and error-return lines (including
 * the check that guards the unresolved-symbol report) are missing from this
 * listing, so the branch nesting above must be confirmed against the
 * complete source.
 */
900 static int rpa_dbex_rh_aref(rpadbex_t *dbex, long rec)
902 const char *name = NULL;
904 rpa_ruleinfo_t *info;
905 rarray_t *records = dbex->records;
908 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
909 prec = rpa_dbex_record(dbex, rec);
911 rpa_dbex_debug_recordhead(dbex, rec);
912 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
917 if ((prec->usertype & RPA_LOOP_PATH) && rpa_parseinfo_loopdetect(dbex, rec, rpa_dbex_firstinlined(dbex))) {
918 info = (rpa_ruleinfo_t *) r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
920 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_UNRESOLVEDSYMBOL);
921 RPA_DBEX_SETERRINFO_NAME(dbex, name, namesize);
924 if (rpa_dbex_findinlined(dbex, info->startrec)) {
925 rpa_dbex_rh_loopref(dbex, prec);
927 if (prec->usertype & RPA_MATCH_OPTIONAL) {
929 * Most probably this is useless case - loop refs shouldn't have quantitative modifiers
930 * but in case they do we wrap the inlined production rule in quantitative expression.
931 * The inlined named rule can take the quantitative argument, but I just don't have
932 * a clean way to pass it from here - so, lets play the records inside an expression that
933 * has the right quantitative argument.
934 * We ignore, it doesn't make sense for loops:
937 rpa_compiler_exp_begin(dbex->co, RPA_MATCH_OPTIONAL, 0);
938 rpa_dbex_playrecord(dbex, info->startrec);
939 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
940 rpa_compiler_exp_end(dbex->co);
942 rpa_dbex_playrecord(dbex, info->startrec);
946 rpa_compiler_reference(dbex->co, name, namesize, (prec->usertype & RPA_MATCH_MASK));
948 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
949 rpa_dbex_debug_recordtail(dbex, rec);
950 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
952 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
953 prec = rpa_dbex_record(dbex, rec);
955 rpa_dbex_debug_recordhead(dbex, rec);
956 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Allocate and initialise a rule database (rpadbex_t).
 * The object is zero-allocated, then its compiler, parser, text / record /
 * temp-record arrays, rule hash array, record stack, inline stack and the
 * handler table (RPA_PRODUCTION_COUNT zeroed slots) are created. The
 * RPA_DBEXCFG_OPTIMIZATIONS and RPA_DBEXCFG_BITMAP options are both set to 1,
 * and one record handler is registered per supported production id.
 * Productions left without a handler fall back to rpa_dbex_rh_default in
 * rpa_dbex_playrecord.
 * NOTE(review): no allocation result is checked on the visible lines; the
 * return statement is missing from this listing.
 */
961 rpadbex_t *rpa_dbex_create(void)
963 rpadbex_t *dbex = (rpadbex_t *) r_zmalloc(sizeof(*dbex));
965 dbex->co = rpa_compiler_create();
966 dbex->pa = rpa_parser_create();
967 dbex->text = r_array_create(sizeof(char *));
968 dbex->records = r_array_create(sizeof(rparecord_t));
969 dbex->temprecords = r_array_create(sizeof(rparecord_t));
970 dbex->rules = r_harray_create(sizeof(rpa_ruleinfo_t));
971 dbex->recstack = r_array_create(sizeof(unsigned long));
972 dbex->inlinestack = r_array_create(sizeof(unsigned long));
973 dbex->handlers = r_zmalloc(sizeof(rpa_dbex_recordhandler) * RPA_PRODUCTION_COUNT);
974 rpa_dbex_cfgset(dbex, RPA_DBEXCFG_OPTIMIZATIONS, 1);
975 rpa_dbex_cfgset(dbex, RPA_DBEXCFG_BITMAP, 1);
/* Handler table: production id -> code-generation callback. */
977 dbex->handlers[RPA_PRODUCTION_NONE] = rpa_dbex_rh_default;
978 dbex->handlers[RPA_PRODUCTION_NAMEDRULE] = rpa_dbex_rh_namedrule;
979 dbex->handlers[RPA_PRODUCTION_ANONYMOUSRULE] = rpa_dbex_rh_anonymousrule;
980 dbex->handlers[RPA_PRODUCTION_CLS] = rpa_dbex_rh_cls;
981 dbex->handlers[RPA_PRODUCTION_CHAR] = rpa_dbex_rh_char;
982 dbex->handlers[RPA_PRODUCTION_SPECIALCHAR] = rpa_dbex_rh_specialchar;
983 dbex->handlers[RPA_PRODUCTION_CLSCHAR] = rpa_dbex_rh_clschar;
984 dbex->handlers[RPA_PRODUCTION_AREF] = rpa_dbex_rh_aref;
985 dbex->handlers[RPA_PRODUCTION_CREF] = rpa_dbex_rh_aref;
986 dbex->handlers[RPA_PRODUCTION_BRACKETEXP] = rpa_dbex_rh_exp;
987 dbex->handlers[RPA_PRODUCTION_OROP] = rpa_dbex_rh_orop;
988 dbex->handlers[RPA_PRODUCTION_NOTOP] = rpa_dbex_rh_notop;
989 dbex->handlers[RPA_PRODUCTION_ALTBRANCH] = rpa_dbex_rh_branch;
990 dbex->handlers[RPA_PRODUCTION_NEGBRANCH] = rpa_dbex_rh_branch;
991 dbex->handlers[RPA_PRODUCTION_CHARRNG] = rpa_dbex_rh_range;
992 dbex->handlers[RPA_PRODUCTION_NUMRNG] = rpa_dbex_rh_numrange;
993 dbex->handlers[RPA_PRODUCTION_CLSNUM] = rpa_dbex_rh_clsnum;
994 dbex->handlers[RPA_PRODUCTION_BEGINCHAR] = rpa_dbex_rh_beginchar;
995 dbex->handlers[RPA_PRODUCTION_ENDCHAR] = rpa_dbex_rh_endchar;
996 dbex->handlers[RPA_PRODUCTION_NOROP] = rpa_dbex_rh_norop;
997 dbex->handlers[RPA_PRODUCTION_REQOP] = rpa_dbex_rh_exp;
998 dbex->handlers[RPA_PRODUCTION_MINOP] = rpa_dbex_rh_minexp;
999 dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMIT] = rpa_dbex_rh_emit;
1000 dbex->handlers[RPA_PRODUCTION_DIRECTIVEABORT] = rpa_dbex_rh_abort;
1001 dbex->handlers[RPA_PRODUCTION_DIRECTIVENOEMIT] = rpa_dbex_rh_noemit;
1002 dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMITALL] = rpa_dbex_rh_emitall;
1003 dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMITNONE] = rpa_dbex_rh_emitnone;
1004 dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMITID] = rpa_dbex_rh_uid;
/*
 * Release everything owned by a rule database created with rpa_dbex_create.
 * Each string stored in dbex->text is freed individually before the text
 * array itself is destroyed; the compiler, parser, rule hash array, record
 * arrays, stacks and the handler table are released as well.
 * NOTE(review): a NULL guard and the release of `dbex` itself are not visible
 * in this listing - confirm against the complete source.
 */
1010 void rpa_dbex_destroy(rpadbex_t *dbex)
1014 for (i = 0; i < r_array_length(dbex->text); i++)
1015 r_free(r_array_index(dbex->text, i, char*));
1016 rpa_compiler_destroy(dbex->co);
1017 rpa_parser_destroy(dbex->pa);
1018 r_harray_destroy(dbex->rules);
1019 r_array_destroy(dbex->records);
1020 r_array_destroy(dbex->temprecords);
1021 r_array_destroy(dbex->recstack);
1022 r_array_destroy(dbex->inlinestack);
1023 r_array_destroy(dbex->text);
1024 r_free(dbex->handlers);
/*
 * Recursive worker behind rpa_parseinfo_loopdetect.
 *
 * Starting at record parent it walks the START children of the record
 * tree, follows AREF/CREF references into the rules they name, and looks
 * for the record loopto. inderction is the recursion depth (the initial
 * call passes 0, every nested call passes inderction + 1). dbex->recstack
 * holds the records on the current recursion path.
 *
 * The recursive calls below treat a result greater than zero as: found.
 */
1030 static int rpa_parseinfo_loopdetect_do(rpadbex_t *dbex, long parent, long loopto, int inderction)
/* Arriving at loopto only counts after at least one level of recursion. */
1038 if (parent == loopto && inderction > 0)
/*
 * Check whether parent is already on recstack, i.e. is being visited
 * further up the call chain.
 * NOTE(review): presumably the search gives up on this path in that case
 * so the recursion terminates -- confirm.
 */
1040 for (i = 0; i < (long)r_array_length(dbex->recstack); i++) {
1041 if (parent == r_array_index(dbex->recstack, i, long))
1044 r_array_add(dbex->recstack, &parent);
1046 if (!(prec = (rparecord_t *)r_array_slot(dbex->records, parent)))
1048 if (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF)
1051 i = rpa_recordtree_firstchild(dbex->records, parent, RPA_RECORD_START);
1052 for (; i >= 0; i = rpa_recordtree_next(dbex->records, i, RPA_RECORD_START)) {
1053 prec = (rparecord_t *)r_array_slot(dbex->records, i);
/* NOTE(review): rule name children are presumably skipped -- confirm. */
1054 if (prec->ruleuid == RPA_PRODUCTION_RULENAME)
1056 if (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF) {
1057 rpa_ruleinfo_t *info;
1058 if ((inderction > 0 || i != parent) && i == loopto) {
1060 * We found what we are looking for
/* Resolve the referenced rule by name and continue the search inside it. */
1065 if (rpa_dbex_rulename(dbex, i, &name, &namesiz) < 0)
1067 info = (rpa_ruleinfo_t *) r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesiz));
1070 if ((ret = rpa_parseinfo_loopdetect_do(dbex, info->startrec, loopto, inderction + 1)) > 0)
/* NOTE(review): presumably the non-reference case, descending into child i itself. */
1073 if ((ret = rpa_parseinfo_loopdetect_do(dbex, i, loopto, inderction + 1)) > 0)
/*
 * Tests for a child that is not optional and is a reference, char, class
 * or special char.
 * NOTE(review): presumably the scan of the remaining siblings stops at
 * the first such child -- confirm.
 */
1077 if ((prec->usertype & RPA_MATCH_OPTIONAL) == 0 && (prec->ruleuid == RPA_PRODUCTION_CREF || prec->ruleuid == RPA_PRODUCTION_AREF ||
1078 prec->ruleuid == RPA_PRODUCTION_CHAR || prec->ruleuid == RPA_PRODUCTION_CLS || prec->ruleuid == RPA_PRODUCTION_SPECIALCHAR))
/* Pop parent again; pairs with the r_array_add near the top. */
1083 r_array_removelast(dbex->recstack);
/*
 * Entry point for loop detection between two records.
 *
 * The final result is normalized to 1 or 0 and tells whether the worker
 * found loopto when starting from parent. When parent and loopto differ,
 * the worker is first run in the opposite direction (from loopto looking
 * for parent).
 * NOTE(review): a zero result from that reverse check presumably ends the
 * function early with 0 -- confirm.
 */
1088 static int rpa_parseinfo_loopdetect(rpadbex_t *dbex, long parent, long loopto)
1090 if (parent != loopto) {
1092 * Make sure we are dealing with a loop first
1094 if (!rpa_parseinfo_loopdetect_do(dbex, loopto, parent, 0))
1098 return (rpa_parseinfo_loopdetect_do(dbex, parent, loopto, 0)) ? 1 : 0;
/*
 * Flag the records that take part in a loop.
 *
 * If loop detection from parent back to parent succeeds, RPA_LOOP_PATH is
 * ORed into the usertype of parent and the same procedure is applied
 * recursively to each of its START children.
 */
1102 static void rpa_parseinfo_marklooppath(rpadbex_t *dbex, long parent)
1106 if (rpa_parseinfo_loopdetect(dbex, parent, parent) > 0) {
1107 rpa_record_setusertype(dbex->records, parent, RPA_LOOP_PATH, RVALSET_OR);
1108 for (i = rpa_recordtree_firstchild(dbex->records, parent, RPA_RECORD_START); i >= 0; i = rpa_recordtree_next(dbex->records, i, RPA_RECORD_START)) {
1109 rpa_parseinfo_marklooppath(dbex, i);
/*
 * Look up the rule called name (namesize bytes) and run loop detection
 * from the first record of that rule towards record loopto.
 *
 * NOTE(review): the initializer below reads info = info = ...; the
 * doubled assignment is redundant and looks like a typo.
 */
1115 static int rpa_parseinfo_rule_checkforloop(rpadbex_t *dbex, const char *name, rsize_t namesize, long loopto)
1117 rpa_ruleinfo_t *info = info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
1121 return rpa_parseinfo_loopdetect(dbex, info->startrec, loopto);
/*
 * Compute the loop related flags for all records.
 *
 * First pass: for every entry of dbex->rules, mark the loop path starting
 * at the first record of the rule.
 * Second pass: every ALTBRANCH start record that is not flagged
 * RPA_LOOP_PATH but whose parent record is, gets RPA_NONLOOP_PATH ORed
 * into its usertype.
 */
1125 static void rpa_dbex_buildloopinfo(rpadbex_t *dbex)
1128 rharray_t *rules = dbex->rules;
1129 rpa_ruleinfo_t *info;
1131 for (i = 0; i < r_array_length(rules->members); i++) {
1132 if ((info = (rpa_ruleinfo_t *)r_harray_get(rules, i)) != NULL)
1133 rpa_parseinfo_marklooppath(dbex, info->startrec);
1137 * Mark the non-loop branches.
1139 for (i = 0; i < r_array_length(dbex->records); i++) {
1140 rparecord_t *prec = (rparecord_t *)r_array_slot(dbex->records, i);
1141 if (prec->type == RPA_RECORD_START &&
1142 (prec->ruleuid == RPA_PRODUCTION_ALTBRANCH) &&
1143 (prec->usertype & RPA_LOOP_PATH) == 0) {
/* From here on prec refers to the parent of the branch, not the branch. */
1144 p = rpa_recordtree_parent(dbex->records, i, RPA_RECORD_START);
1146 prec = (rparecord_t *)r_array_slot(dbex->records, p);
1147 if (prec && (prec->usertype & RPA_LOOP_PATH))
1148 rpa_record_setusertype(dbex->records, i, RPA_NONLOOP_PATH, RVALSET_OR);
/*
 * Rebuild the rule table dbex->rules from dbex->records.
 *
 * The previous table is destroyed and a fresh hash array of
 * rpa_ruleinfo_t entries is created. The records are then scanned once;
 * each START record of a named rule, an anonymous rule or a directive
 * (ruleuid between RPA_PRODUCTION_DIRECTIVEEMIT and
 * RPA_PRODUCTION_DIRECTIVEEMITID) produces one entry, after which the
 * scan jumps over the records belonging to that entry.
 */
1155 static void rpa_dbex_buildruleinfo(rpadbex_t *dbex)
1158 rpa_ruleinfo_t info;
1159 unsigned int nrecords;
1160 const char *name = NULL;
1161 rsize_t namesize = 0;
1165 r_object_destroy((robject_t *)dbex->rules);
1168 dbex->rules = r_harray_create(sizeof(rpa_ruleinfo_t));
1170 for (i = 0, nrecords = r_array_length(dbex->records); i < nrecords; i++) {
1171 if (!(rec = rpa_dbex_record(dbex, i)))
/* Named rule: the entry is keyed by the rule name. */
1173 if ((rec->ruleuid == RPA_PRODUCTION_NAMEDRULE) && (rec->type & RPA_RECORD_START)) {
1174 r_memset(&info, 0, sizeof(info));
1175 info.type = RPA_RULEINFO_NAMEDRULE;
1177 info.sizerecs = rpa_recordtree_size(dbex->records, i);
1178 if (info.sizerecs < 0)
1180 if (rpa_dbex_rulename(dbex, i, &name, &namesize) < 0) {
1183 r_harray_add(dbex->rules, name, namesize, &info);
/* Skip the rest of this rule; the loop increment supplies the final step. */
1184 i += info.sizerecs - 1;
/* Anonymous rule: keyed by the input text of its END record. */
1185 } else if ((rec->ruleuid == RPA_PRODUCTION_ANONYMOUSRULE) && (rec->type & RPA_RECORD_START)) {
1186 r_memset(&info, 0, sizeof(info));
1187 info.type = RPA_RULEINFO_ANONYMOUSRULE;
1189 info.sizerecs = rpa_recordtree_size(dbex->records, i);
1190 if (info.sizerecs < 0)
1192 if ((rec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, i, RPA_RECORD_END))))
1193 r_harray_add(dbex->rules, rec->input, rec->inputsiz, &info);
1194 i += info.sizerecs - 1;
/* Directive: also keyed by the input text of its END record. */
1195 } else if ((rec->type & RPA_RECORD_START) && (rec->ruleuid >= RPA_PRODUCTION_DIRECTIVEEMIT) && (rec->ruleuid <= RPA_PRODUCTION_DIRECTIVEEMITID)) {
1196 r_memset(&info, 0, sizeof(info));
1197 info.type = RPA_RULEINFO_DIRECTIVE;
1199 info.sizerecs = rpa_recordtree_size(dbex->records, i);
1200 if (info.sizerecs < 0)
1202 if ((rec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, i, RPA_RECORD_END))))
1203 r_harray_add(dbex->rules, rec->input, rec->inputsiz, &info);
1204 i += info.sizerecs - 1;
/*
 * Record tree walk callback (handed to rpa_recordtree_walk by
 * rpa_dbex_copyrecords) that transfers parsed records into dbex->records.
 * userdata is the rpadbex_t instance.
 *
 * OCCURENCE records get special handling: for the END record the first
 * input character selects one of the RPA_MATCH_* values, which is ORed
 * into the usertype of the record currently last in dbex->records.
 * Every other record with a nonzero ruleuid is appended to dbex->records.
 * With dbex->optimizations set, appending the END record of an OROP
 * triggers rpa_optimiztion_orop on the matching START record.
 */
1211 static long rpa_dbex_copy_handler(rarray_t *records, long rec, rpointer userdata)
1213 rpadbex_t *dbex = (rpadbex_t *)userdata;
1216 rparecord_t *prec = (rparecord_t *)r_array_slot(records, rec);
1217 if (prec->ruleuid == RPA_PRODUCTION_OCCURENCE && (prec->type & RPA_RECORD_START)) {
1221 } else if (prec->ruleuid == RPA_PRODUCTION_OCCURENCE && (prec->type & (RPA_RECORD_END))) {
1222 ruint32 usertype = RPA_MATCH_NONE;
1225 * Don't copy it but set the usertype of the previous record accordingly.
1227 switch (*prec->input) {
1229 usertype = RPA_MATCH_OPTIONAL;
1232 usertype = RPA_MATCH_MULTIPLE;
1235 usertype = RPA_MATCH_MULTIOPT;
1238 usertype = RPA_MATCH_NONE;
1241 lastrec = r_array_length(dbex->records) - 1;
1243 rpa_record_setusertype(dbex->records, lastrec, usertype, RVALSET_OR);
1244 } else if (prec->ruleuid) {
1245 index = r_array_add(dbex->records, prec);
1247 * Optimizations. Lets apply the optimizations while we copy the records.
1248 * This is probably not the most clean way to apply optimizations. In the future
1249 * we should probably think of optimization pass right before compiling.
1251 if (dbex->optimizations) {
1252 if (prec->ruleuid == RPA_PRODUCTION_OROP && (prec->type & RPA_RECORD_END)) {
1253 rpa_optimiztion_orop(dbex->records, rpa_recordtree_get(dbex->records, index, RPA_RECORD_START));
/*
 * Feed the freshly parsed records (dbex->temprecords) through
 * rpa_dbex_copy_handler: every sibling tree, starting with the one at
 * index 0, is walked in turn with dbex passed as the callback userdata.
 */
1262 static void rpa_dbex_copyrecords(rpadbex_t *dbex)
1265 rarray_t *records = dbex->temprecords;
1267 for (i = rpa_recordtree_get(records, 0, RPA_RECORD_START); i >= 0; i = rpa_recordtree_next(records, i, RPA_RECORD_START))
1268 rpa_recordtree_walk(records, i, 0, rpa_dbex_copy_handler, dbex);
/*
 * Fetch record number rec from dbex->records via rpa_record_get.
 *
 * A NULL dbex or a NULL dbex->rules is tested first.
 * NOTE(review): that case presumably yields NULL -- confirm.
 */
1272 static rparecord_t *rpa_dbex_record(rpadbex_t *dbex, long rec)
1274 if (!dbex || !dbex->rules)
1276 return rpa_record_get(dbex->records, rec);
/*
 * Locate the closing record of rule rid.
 *
 * The rule info supplies startrec and sizerecs; the record looked at is
 * the last one of that span (startrec + sizerecs - 1), after the index
 * has been range checked against dbex->records.
 */
1280 static rparecord_t *rpa_dbex_rulerecord(rpadbex_t *dbex, rparule_t rid)
1283 rpa_ruleinfo_t *info;
1286 if (!dbex || !dbex->rules)
1288 info = r_harray_get(dbex->rules, rid);
1291 rec = info->startrec + info->sizerecs - 1;
1292 if (rec < 0 || rec >= (rparule_t)r_array_length(dbex->records))
1294 prec = (rparecord_t *)r_array_slot(dbex->records, rec);
/*
 * Obtain the name attached to the rule or reference at record rec.
 *
 * The name record is the first END child of the START record belonging
 * to rec. On success *name points at the input text of that record (no
 * copy is made) and *namesize receives its length. Callers in this file
 * treat a negative result as failure.
 *
 * NOTE(review): the check below masks ruleuid with
 * RPA_PRODUCTION_RULENAME using bitwise AND, whereas other code in this
 * file compares ruleuid against RPA_PRODUCTION_RULENAME with ==. Confirm
 * that a mask test is really intended.
 */
1299 static int rpa_dbex_rulename(rpadbex_t *dbex, long rec, const char **name, rsize_t *namesize)
1301 rparecord_t *pnamerec = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_START), RPA_RECORD_END));
1302 if (!pnamerec || !(pnamerec->ruleuid & RPA_PRODUCTION_RULENAME))
1304 *name = pnamerec->input;
1305 *namesize = pnamerec->inputsiz;
/*
 * Open the database for loading rules.
 *
 * The step shown here destroys the current rule table (dbex->rules).
 * NOTE(review): presumably the pointer is cleared afterwards, since other
 * functions in this file report RPA_E_NOTCLOSED based on the state of
 * dbex->rules -- confirm.
 */
1310 int rpa_dbex_open(rpadbex_t *dbex)
1315 r_object_destroy((robject_t *)dbex->rules);
/*
 * Close the database after loading: rebuilds the rule table, then the
 * loop information, then the bitmap information, in that order.
 * NOTE(review): confirm whether the bitmap pass is conditional on the
 * bitmap configuration value.
 */
1323 void rpa_dbex_close(rpadbex_t *dbex)
1327 rpa_dbex_buildruleinfo(dbex);
1328 rpa_dbex_buildloopinfo(dbex);
1330 rpa_dbex_buildbitmapinfo(dbex);
/*
 * Parse size bytes of rule text and add the resulting records to the
 * database.
 *
 * The text is duplicated with r_strndup and the duplicate is appended to
 * dbex->text (rpa_dbex_destroy frees those strings), so the caller keeps
 * ownership of rules. RPA_E_NOTOPEN is reported on one of the early
 * checks. If rpa_parser_load returns a negative value, the error info is
 * filled with RPA_E_SYNTAXERROR plus an offset and a line number.
 */
1334 long rpa_dbex_load(rpadbex_t *dbex, const char *rules, rsize_t size)
1345 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTOPEN);
1349 text = r_strndup(rules, size);
1351 r_array_add(dbex->text, &text);
/* The scratch record array is emptied before every parse. */
1352 r_array_setlength(dbex->temprecords, 0);
1353 if ((ret = rpa_parser_load(dbex->pa, text, size, dbex->temprecords)) < 0) {
/* Scans backwards towards the start of the text; line starts at 1. */
1361 for (line = 1; ptext >= text; --ptext) {
1365 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAXERROR);
1366 RPA_DBEX_SETERRINFO_OFFSET(dbex, ret);
1367 RPA_DBEX_SETERRINFO_LINE(dbex, line);
/* Move the parsed records from temprecords into dbex->records. */
1370 rpa_dbex_copyrecords(dbex);
/*
 * Convenience form of rpa_dbex_load for a NUL terminated string: the
 * length is taken with r_strlen and the result of rpa_dbex_load is
 * returned unchanged.
 */
1375 long rpa_dbex_load_s(rpadbex_t *dbex, const char *rules)
1377 return rpa_dbex_load(dbex, rules, r_strlen(rules));
/*
 * Print one line describing the END record that belongs to rec.
 *
 * Output order: the userdata value in hex, an indentation loop that runs
 * level + 1 times, rulelabel with the optional and loop marker
 * characters, then the input text of the record. The text is copied into
 * a local buffer and limited to sizeof(buffer) - 1 bytes; when the copied
 * size equals that limit an ellipsis is appended to the printed line.
 */
1381 void rpa_dbex_dumpindented(rpadbex_t *dbex, long rec, int level, const char *rulelabel)
1385 rparecord_t *prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END));
1389 r_memset(buffer, 0, sizeof(buffer));
1391 r_printf("[ 0x%016lx ] ", prec->userdata);
1392 for (i = 0; i < level + 1; i++)
1395 r_printf("%s, %c, %c", rulelabel, rpa_record_optchar(prec, 'x'), rpa_record_loopchar(prec, 'x'));
/* buffer was zero filled above, so the copy below stays NUL terminated. */
1397 size = R_MIN(prec->inputsiz, sizeof(buffer) - 1);
1398 r_strncpy(buffer, prec->input, size);
1400 if (size == (sizeof(buffer) - 1))
1401 r_printf(" %s ...\n", buffer);
1403 r_printf(" %s\n", buffer);
/*
 * Yield the element at index 0 of dbex->inlinestack, or -1 when the
 * stack is empty.
 */
1408 static long rpa_dbex_firstinlined(rpadbex_t *dbex)
1410 long ret = r_array_empty(dbex->inlinestack) ? -1 : r_array_index(dbex->inlinestack, 0, long);
/*
 * Linear search of dbex->inlinestack for the record index startrec.
 * rpa_dbex_dumptree_do uses the result as a truth value (nonzero meaning
 * the rule is already being inlined).
 */
1415 static int rpa_dbex_findinlined(rpadbex_t *dbex, long startrec)
1418 for (i = 0; i < r_array_length(dbex->inlinestack); i++) {
1419 if (r_array_index(dbex->inlinestack, i, long) == startrec)
/*
 * Recursive worker of rpa_dbex_dumptree: prints the record rec and what
 * lies below it, indented by level.
 *
 * References (AREF/CREF) are expanded in place: unless the referenced
 * rule is already on dbex->inlinestack, the rule is pushed, dumped at the
 * same level and popped again. A reference that is not expanded is
 * printed with the label loopref. Any other record is printed with its
 * own rule label, followed by its START children at level + 1.
 * NOTE(review): rule name records are presumably not printed at all --
 * confirm.
 */
1426 static void rpa_dbex_dumptree_do(rpadbex_t *dbex, long rec, int level)
1428 rparecord_t *prec = rpa_dbex_record(dbex, rec);
1429 if (prec && prec->ruleuid == RPA_PRODUCTION_RULENAME)
1431 if (prec && (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF)) {
1432 const char *name = NULL;
1433 rsize_t namesize = 0;
1435 rpa_ruleinfo_t *info;
1437 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) >= 0) {
/* The loop check is made against the rule at the bottom of the inline stack. */
1438 loop = rpa_parseinfo_rule_checkforloop(dbex, name, namesize, rpa_dbex_firstinlined(dbex));
1439 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
1441 if (!rpa_dbex_findinlined(dbex, info->startrec)) {
1443 * Temporary set the quantitative flags for the inlined rule to the parent
1444 * reference, so they are printed correctly. After the printing is done
1445 * restore the original flags.
1447 rparecord_t *prulestart = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_START));
1448 rparecord_t *pruleend = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_END));
/* Remember whether the rule itself was optional before borrowing the flag. */
1449 unsigned long optional = (prulestart->usertype & RPA_MATCH_OPTIONAL);
1450 prulestart->usertype |= (prec->usertype & RPA_MATCH_OPTIONAL);
1451 pruleend->usertype |= (prec->usertype & RPA_MATCH_OPTIONAL);
1452 r_array_add(dbex->inlinestack, &info->startrec);
1453 rpa_dbex_dumptree_do(dbex, info->startrec, level);
1454 r_array_removelast(dbex->inlinestack);
/* NOTE(review): presumably cleared only when the saved optional value is zero -- confirm. */
1456 prulestart->usertype &= ~RPA_MATCH_OPTIONAL;
1457 pruleend->usertype &= ~RPA_MATCH_OPTIONAL;
1460 rpa_dbex_dumpindented(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END), level, "loopref");
1466 rpa_dbex_dumpindented(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END), level, prec->rule);
1467 for (rec = rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_START); rec >= 0; rec = rpa_recordtree_next(dbex->records, rec, RPA_RECORD_START)) {
1468 rpa_dbex_dumptree_do(dbex, rec, level + 1);
/*
 * Print the expanded record tree of rule rid.
 *
 * Error codes set on the way: RPA_E_PARAM and RPA_E_NOTCLOSED on the
 * early checks, RPA_E_NOTFOUND when rid has no entry in dbex->rules.
 * The first record of the rule stays on dbex->inlinestack for the
 * duration of the dump.
 */
1473 int rpa_dbex_dumptree(rpadbex_t *dbex, rparule_t rid)
1475 rpa_ruleinfo_t *info;
1480 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_PARAM);
1484 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1487 if (!(info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid))) {
1488 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1491 r_array_add(dbex->inlinestack, &info->startrec);
1492 rpa_dbex_dumptree_do(dbex, info->startrec, 0);
1493 r_array_removelast(dbex->inlinestack);
/*
 * Print the text of every rule in the database, one per line.
 *
 * Rules are visited with rpa_dbex_first / rpa_dbex_next and their text is
 * copied into a local buffer with rpa_dbex_strncpy. When the value
 * returned by the copy equals sizeof(buffer) an ellipsis is appended to
 * the printed line. RPA_E_NOTCLOSED is reported on an early check.
 */
1498 int rpa_dbex_dumpproductions(rpadbex_t *dbex)
1507 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1510 for (rid = rpa_dbex_first(dbex); rid >= 0; rid = rpa_dbex_next(dbex, rid)) {
1511 ret = rpa_dbex_strncpy(dbex, buffer, rid, sizeof(buffer));
1513 if (ret == sizeof(buffer))
1514 r_printf(" %s ...\n", buffer);
1516 r_printf(" %s\n", buffer);
/*
 * Dump every record of dbex->records, in index order, with
 * rpa_record_dump. RPA_E_NOTCLOSED is reported on an early check.
 */
1524 int rpa_dbex_dumprecords(rpadbex_t *dbex)
1531 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1534 for (i = 0; i < r_array_length(dbex->records); i++) {
1535 rpa_record_dump(dbex->records, i);
/*
 * Print one summary line per entry of the rule table: first record,
 * number of records, code offset, code size and the entry name. The
 * switch on info->type distinguishes named rules, anonymous rules and
 * directives. RPA_E_NOTCLOSED is reported on an early check.
 *
 * NOTE(review): startrec and sizerecs are printed with a plain d
 * conversion while codeoff and codesiz use ld; confirm that the
 * conversions match the declared field types.
 */
1541 int rpa_dbex_dumpinfo(rpadbex_t *dbex)
1544 rpa_ruleinfo_t *info;
1549 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1552 for (i = 0; i < r_array_length(dbex->rules->names); i++) {
1553 rstr_t *name = r_array_index(dbex->rules->names, i, rstr_t*);
1554 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
1555 switch (info->type) {
1556 case RPA_RULEINFO_NAMEDRULE:
1559 case RPA_RULEINFO_ANONYMOUSRULE:
1562 case RPA_RULEINFO_DIRECTIVE:
1569 r_printf("(%7d, %4d, code: %7ld, %5ld) : %s\n", info->startrec, info->sizerecs, info->codeoff, info->codesiz, name->str);
/*
 * Print a C preprocessor define for every emitid directive that carries
 * an alias name: the alias text followed by the number taken from the
 * record that follows the alias.
 *
 * NOTE(review): buffer is allocated in the declaration, before the
 * argument checks; confirm that every early exit (for instance the
 * RPA_E_NOTCLOSED path) releases it.
 * NOTE(review): the result of r_realloc is assigned straight back to
 * buffer, so a failed reallocation would lose the old block.
 * NOTE(review): dec is printed with a d conversion although it is filled
 * in by rpa_record2long; confirm the declared type of dec.
 */
1575 int rpa_dbex_dumpuids(rpadbex_t *dbex)
1579 rpa_ruleinfo_t *info;
1580 char *buffer = r_zmalloc(32 * sizeof(char));
1585 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1588 for (i = 0; i < r_array_length(dbex->rules->names); i++) {
1589 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
1590 if (info->type == RPA_RULEINFO_DIRECTIVE) {
1591 rparecord_t *prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_END));
1592 if (prec->ruleuid == RPA_PRODUCTION_DIRECTIVEEMITID && prec->inputsiz) {
/* Walk the children of the directive looking for alias name records. */
1593 rec = rpa_recordtree_firstchild(dbex->records, info->startrec, RPA_RECORD_START);
1595 prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END));
1596 if (prec->ruleuid == RPA_PRODUCTION_ALIASNAME) {
1598 if (rpa_record2long(rpa_dbex_record(dbex, rpa_recordtree_next(dbex->records, rec, RPA_RECORD_END)), &dec) < 0)
/* Grow the buffer to the alias length plus a terminating zero byte. */
1600 buffer = r_realloc(buffer, prec->inputsiz + 1);
1601 r_memset(buffer, 0, prec->inputsiz + 1);
1602 r_memcpy(buffer, prec->input, prec->inputsiz);
1603 r_printf("#define %s %d\n", buffer, dec);
1606 rec = rpa_recordtree_next(dbex->records, rec, RPA_RECORD_START);
/*
 * Dump the generated code of rule rid with rvm_asm_dump, starting at the
 * code offset of the rule and covering its code size. RPA_E_PARAM and
 * RPA_E_NOTCLOSED are reported on the early checks.
 */
1616 int rpa_dbex_dumpcode(rpadbex_t* dbex, rparule_t rid)
1618 rpa_ruleinfo_t *info;
1622 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_PARAM);
1626 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1629 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid);
1632 rvm_asm_dump(rvm_codegen_getcode(dbex->co->cg, info->codeoff), info->codesiz);
/*
 * Length of the text of rule rid, taken from the inputsiz field of the
 * record returned by rpa_dbex_rulerecord. RPA_E_NOTFOUND is set when
 * that record cannot be located.
 */
1637 rsize_t rpa_dbex_strlen(rpadbex_t *dbex, rparule_t rid)
1644 if ((prec = rpa_dbex_rulerecord(dbex, rid)) == NULL) {
1645 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1648 size = prec->inputsiz;
/*
 * Copy the text of rule rid into dst, a buffer of n bytes.
 *
 * dst is zero filled over its full length before the copy. RPA_E_NOTFOUND
 * is set when the rule record cannot be located.
 * NOTE(review): size starts out as the full inputsiz; confirm that it is
 * limited with respect to n before the copy.
 */
1653 rsize_t rpa_dbex_strncpy(rpadbex_t *dbex, char *dst, rparule_t rid, rsize_t n)
1660 if ((prec = rpa_dbex_rulerecord(dbex, rid)) == NULL) {
1661 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1664 size = prec->inputsiz;
1667 r_memset(dst, 0, n);
1668 r_strncpy(dst, prec->input, size);
/*
 * Name of rule rid, looked up in dbex->rules->names. rid is compared
 * against the number of rule table members first; RPA_E_NOTCLOSED is
 * reported on an early check.
 * NOTE(review): only the upper bound test is shown here; confirm that a
 * negative rid is rejected as well.
 */
1673 const char *rpa_dbex_name(rpadbex_t *dbex, rparule_t rid)
1680 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1683 if (rid >= (rparule_t)r_array_length(dbex->rules->members))
1685 name = r_array_index(dbex->rules->names, rid, rstr_t*);
/*
 * Start of a rule iteration. RPA_E_NOTCLOSED is reported on an early
 * check and RPA_E_NOTFOUND when the rule table has no members.
 */
1690 rparule_t rpa_dbex_first(rpadbex_t *dbex)
1695 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1699 if (r_array_length(dbex->rules->members) <= 0) {
1700 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
/*
 * Id of the last rule: the number of rule table members minus one.
 * RPA_E_NOTCLOSED is reported on an early check and RPA_E_NOTFOUND when
 * the rule table has no members.
 */
1707 rparule_t rpa_dbex_last(rpadbex_t *dbex)
1712 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1716 if (r_array_length(dbex->rules->members) <= 0) {
1717 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1720 return r_array_length(dbex->rules->members) - 1;
/*
 * Translate a rule name (namesize bytes, not necessarily NUL terminated)
 * into a rule id by means of r_harray_taillookup on dbex->rules.
 * RPA_E_NOTCLOSED is reported on an early check and RPA_E_NOTFOUND when
 * the lookup fails.
 */
1724 rparule_t rpa_dbex_lookup(rpadbex_t *dbex, const char *name, rsize_t namesize)
1732 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1736 ret = (rparule_t) r_harray_taillookup(dbex->rules, name, namesize);
1738 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
/*
 * Convenience form of rpa_dbex_lookup for a NUL terminated name: the
 * length is taken with r_strlen.
 */
1744 rparule_t rpa_dbex_lookup_s(rpadbex_t *dbex, const char *name)
1746 return rpa_dbex_lookup(dbex, name, r_strlen(name));
/*
 * Step of a rule iteration (used together with rpa_dbex_first). rid is
 * checked against the number of rule table members; RPA_E_NOTCLOSED is
 * reported on an early check. Loops in this file stop iterating once the
 * result is negative.
 */
1750 rparule_t rpa_dbex_next(rpadbex_t *dbex, rparule_t rid)
1755 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1760 if (rid < (rparule_t)r_array_length(dbex->rules->members))
/*
 * Backward step of a rule iteration. RPA_E_NOTCLOSED is reported on an
 * early check.
 * NOTE(review): presumably the mirror image of rpa_dbex_next -- confirm.
 */
1766 rparule_t rpa_dbex_prev(rpadbex_t *dbex, rparule_t rid)
1771 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
/*
 * Return the error code currently stored in the error info of the
 * database (dbex->err.code).
 */
1781 long rpa_dbex_lasterror(rpadbex_t *dbex)
1785 return dbex->err.code;
/*
 * Copy the complete error info structure of the database into *errinfo.
 * Both pointers are checked for NULL before the copy.
 */
1789 long rpa_dbex_lasterrorinfo(rpadbex_t *dbex, rpa_errinfo_t *errinfo)
1791 if (!dbex || !errinfo)
1793 r_memcpy(errinfo, &dbex->err, sizeof(rpa_errinfo_t));
/*
 * NOTE(review): presumably returns the version string of the library;
 * the returned pointer is const and must not be modified -- confirm the
 * exact contents against the body.
 */
1798 const char *rpa_dbex_version()
/*
 * Generate code for the single rule rid.
 *
 * The code size before generation is remembered as the start offset of
 * the rule. After rpa_dbex_playrecord has run for the first record of
 * the rule, the offset and the number of code units added since are
 * stored in the rule info (codeoff, codesiz). A negative result from
 * rpa_dbex_playrecord is treated as failure.
 */
1804 static int rpa_dbex_compile_rule(rpadbex_t *dbex, rparule_t rid)
1807 rpa_ruleinfo_t *info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid);
1811 codeoff = rvm_codegen_getcodesize(dbex->co->cg);
1813 * Set the rid in the rulepref, so the compiler associates this rule
1814 * with the correct rid.
1816 rpa_compiler_rulepref_set_ruleid_s(dbex->co, rpa_dbex_name(dbex, rid), rid);
1817 if (rpa_dbex_playrecord(dbex, info->startrec) < 0)
1819 info->codeoff = codeoff;
1820 info->codesiz = rvm_codegen_getcodesize(dbex->co->cg) - codeoff;
/*
 * Compile every rule of the database.
 *
 * The existing compiler object is destroyed and replaced by a new one,
 * emitting is switched on, and each rule is compiled in iteration order.
 * A failing rule sets RPA_E_COMPILE. Afterwards the code is relocated; a
 * relocation failure sets RPA_E_UNRESOLVEDSYMBOL and copies the name of
 * the label reported through labelerr into the error info.
 * RPA_E_NOTCLOSED is reported on an early check.
 */
1825 int rpa_dbex_compile(rpadbex_t *dbex)
1828 rvm_codelabel_t *labelerr;
1833 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1837 * By default all production rules emit
1840 rpa_compiler_destroy(dbex->co);
1841 dbex->co = rpa_compiler_create();
1842 rpa_dbex_setemit(dbex, TRUE);
1844 for (rid = rpa_dbex_first(dbex); rid >= 0; rid = rpa_dbex_next(dbex, rid)) {
1845 if (rpa_dbex_compile_rule(dbex, rid) < 0) {
1846 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_COMPILE);
1851 if (rvm_codegen_relocate(dbex->co->cg, &labelerr) < 0) {
1852 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_UNRESOLVEDSYMBOL);
1853 RPA_DBEX_SETERRINFO_NAME(dbex, labelerr->name->str, labelerr->name->size);
/*
 * Return a pointer to the start (offset 0) of the generated code.
 * RPA_E_NOTCLOSED is reported on an early check; RPA_E_NOTCOMPILED is set
 * when the database is not flagged as compiled or no code has been
 * generated.
 */
1861 rvm_asmins_t *rpa_dbex_executable(rpadbex_t *dbex)
1866 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1869 if (!dbex->compiled || rvm_codegen_getcodesize(dbex->co->cg) == 0) {
1870 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCOMPILED);
1873 return rvm_codegen_getcode(dbex->co->cg, 0);
/*
 * Return the code offset (codeoff) recorded for rule rid.
 * Error codes set on the way: RPA_E_NOTCLOSED on an early check,
 * RPA_E_NOTCOMPILED when the database is not flagged as compiled, and
 * RPA_E_NOTFOUND on the check that follows the rule table lookup.
 */
1877 long rpa_dbex_executableoffset(rpadbex_t *dbex, rparule_t rid)
1879 rpa_ruleinfo_t *info;
1884 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1887 if (!dbex->compiled) {
1888 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCOMPILED);
1891 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid);
1893 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1896 return info->codeoff;
/*
 * Set the configuration value selected by cfg to val. Three selectors
 * are recognized: RPA_DBEXCFG_OPTIMIZATIONS (stored in
 * dbex->optimizations), RPA_DBEXCFG_DEBUG and RPA_DBEXCFG_BITMAP.
 */
1900 long rpa_dbex_cfgset(rpadbex_t *dbex, unsigned long cfg, unsigned long val)
1904 if (cfg == RPA_DBEXCFG_OPTIMIZATIONS) {
1905 dbex->optimizations = val;
1907 } else if(cfg == RPA_DBEXCFG_DEBUG) {
1910 } else if(cfg == RPA_DBEXCFG_BITMAP) {
/*
 * Read the configuration value selected by cfg. The optimizations
 * selector yields dbex->optimizations and the bitmap selector yields
 * dbex->bitmap; RPA_DBEXCFG_DEBUG is recognized as well.
 */
1918 long rpa_dbex_cfgget(rpadbex_t *dbex, unsigned long cfg)
1922 if (cfg == RPA_DBEXCFG_OPTIMIZATIONS) {
1923 return dbex->optimizations;
1924 } else if(cfg == RPA_DBEXCFG_DEBUG) {
1926 } else if(cfg == RPA_DBEXCFG_BITMAP) {
1927 return dbex->bitmap;