2 * Regular Pattern Analyzer (RPA)
3 * Copyright (c) 2009-2010 Martin Stoilov
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 * Martin Stoilov <martin@rpasearch.com>
26 #include "rlib/rmem.h"
27 #include "rlib/rutf.h"
28 #include "rpa/rpabitmap.h"
29 #include "rpa/rpadbexpriv.h"
30 #include "rpa/rpadbex.h"
31 #include "rpa/rpastatpriv.h"
/*
 * Helpers that record error details in (__d__)->err. Each macro stores one
 * value (code, offset, line or name) and ORs the matching RPA_ERRINFO_* bit
 * into err.mask to flag that field as set.
 * RPA_DBEX_SETERRINFO_NAME zero-fills err.name first and copies at most
 * sizeof(err.name) - 1 bytes, so the last byte of err.name stays zero.
 */
34 #define RPA_DBEX_SETERRINFO_CODE(__d__, __e__) do { (__d__)->err.code = __e__; (__d__)->err.mask |= RPA_ERRINFO_CODE; } while (0)
35 #define RPA_DBEX_SETERRINFO_OFFSET(__d__, __o__) do { (__d__)->err.offset = __o__; (__d__)->err.mask |= RPA_ERRINFO_OFFSET; } while (0)
36 #define RPA_DBEX_SETERRINFO_LINE(__d__, __l__) do { (__d__)->err.line = __l__; (__d__)->err.mask |= RPA_ERRINFO_LINE; } while (0)
37 #define RPA_DBEX_SETERRINFO_NAME(__d__, __n__, __s__) do { \
38 (__d__)->err.mask |= RPA_ERRINFO_NAME; \
39 r_memset((__d__)->err.name, 0, sizeof((__d__)->err.name)); \
40 r_strncpy((__d__)->err.name, __n__, R_MIN(__s__, (sizeof((__d__)->err.name) - 1))); } while (0)
44 static rparecord_t *rpa_dbex_rulerecord(rpadbex_t *dbex, rparule_t rid);
45 static rparecord_t *rpa_dbex_record(rpadbex_t *dbex, long rec);
46 static int rpa_dbex_rulename(rpadbex_t *dbex, long rec, const char **name, rsize_t *namesize);
47 static int rpa_parseinfo_loopdetect(rpadbex_t *dbex, long parent, long loopto);
48 static long rpa_dbex_firstinlined(rpadbex_t *dbex);
49 static int rpa_dbex_findinlined(rpadbex_t *dbex, long startrec);
50 static int rpa_dbex_playchildrecords(rpadbex_t *dbex, long rec);
51 static int rpa_dbex_playreversechildrecords(rpadbex_t *dbex, long rec);
52 static int rpa_dbex_playrecord(rpadbex_t *dbex, long rec);
53 static int rpa_dbex_rh_default(rpadbex_t *dbex, long rec);
/*
 * Debug helper called before a handler emits code for record `rec`.
 * Saves the current code size in dbex->headoff (read later by
 * rpa_dbex_debug_recordtail) and dumps the record when it carries the
 * RPA_RECORD_START flag.
 * NOTE(review): short lines (braces, any enabling condition) are missing
 * from this listing.
 */
56 void rpa_dbex_debug_recordhead(rpadbex_t *dbex, long rec)
59 rarray_t *records = dbex->records;
60 rparecord_t *prec = (rparecord_t *) r_array_slot(records, rec);
61 dbex->headoff = rvm_codegen_getcodesize(dbex->co->cg);
62 if (prec->type & RPA_RECORD_START) {
63 rpa_record_dump(records, rec);
/*
 * Debug helper called after a handler emitted code for record `rec`.
 * Dumps the instructions added since dbex->headoff was saved (code size now
 * minus headoff) and dumps the record when it carries the RPA_RECORD_END flag.
 */
70 void rpa_dbex_debug_recordtail(rpadbex_t *dbex, long rec)
73 rarray_t *records = dbex->records;
74 rparecord_t *prec = (rparecord_t *) r_array_slot(records, rec);
75 rvm_asm_dump(rvm_codegen_getcode(dbex->co->cg, dbex->headoff), rvm_codegen_getcodesize(dbex->co->cg) - dbex->headoff);
76 if (prec->type & RPA_RECORD_END) {
77 rpa_record_dump(records, rec);
/*
 * Default record handler. Emits no code of its own: it resolves the START
 * record, runs the debug head/tail hooks, plays all child records, then
 * resolves the END record and runs the debug hooks again.
 */
83 static int rpa_dbex_rh_default(rpadbex_t *dbex, long rec)
85 rarray_t *records = dbex->records;
88 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
89 prec = rpa_dbex_record(dbex, rec);
91 rpa_dbex_debug_recordhead(dbex, rec);
92 rpa_dbex_debug_recordtail(dbex, rec);
93 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
95 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
96 prec = rpa_dbex_record(dbex, rec);
98 rpa_dbex_debug_recordhead(dbex, rec);
99 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Dispatch record `rec` to the handler registered for its ruleuid in
 * dbex->handlers. Falls back to rpa_dbex_rh_default when ruleuid is outside
 * [0, RPA_PRODUCTION_COUNT) or no handler is registered for it.
 * Returns whatever the selected handler returns.
 */
105 static int rpa_dbex_playrecord(rpadbex_t *dbex, long rec)
107 rarray_t *records = dbex->records;
108 rparecord_t *prec = (rparecord_t *)r_array_slot(records, rec);
110 if (prec->ruleuid >= 0 && prec->ruleuid < RPA_PRODUCTION_COUNT && dbex->handlers[prec->ruleuid]) {
111 return dbex->handlers[prec->ruleuid](dbex, rec);
113 return rpa_dbex_rh_default(dbex, rec);
/*
 * Play every child of `rec` in document order: start at the first child
 * START record and advance with rpa_recordtree_next until it returns a
 * negative index. Each child goes through rpa_dbex_playrecord; a negative
 * result from a child is tested (the action taken is on a line not shown).
 */
117 static int rpa_dbex_playchildrecords(rpadbex_t *dbex, long rec)
120 rarray_t *records = dbex->records;
122 for (child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_START); child >= 0; child = rpa_recordtree_next(records, child, RPA_RECORD_START)) {
123 if (rpa_dbex_playrecord(dbex, child) < 0)
/*
 * Same as rpa_dbex_playchildrecords but in reverse order: start at the last
 * child START record and step backwards with rpa_recordtree_prev.
 */
130 static int rpa_dbex_playreversechildrecords(rpadbex_t *dbex, long rec)
133 rarray_t *records = dbex->records;
135 for (child = rpa_recordtree_lastchild(records, rec, RPA_RECORD_START); child >= 0; child = rpa_recordtree_prev(records, child, RPA_RECORD_START)) {
136 if (rpa_dbex_playrecord(dbex, child) < 0)
/*
 * Select the character-match opcode for a quantifier. Only the bits in
 * RPA_MATCH_MASK are examined: MULTIPLE -> RPA_MATCHCHR_MUL,
 * OPTIONAL -> RPA_MATCHCHR_OPT, MULTIOPT -> RPA_MATCHCHR_MOP; the remaining
 * returns yield RPA_MATCHCHR_NAN (their case labels are not shown here).
 * The result is used as the opcode argument of rvm_asm by the handlers.
 */
144 static long rpa_dbex_getmatchchr(unsigned long matchtype)
146 switch (matchtype & RPA_MATCH_MASK) {
149 return RPA_MATCHCHR_NAN;
151 case RPA_MATCH_MULTIPLE:
152 return RPA_MATCHCHR_MUL;
154 case RPA_MATCH_OPTIONAL:
155 return RPA_MATCHCHR_OPT;
157 case RPA_MATCH_MULTIOPT:
158 return RPA_MATCHCHR_MOP;
161 return RPA_MATCHCHR_NAN;
/*
 * Select the special-character match opcode for a quantifier. Mirrors
 * rpa_dbex_getmatchchr but returns the RPA_MATCHSPCHR_* family:
 * MULTIPLE -> _MUL, OPTIONAL -> _OPT, MULTIOPT -> _MOP, otherwise _NAN
 * (the case labels for the _NAN returns are not shown here).
 */
165 static long rpa_dbex_getmatchspecialchr(unsigned long matchtype)
167 switch (matchtype & RPA_MATCH_MASK) {
170 return RPA_MATCHSPCHR_NAN;
172 case RPA_MATCH_MULTIPLE:
173 return RPA_MATCHSPCHR_MUL;
175 case RPA_MATCH_OPTIONAL:
176 return RPA_MATCHSPCHR_OPT;
178 case RPA_MATCH_MULTIOPT:
179 return RPA_MATCHSPCHR_MOP;
182 return RPA_MATCHSPCHR_NAN;
186 int rpa_record2long(rparecord_t *prec, ruint32 *num)
191 if (!prec || !num || prec->inputsiz == 0 || prec->inputsiz >= sizeof(buffer))
193 r_memset(buffer, 0, sizeof(buffer));
194 r_memcpy(buffer, prec->input, prec->inputsiz);
195 if (prec->ruleuid == RPA_PRODUCTION_HEX) {
196 *num = (ruint32)r_strtoul(prec->input, &endptr, 16);
197 } else if (prec->ruleuid == RPA_PRODUCTION_DEC) {
198 *num = (ruint32)r_strtoul(prec->input, &endptr, 10);
/*
 * Record handler registered for RPA_PRODUCTION_DIRECTIVEEMITID.
 * Looks up the rule name of the record, converts the last child record to a
 * number with rpa_record2long, stores that number as the rule's uid and sets
 * RPA_RFLAG_EMITRECORD on the rule. Each failure path visible here sets
 * RPA_E_SYNTAXERROR in dbex->err. Children are then played and the END
 * record is traced through the debug hooks.
 */
206 static int rpa_dbex_rh_uid(rpadbex_t *dbex, long rec)
208 const char *name = NULL;
211 rparecord_t *pnumrec;
212 rarray_t *records = dbex->records;
215 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
216 prec = rpa_dbex_record(dbex, rec);
218 rpa_dbex_debug_recordhead(dbex, rec);
219 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
220 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAXERROR);
/* The number to use as uid is taken from the last child END record. */
223 pnumrec = rpa_dbex_record(dbex, rpa_recordtree_lastchild(dbex->records, rec, RPA_RECORD_END));
225 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAXERROR);
228 if (rpa_record2long(pnumrec, &uid) < 0) {
229 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAXERROR);
232 rpa_compiler_rulepref_set_ruleuid(dbex->co, name, namesize, uid);
233 rpa_compiler_rulepref_set_flag(dbex->co, name, namesize, RPA_RFLAG_EMITRECORD);
234 rpa_dbex_debug_recordtail(dbex, rec);
235 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
237 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
238 prec = rpa_dbex_record(dbex, rec);
240 rpa_dbex_debug_recordhead(dbex, rec);
241 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_DIRECTIVEABORT.
 * Looks up the rule name of the record and sets RPA_RFLAG_ABORTONFAIL on
 * that rule's preferences, then plays the children and traces the END record.
 */
246 static int rpa_dbex_rh_abort(rpadbex_t *dbex, long rec)
248 const char *name = NULL;
250 rarray_t *records = dbex->records;
253 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
254 prec = rpa_dbex_record(dbex, rec);
256 rpa_dbex_debug_recordhead(dbex, rec);
257 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
260 rpa_compiler_rulepref_set_flag(dbex->co, name, namesize, RPA_RFLAG_ABORTONFAIL);
261 rpa_dbex_debug_recordtail(dbex, rec);
262 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
264 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
265 prec = rpa_dbex_record(dbex, rec);
267 rpa_dbex_debug_recordhead(dbex, rec);
268 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_DIRECTIVEEMIT.
 * Looks up the rule name of the record and sets RPA_RFLAG_EMITRECORD on that
 * rule's preferences, then plays the children and traces the END record.
 */
273 static int rpa_dbex_rh_emit(rpadbex_t *dbex, long rec)
275 const char *name = NULL;
277 rarray_t *records = dbex->records;
280 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
281 prec = rpa_dbex_record(dbex, rec);
283 rpa_dbex_debug_recordhead(dbex, rec);
284 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
287 rpa_compiler_rulepref_set_flag(dbex->co, name, namesize, RPA_RFLAG_EMITRECORD);
288 rpa_dbex_debug_recordtail(dbex, rec);
289 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
291 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
292 prec = rpa_dbex_record(dbex, rec);
294 rpa_dbex_debug_recordhead(dbex, rec);
295 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_DIRECTIVENOEMIT.
 * Counterpart of rpa_dbex_rh_emit: clears RPA_RFLAG_EMITRECORD on the named
 * rule's preferences, then plays the children and traces the END record.
 */
300 static int rpa_dbex_rh_noemit(rpadbex_t *dbex, long rec)
302 const char *name = NULL;
304 rarray_t *records = dbex->records;
307 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
308 prec = rpa_dbex_record(dbex, rec);
310 rpa_dbex_debug_recordhead(dbex, rec);
311 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
314 rpa_compiler_rulepref_clear_flag(dbex->co, name, namesize, RPA_RFLAG_EMITRECORD);
315 rpa_dbex_debug_recordtail(dbex, rec);
316 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
318 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
319 prec = rpa_dbex_record(dbex, rec);
321 rpa_dbex_debug_recordhead(dbex, rec);
322 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Apply the EMITRECORD preference to every named rule. Walks all entries of
 * dbex->rules by index; for entries of type RPA_RULEINFO_NAMEDRULE the
 * RPA_RFLAG_EMITRECORD flag is either set or cleared.
 * NOTE(review): the condition choosing between set and clear is on lines not
 * shown here - presumably it tests `emit`; confirm.
 */
327 static int rpa_dbex_setemit(rpadbex_t *dbex, rboolean emit)
330 rpa_ruleinfo_t *info;
332 for (i = 0; i < r_array_length(dbex->rules->names); i++) {
333 rstr_t *name = r_array_index(dbex->rules->names, i, rstr_t*);
334 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
335 if (info->type == RPA_RULEINFO_NAMEDRULE) {
337 rpa_compiler_rulepref_set_flag(dbex->co, name->str, name->size, RPA_RFLAG_EMITRECORD);
339 rpa_compiler_rulepref_clear_flag(dbex->co, name->str, name->size, RPA_RFLAG_EMITRECORD);
/*
 * Record handler registered for RPA_PRODUCTION_DIRECTIVEEMITALL.
 * Calls rpa_dbex_setemit(dbex, TRUE), then plays the children and traces the
 * END record through the debug hooks.
 */
347 static int rpa_dbex_rh_emitall(rpadbex_t *dbex, long rec)
349 rarray_t *records = dbex->records;
352 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
353 prec = rpa_dbex_record(dbex, rec);
355 rpa_dbex_debug_recordhead(dbex, rec);
356 rpa_dbex_setemit(dbex, TRUE);
357 rpa_dbex_debug_recordtail(dbex, rec);
358 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
360 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
361 prec = rpa_dbex_record(dbex, rec);
363 rpa_dbex_debug_recordhead(dbex, rec);
364 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_DIRECTIVEEMITNONE.
 * Calls rpa_dbex_setemit(dbex, FALSE), then plays the children and traces
 * the END record through the debug hooks.
 */
369 static int rpa_dbex_rh_emitnone(rpadbex_t *dbex, long rec)
371 rarray_t *records = dbex->records;
374 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
375 prec = rpa_dbex_record(dbex, rec);
377 rpa_dbex_debug_recordhead(dbex, rec);
378 rpa_dbex_setemit(dbex, FALSE);
379 rpa_dbex_debug_recordtail(dbex, rec);
380 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
382 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
383 prec = rpa_dbex_record(dbex, rec);
385 rpa_dbex_debug_recordhead(dbex, rec);
386 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_NAMEDRULE.
 * Opens the rule with one of three compiler constructs, plays the rule body,
 * then closes it with the matching *_end call:
 *   - rpa_compiler_inlinerule_begin/end when dbex->inlinestack is not empty
 *     at that point;
 *   - rpa_compiler_loop_begin/end when the record is marked RPA_LOOP_PATH;
 *   - rpa_compiler_rule_begin/end, preceded by a RPA_MATCHBITMAP test on
 *     prec->userdata followed by RVM_BXLES LR.
 * The record index is pushed on dbex->inlinestack while the children are
 * played and popped afterwards.
 * NOTE(review): the else lines and braces are not visible here, so the exact
 * nesting of the three alternatives and of the SHIFT/BL/EMITTAIL/EXT
 * sequence should be confirmed against the full file.
 */
391 static int rpa_dbex_rh_namedrule(rpadbex_t *dbex, long rec)
393 const char *name = NULL;
395 rarray_t *records = dbex->records;
396 rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
398 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
399 prec = rpa_dbex_record(dbex, rec);
401 rpa_dbex_debug_recordhead(dbex, rec);
402 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
406 if (!r_array_empty(dbex->inlinestack)) {
407 rpa_compiler_inlinerule_begin(dbex->co, name, namesize, 0);
409 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_SHIFT, XX, XX, XX, 0));
410 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_BL, DA, XX, XX, 3));
411 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EMITTAIL, XX, XX, XX, 0));
412 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_EXT, XX, XX, XX, 0));
414 if ((prec->usertype & RPA_LOOP_PATH)) {
415 rpa_compiler_loop_begin(dbex->co, name, namesize);
417 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_MATCHBITMAP, DA, XX, XX, prec->userdata));
418 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_BXLES, LR, XX, XX, 0));
419 rpa_compiler_rule_begin(dbex->co, name, namesize);
/* Push this rule; nested named rules then see a non-empty inlinestack. */
422 r_array_add(dbex->inlinestack, &rec);
423 rpa_dbex_debug_recordtail(dbex, rec);
424 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
426 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
427 prec = rpa_dbex_record(dbex, rec);
429 rpa_dbex_debug_recordhead(dbex, rec);
/* Pop before testing, so the test sees the same stack state as at begin. */
430 r_array_removelast(dbex->inlinestack);
431 if (!r_array_empty(dbex->inlinestack)) {
432 rpa_compiler_inlinerule_end(dbex->co);
434 if ((prec->usertype & RPA_LOOP_PATH)) {
435 rpa_compiler_loop_end(dbex->co);
437 rpa_compiler_rule_end(dbex->co);
440 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_ANONYMOUSRULE.
 * Emits RPA_SHIFT, wraps the children in an expression opened with
 * RPA_MATCH_NONE, and after closing it emits RPA_EMITTAIL followed by RVM_EXT.
 */
445 static int rpa_dbex_rh_anonymousrule(rpadbex_t *dbex, long rec)
447 rarray_t *records = dbex->records;
448 rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
450 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
451 prec = rpa_dbex_record(dbex, rec);
453 rpa_dbex_debug_recordhead(dbex, rec);
454 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_SHIFT, XX, XX, XX, 0));
455 rpa_compiler_exp_begin(dbex->co, RPA_MATCH_NONE);
456 rpa_dbex_debug_recordtail(dbex, rec);
457 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
459 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
460 prec = rpa_dbex_record(dbex, rec);
462 rpa_dbex_debug_recordhead(dbex, rec);
463 rpa_compiler_exp_end(dbex->co);
464 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EMITTAIL, XX, XX, XX, 0));
465 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_EXT, XX, XX, XX, 0));
466 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_CHAR.
 * After playing the children, decodes one UTF-8 character from the END
 * record's input into wc and emits a character-match instruction whose
 * opcode is chosen from the record's quantifier bits by
 * rpa_dbex_getmatchchr. An RVM_BLES branch relocated to the current
 * expression's endidx follows.
 */
472 static int rpa_dbex_rh_char(rpadbex_t *dbex, long rec)
475 rarray_t *records = dbex->records;
478 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
479 prec = rpa_dbex_record(dbex, rec);
481 rpa_dbex_debug_recordhead(dbex, rec);
482 rpa_dbex_debug_recordtail(dbex, rec);
483 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
485 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
486 prec = rpa_dbex_record(dbex, rec);
488 rpa_dbex_debug_recordhead(dbex, rec);
489 if (r_utf8_mbtowc(&wc, (const unsigned char*) prec->input, (const unsigned char*)prec->input + prec->inputsiz) < 0) {
493 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
494 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
495 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_SPECIALCHAR.
 * Same shape as rpa_dbex_rh_char, but the opcode comes from
 * rpa_dbex_getmatchspecialchr (the RPA_MATCHSPCHR_* family). The decoded
 * character wc is the instruction's immediate, followed by an RVM_BLES
 * branch relocated to the current expression's endidx.
 */
500 static int rpa_dbex_rh_specialchar(rpadbex_t *dbex, long rec)
503 rarray_t *records = dbex->records;
506 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
507 prec = rpa_dbex_record(dbex, rec);
509 rpa_dbex_debug_recordhead(dbex, rec);
510 rpa_dbex_debug_recordtail(dbex, rec);
511 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
513 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
514 prec = rpa_dbex_record(dbex, rec);
516 rpa_dbex_debug_recordhead(dbex, rec);
517 if (r_utf8_mbtowc(&wc, (const unsigned char*) prec->input, (const unsigned char*)prec->input + prec->inputsiz) < 0) {
521 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchspecialchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
522 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
523 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_CLS.
 * Opens a compiler class with the record's quantifier bits, plays the
 * children inside it, closes the class and emits an RVM_BLES branch
 * relocated to the enclosing expression's endidx.
 */
528 static int rpa_dbex_rh_cls(rpadbex_t *dbex, long rec)
530 rarray_t *records = dbex->records;
533 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
534 prec = rpa_dbex_record(dbex, rec);
536 rpa_dbex_debug_recordhead(dbex, rec);
537 rpa_compiler_class_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
538 rpa_dbex_debug_recordtail(dbex, rec);
539 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
541 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
542 prec = rpa_dbex_record(dbex, rec);
544 rpa_dbex_debug_recordhead(dbex, rec);
545 rpa_compiler_class_end(dbex->co);
546 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
547 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_CLSCHAR.
 * Decodes one UTF-8 character from the END record's input and emits
 * RPA_MATCHCHR_NAN for it (no quantifier is applied here), followed by an
 * RVM_BGRE branch relocated to the current expression's endidx. Note the
 * branch is BGRE here, where rpa_dbex_rh_char uses BLES.
 */
552 static int rpa_dbex_rh_clschar(rpadbex_t *dbex, long rec)
555 rarray_t *records = dbex->records;
558 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
559 prec = rpa_dbex_record(dbex, rec);
561 rpa_dbex_debug_recordhead(dbex, rec);
562 rpa_dbex_debug_recordtail(dbex, rec);
563 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
565 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
566 prec = rpa_dbex_record(dbex, rec);
568 rpa_dbex_debug_recordhead(dbex, rec);
569 if (r_utf8_mbtowc(&wc, (const unsigned char*) prec->input, (const unsigned char*)prec->input + prec->inputsiz) < 0) {
573 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, wc));
574 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
575 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_MINOP.
 * Like rpa_dbex_rh_exp, except that the children are played in reverse
 * order (rpa_dbex_playreversechildrecords) inside the expression.
 */
580 static int rpa_dbex_rh_minexp(rpadbex_t *dbex, long rec)
582 rarray_t *records = dbex->records;
585 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
586 prec = rpa_dbex_record(dbex, rec);
588 rpa_dbex_debug_recordhead(dbex, rec);
589 rpa_compiler_exp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
590 rpa_dbex_debug_recordtail(dbex, rec);
591 if (rpa_dbex_playreversechildrecords(dbex, rec) < 0)
593 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
594 prec = rpa_dbex_record(dbex, rec);
596 rpa_dbex_debug_recordhead(dbex, rec);
597 rpa_compiler_exp_end(dbex->co);
598 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
599 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_BRACKETEXP and
 * RPA_PRODUCTION_REQOP.
 * Opens a compiler expression with the record's quantifier bits, plays the
 * children in order, closes the expression and emits an RVM_BLES branch
 * relocated to the endidx of the expression that is current after the close.
 */
604 static int rpa_dbex_rh_exp(rpadbex_t *dbex, long rec)
606 rarray_t *records = dbex->records;
609 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
610 prec = rpa_dbex_record(dbex, rec);
612 rpa_dbex_debug_recordhead(dbex, rec);
613 rpa_compiler_exp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
614 rpa_dbex_debug_recordtail(dbex, rec);
615 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
617 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
618 prec = rpa_dbex_record(dbex, rec);
620 rpa_dbex_debug_recordhead(dbex, rec);
621 rpa_compiler_exp_end(dbex->co);
622 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
623 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_OROP.
 * Wraps the children in an alternative expression
 * (rpa_compiler_altexp_begin/end) using the record's quantifier bits, then
 * emits an RVM_BLES branch relocated to the current expression's endidx.
 */
628 static int rpa_dbex_rh_orop(rpadbex_t *dbex, long rec)
630 rarray_t *records = dbex->records;
633 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
634 prec = rpa_dbex_record(dbex, rec);
636 rpa_dbex_debug_recordhead(dbex, rec);
637 rpa_compiler_altexp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
638 rpa_dbex_debug_recordtail(dbex, rec);
639 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
641 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
642 prec = rpa_dbex_record(dbex, rec);
644 rpa_dbex_debug_recordhead(dbex, rec);
645 rpa_compiler_altexp_end(dbex->co);
646 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
647 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_NOROP.
 * Identical to rpa_dbex_rh_orop except for the trailing branch, which is
 * RVM_BGRE instead of RVM_BLES.
 */
652 static int rpa_dbex_rh_norop(rpadbex_t *dbex, long rec)
654 rarray_t *records = dbex->records;
657 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
658 prec = rpa_dbex_record(dbex, rec);
660 rpa_dbex_debug_recordhead(dbex, rec);
661 rpa_compiler_altexp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
662 rpa_dbex_debug_recordtail(dbex, rec);
663 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
665 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
666 prec = rpa_dbex_record(dbex, rec);
668 rpa_dbex_debug_recordhead(dbex, rec);
669 rpa_compiler_altexp_end(dbex->co);
670 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
671 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_NOTOP.
 * Wraps the children in a negated expression
 * (rpa_compiler_notexp_begin/end) using the record's quantifier bits, then
 * emits an RVM_BLES branch relocated to the current expression's endidx.
 */
676 static int rpa_dbex_rh_notop(rpadbex_t *dbex, long rec)
678 rarray_t *records = dbex->records;
681 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
682 prec = rpa_dbex_record(dbex, rec);
684 rpa_dbex_debug_recordhead(dbex, rec);
685 rpa_compiler_notexp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
686 rpa_dbex_debug_recordtail(dbex, rec);
687 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
689 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
690 prec = rpa_dbex_record(dbex, rec);
692 rpa_dbex_debug_recordhead(dbex, rec);
693 rpa_compiler_notexp_end(dbex->co);
694 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
695 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_CHARRNG.
 * Resets dbex->co->currange to 0..0 and plays the children; the BEGINCHAR
 * and ENDCHAR handlers store the bounds in currange.p1 and currange.p2.
 * It then emits RPA_MATCHRNG_NAN with the bounds ordered according to the
 * p1 < p2 test, followed by an RVM_BGRE branch relocated to the current
 * expression's endidx.
 */
700 static int rpa_dbex_rh_range(rpadbex_t *dbex, long rec)
702 rarray_t *records = dbex->records;
705 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
706 prec = rpa_dbex_record(dbex, rec);
708 rpa_dbex_debug_recordhead(dbex, rec);
709 dbex->co->currange.p1 = 0;
710 dbex->co->currange.p2 = 0;
711 rpa_dbex_debug_recordtail(dbex, rec);
712 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
714 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
715 prec = rpa_dbex_record(dbex, rec);
717 rpa_dbex_debug_recordhead(dbex, rec);
/* Pass the smaller bound first, so reversed ranges are accepted as well. */
718 if (dbex->co->currange.p1 < dbex->co->currange.p2)
719 rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p1, dbex->co->currange.p2));
721 rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p2, dbex->co->currange.p1));
722 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
723 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_NUMRNG.
 * Numeric counterpart of rpa_dbex_rh_range: the bounds are read directly
 * from the first and last child END records with rpa_record2long into
 * dbex->co->currange.p1 / p2 (instead of being set by child handlers).
 * The children are still played afterwards, then RPA_MATCHRNG_NAN is
 * emitted with the bounds ordered by the p1 < p2 test, followed by an
 * RVM_BGRE branch.
 */
728 static int rpa_dbex_rh_numrange(rpadbex_t *dbex, long rec)
730 rarray_t *records = dbex->records;
734 * Fix me: probably we don't need to access the children from here. There should be a way just to
735 * play them a regular records!
738 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
739 prec = rpa_dbex_record(dbex, rec);
741 rpa_dbex_debug_recordhead(dbex, rec);
742 child = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_END));
743 if (rpa_record2long(child, &dbex->co->currange.p1) < 0)
745 child = rpa_dbex_record(dbex, rpa_recordtree_lastchild(dbex->records, rec, RPA_RECORD_END));
746 if (rpa_record2long(child, &dbex->co->currange.p2) < 0)
748 rpa_dbex_debug_recordtail(dbex, rec);
749 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
751 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
752 prec = rpa_dbex_record(dbex, rec);
754 rpa_dbex_debug_recordhead(dbex, rec);
755 if (dbex->co->currange.p1 < dbex->co->currange.p2)
756 rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p1, dbex->co->currange.p2));
758 rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p2, dbex->co->currange.p1));
759 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
760 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_CLSNUM.
 * After playing the children, converts the first child END record to a
 * number with rpa_record2long and emits a character-match instruction for
 * that code point (opcode chosen by rpa_dbex_getmatchchr from the
 * quantifier bits), followed by an RVM_BGRE branch relocated to the current
 * expression's endidx.
 * NOTE(review): firstchild is looked up here with `rec` already pointing at
 * the END record - confirm rpa_recordtree_firstchild accepts that.
 */
765 static int rpa_dbex_rh_clsnum(rpadbex_t *dbex, long rec)
767 rarray_t *records = dbex->records;
772 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
773 prec = rpa_dbex_record(dbex, rec);
775 rpa_dbex_debug_recordhead(dbex, rec);
776 rpa_dbex_debug_recordtail(dbex, rec);
777 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
779 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
780 prec = rpa_dbex_record(dbex, rec);
782 rpa_dbex_debug_recordhead(dbex, rec);
783 child = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_END));
784 if (rpa_record2long(child, &wc) < 0)
786 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
787 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
788 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_BEGINCHAR.
 * Decodes one UTF-8 character from the END record's input and stores it in
 * dbex->co->currange.p1, the first bound consumed by rpa_dbex_rh_range.
 * Emits no instructions itself.
 */
793 static int rpa_dbex_rh_beginchar(rpadbex_t *dbex, long rec)
795 rarray_t *records = dbex->records;
799 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
800 prec = rpa_dbex_record(dbex, rec);
802 rpa_dbex_debug_recordhead(dbex, rec);
803 rpa_dbex_debug_recordtail(dbex, rec);
804 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
806 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
807 prec = rpa_dbex_record(dbex, rec);
809 rpa_dbex_debug_recordhead(dbex, rec);
810 if (r_utf8_mbtowc(&wc, (const unsigned char*) prec->input, (const unsigned char*)prec->input + prec->inputsiz) < 0) {
814 dbex->co->currange.p1 = wc;
815 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_ENDCHAR.
 * Decodes one UTF-8 character from the END record's input and stores it in
 * dbex->co->currange.p2, the second bound consumed by rpa_dbex_rh_range.
 * Emits no instructions itself.
 */
820 static int rpa_dbex_rh_endchar(rpadbex_t *dbex, long rec)
822 rarray_t *records = dbex->records;
826 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
827 prec = rpa_dbex_record(dbex, rec);
829 rpa_dbex_debug_recordhead(dbex, rec);
830 rpa_dbex_debug_recordtail(dbex, rec);
831 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
833 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
834 prec = rpa_dbex_record(dbex, rec);
836 rpa_dbex_debug_recordhead(dbex, rec);
837 if (r_utf8_mbtowc(&wc, (const unsigned char*) prec->input, (const unsigned char*)prec->input + prec->inputsiz) < 0) {
841 dbex->co->currange.p2 = wc;
842 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Record handler registered for RPA_PRODUCTION_ALTBRANCH and
 * RPA_PRODUCTION_NEGBRANCH.
 * Opens a branch around the children: the non-loopy variant when the record
 * is marked RPA_NONLOOP_PATH, the regular rpa_compiler_branch_begin/end
 * otherwise (the else lines are not shown). The same flag is re-tested on
 * the END record to pick the matching *_end call.
 */
847 static int rpa_dbex_rh_branch(rpadbex_t *dbex, long rec)
849 rarray_t *records = dbex->records;
852 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
853 prec = rpa_dbex_record(dbex, rec);
855 rpa_dbex_debug_recordhead(dbex, rec);
856 if (prec->usertype & RPA_NONLOOP_PATH) {
857 rpa_compiler_nonloopybranch_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
859 rpa_compiler_branch_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
861 rpa_dbex_debug_recordtail(dbex, rec);
862 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
864 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
865 prec = rpa_dbex_record(dbex, rec);
867 rpa_dbex_debug_recordhead(dbex, rec);
868 if (prec->usertype & RPA_NONLOOP_PATH) {
869 rpa_compiler_nonloopybranch_end(dbex->co);
871 rpa_compiler_branch_end(dbex->co);
873 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Emit the code for a reference back to a rule that is currently being
 * inlined (called from rpa_dbex_rh_aref). Only the RPA_MATCH_OPTIONAL bit of
 * the record's quantifier is passed to the wrapping expression.
 * Emitted sequence: compare R_LOO with 0; if the BGRE is not taken, set R0
 * to -1 and branch (BLES) to the expression end; otherwise add R_LOO to
 * R_TOP, move R_LOO into R0 and branch (BLES) to the expression end.
 */
878 static void rpa_dbex_rh_loopref(rpadbex_t *dbex, rparecord_t *prec)
881 * We ignore, it doesn't make sense for loops:
884 rpa_compiler_exp_begin(dbex->co, (prec->usertype & RPA_MATCH_OPTIONAL));
885 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_CMP, R_LOO, DA, XX, 0));
886 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, 3));
887 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_MOVS, R0, DA, XX, -1));
888 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
889 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_ADD, R_TOP, R_TOP, R_LOO, 0));
890 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_MOVS, R0, R_LOO, XX, 0));
891 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
892 rpa_compiler_exp_end(dbex->co);
/*
 * Record handler registered for RPA_PRODUCTION_AREF and RPA_PRODUCTION_CREF
 * (references to other rules).
 * When the record is on a loop path and rpa_parseinfo_loopdetect reports a
 * loop back to the first inlined rule, the referenced rule is looked up and
 * either replaced by a loop reference (rpa_dbex_rh_loopref, when it is
 * already being inlined) or inlined by playing its start record. Otherwise a
 * plain rpa_compiler_reference is emitted. An unresolved name is reported as
 * RPA_E_UNRESOLVEDSYMBOL together with the name.
 * NOTE(review): the else lines and the NULL test on `info` are not visible
 * here; confirm the nesting against the full file.
 * NOTE(review): both rpa_dbex_playrecord calls below discard the result, so
 * a failure while inlining the referenced rule is not propagated.
 */
896 static int rpa_dbex_rh_aref(rpadbex_t *dbex, long rec)
898 const char *name = NULL;
900 rpa_ruleinfo_t *info;
901 rarray_t *records = dbex->records;
904 rec = rpa_recordtree_get(records, rec, RPA_RECORD_START);
905 prec = rpa_dbex_record(dbex, rec);
907 rpa_dbex_debug_recordhead(dbex, rec);
908 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
913 if ((prec->usertype & RPA_LOOP_PATH) && rpa_parseinfo_loopdetect(dbex, rec, rpa_dbex_firstinlined(dbex))) {
914 info = (rpa_ruleinfo_t *) r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
916 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_UNRESOLVEDSYMBOL);
917 RPA_DBEX_SETERRINFO_NAME(dbex, name, namesize);
920 if (rpa_dbex_findinlined(dbex, info->startrec)) {
921 rpa_dbex_rh_loopref(dbex, prec);
923 if (prec->usertype & RPA_MATCH_OPTIONAL) {
925 * Most probably this is useless case - loop refs shouldn't have quantitative modifiers
926 * but in case they do we wrap the inlined production rule in quantitative expression.
927 * The inlined named rule can take the quantitative argument, but I just don't have
928 * a clean way to pass it from here - so, lets play the records inside an expression that
929 * has the right quantitative argument.
930 * We ignore, it doesn't make sense for loops:
933 rpa_compiler_exp_begin(dbex->co, RPA_MATCH_OPTIONAL);
934 rpa_dbex_playrecord(dbex, info->startrec);
935 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
936 rpa_compiler_exp_end(dbex->co);
938 rpa_dbex_playrecord(dbex, info->startrec);
942 rpa_compiler_reference(dbex->co, name, namesize, (prec->usertype & RPA_MATCH_MASK));
944 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
945 rpa_dbex_debug_recordtail(dbex, rec);
946 if (rpa_dbex_playchildrecords(dbex, rec) < 0)
948 rec = rpa_recordtree_get(records, rec, RPA_RECORD_END);
949 prec = rpa_dbex_record(dbex, rec);
951 rpa_dbex_debug_recordhead(dbex, rec);
952 rpa_dbex_debug_recordtail(dbex, rec);
/*
 * Allocate and initialise a rule database object.
 * Creates the compiler, the parser, the text/records/temprecords arrays,
 * the rules hash array and the recstack/inlinestack arrays, enables
 * RPA_DBEXCFG_OPTIMIZATIONS, and fills the handler table (one slot per
 * production id, RPA_PRODUCTION_COUNT slots, zero-initialised) with the
 * rpa_dbex_rh_* handlers used by rpa_dbex_playrecord.
 * Everything created here is released by rpa_dbex_destroy.
 * NOTE(review): no check of the r_zmalloc / *_create results is visible in
 * this listing before they are dereferenced.
 */
957 rpadbex_t *rpa_dbex_create(void)
959 rpadbex_t *dbex = (rpadbex_t *) r_zmalloc(sizeof(*dbex));
961 dbex->co = rpa_compiler_create();
962 dbex->pa = rpa_parser_create();
963 dbex->text = r_array_create(sizeof(char *));
964 dbex->records = r_array_create(sizeof(rparecord_t));
965 dbex->temprecords = r_array_create(sizeof(rparecord_t));
966 dbex->rules = r_harray_create(sizeof(rpa_ruleinfo_t));
967 dbex->recstack = r_array_create(sizeof(unsigned long));
968 dbex->inlinestack = r_array_create(sizeof(unsigned long));
969 dbex->handlers = r_zmalloc(sizeof(rpa_dbex_recordhandler) * RPA_PRODUCTION_COUNT);
970 rpa_dbex_cfgset(dbex, RPA_DBEXCFG_OPTIMIZATIONS, 1);
/* Productions without an entry stay NULL and fall back to rpa_dbex_rh_default. */
972 dbex->handlers[RPA_PRODUCTION_NONE] = rpa_dbex_rh_default;
973 dbex->handlers[RPA_PRODUCTION_NAMEDRULE] = rpa_dbex_rh_namedrule;
974 dbex->handlers[RPA_PRODUCTION_ANONYMOUSRULE] = rpa_dbex_rh_anonymousrule;
975 dbex->handlers[RPA_PRODUCTION_CLS] = rpa_dbex_rh_cls;
976 dbex->handlers[RPA_PRODUCTION_CHAR] = rpa_dbex_rh_char;
977 dbex->handlers[RPA_PRODUCTION_SPECIALCHAR] = rpa_dbex_rh_specialchar;
978 dbex->handlers[RPA_PRODUCTION_CLSCHAR] = rpa_dbex_rh_clschar;
979 dbex->handlers[RPA_PRODUCTION_AREF] = rpa_dbex_rh_aref;
980 dbex->handlers[RPA_PRODUCTION_CREF] = rpa_dbex_rh_aref;
981 dbex->handlers[RPA_PRODUCTION_BRACKETEXP] = rpa_dbex_rh_exp;
982 dbex->handlers[RPA_PRODUCTION_OROP] = rpa_dbex_rh_orop;
983 dbex->handlers[RPA_PRODUCTION_NOTOP] = rpa_dbex_rh_notop;
984 dbex->handlers[RPA_PRODUCTION_ALTBRANCH] = rpa_dbex_rh_branch;
985 dbex->handlers[RPA_PRODUCTION_NEGBRANCH] = rpa_dbex_rh_branch;
986 dbex->handlers[RPA_PRODUCTION_CHARRNG] = rpa_dbex_rh_range;
987 dbex->handlers[RPA_PRODUCTION_NUMRNG] = rpa_dbex_rh_numrange;
988 dbex->handlers[RPA_PRODUCTION_CLSNUM] = rpa_dbex_rh_clsnum;
989 dbex->handlers[RPA_PRODUCTION_BEGINCHAR] = rpa_dbex_rh_beginchar;
990 dbex->handlers[RPA_PRODUCTION_ENDCHAR] = rpa_dbex_rh_endchar;
991 dbex->handlers[RPA_PRODUCTION_NOROP] = rpa_dbex_rh_norop;
992 dbex->handlers[RPA_PRODUCTION_REQOP] = rpa_dbex_rh_exp;
993 dbex->handlers[RPA_PRODUCTION_MINOP] = rpa_dbex_rh_minexp;
994 dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMIT] = rpa_dbex_rh_emit;
995 dbex->handlers[RPA_PRODUCTION_DIRECTIVEABORT] = rpa_dbex_rh_abort;
996 dbex->handlers[RPA_PRODUCTION_DIRECTIVENOEMIT] = rpa_dbex_rh_noemit;
997 dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMITALL] = rpa_dbex_rh_emitall;
998 dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMITNONE] = rpa_dbex_rh_emitnone;
999 dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMITID] = rpa_dbex_rh_uid;
/*
 * Release everything created by rpa_dbex_create. The strings stored in
 * dbex->text are freed one by one before the array that holds them is
 * destroyed; then the compiler, parser, rules, record arrays, stacks and
 * the handler table are released.
 */
1005 void rpa_dbex_destroy(rpadbex_t *dbex)
1009 for (i = 0; i < r_array_length(dbex->text); i++)
1010 r_free(r_array_index(dbex->text, i, char*));
1011 rpa_compiler_destroy(dbex->co);
1012 rpa_parser_destroy(dbex->pa);
1013 r_harray_destroy(dbex->rules);
1014 r_array_destroy(dbex->records);
1015 r_array_destroy(dbex->temprecords);
1016 r_array_destroy(dbex->recstack);
1017 r_array_destroy(dbex->inlinestack);
1018 r_array_destroy(dbex->text);
1019 r_free(dbex->handlers);
/*
 * Recursive worker for rpa_parseinfo_loopdetect: searches the record tree
 * below `parent` for a path that leads to `loopto`, following AREF/CREF
 * references into the rules they name (info->startrec).
 * `inderction` is the recursion depth; it is 0 for the initial call and is
 * used so that `parent == loopto` only counts after at least one step.
 * dbex->recstack holds the records on the current search path: `parent` is
 * compared against every entry before being pushed, and popped again at the
 * end.
 * NOTE(review): the return statements and several braces are not visible in
 * this listing, so the result of each test below cannot be stated here.
 */
1025 static int rpa_parseinfo_loopdetect_do(rpadbex_t *dbex, long parent, long loopto, int inderction)
1033 if (parent == loopto && inderction > 0)
1035 for (i = 0; i < r_array_length(dbex->recstack); i++) {
1036 if (parent == r_array_index(dbex->recstack, i, long))
1039 r_array_add(dbex->recstack, &parent);
1041 if (!(prec = (rparecord_t *)r_array_slot(dbex->records, parent)))
1043 if (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF)
1046 i = rpa_recordtree_firstchild(dbex->records, parent, RPA_RECORD_START);
1047 for (; i >= 0; i = rpa_recordtree_next(dbex->records, i, RPA_RECORD_START)) {
1048 prec = (rparecord_t *)r_array_slot(dbex->records, i);
1049 if (prec->ruleuid == RPA_PRODUCTION_RULENAME)
1051 if (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF) {
1052 rpa_ruleinfo_t *info;
1053 if ((inderction > 0 || i != parent) && i == loopto) {
1055 * We found what we are looking for
1060 if (rpa_dbex_rulename(dbex, i, &name, &namesiz) < 0)
1062 info = (rpa_ruleinfo_t *) r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesiz));
1065 if ((ret = rpa_parseinfo_loopdetect_do(dbex, info->startrec, loopto, inderction + 1)) > 0)
1068 if ((ret = rpa_parseinfo_loopdetect_do(dbex, i, loopto, inderction + 1)) > 0)
/*
 * Test for a mandatory (non-optional) reference, char, class or special
 * char. Presumably this ends the scan of this level, since later siblings
 * can no longer be reached without consuming input; the action line is not
 * shown - TODO confirm.
 */
1072 if ((prec->usertype & RPA_MATCH_OPTIONAL) == 0 && (prec->ruleuid == RPA_PRODUCTION_CREF || prec->ruleuid == RPA_PRODUCTION_AREF ||
1073 prec->ruleuid == RPA_PRODUCTION_CHAR || prec->ruleuid == RPA_PRODUCTION_CLS || prec->ruleuid == RPA_PRODUCTION_SPECIALCHAR))
1078 r_array_removelast(dbex->recstack);
1083 static int rpa_parseinfo_loopdetect(rpadbex_t *dbex, long parent, long loopto)
1085 if (parent != loopto) {
1087 * Make sure we are dealing with a loop first
1089 if (!rpa_parseinfo_loopdetect_do(dbex, loopto, parent, 0))
1093 return (rpa_parseinfo_loopdetect_do(dbex, parent, loopto, 0)) ? 1 : 0;
1097 static void rpa_parseinfo_marklooppath(rpadbex_t *dbex, long parent)
1101 if (rpa_parseinfo_loopdetect(dbex, parent, parent) > 0) {
1102 rpa_record_setusertype(dbex->records, parent, RPA_LOOP_PATH, RVALSET_OR);
1103 for (i = rpa_recordtree_firstchild(dbex->records, parent, RPA_RECORD_START); i >= 0; i = rpa_recordtree_next(dbex->records, i, RPA_RECORD_START)) {
1104 rpa_parseinfo_marklooppath(dbex, i);
1110 static int rpa_parseinfo_rule_checkforloop(rpadbex_t *dbex, const char *name, rsize_t namesize, long loopto)
1112 rpa_ruleinfo_t *info = info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
1116 return rpa_parseinfo_loopdetect(dbex, info->startrec, loopto);
/*
 * Compute the loop flags for the whole database: first mark the records
 * that lie on a loop path (RPA_LOOP_PATH), then mark the alternative
 * branches that are not on a loop path but whose parent is
 * (RPA_NONLOOP_PATH).
 */
1120 static void rpa_dbex_buildloopinfo(rpadbex_t *dbex)
1123 rharray_t *rules = dbex->rules;
1124 rpa_ruleinfo_t *info;
/* Pass 1: run the loop-path marking from the start record of every registered rule. */
1126 for (i = 0; i < r_array_length(rules->members); i++) {
1127 if ((info = (rpa_ruleinfo_t *)r_harray_get(rules, i)) != NULL)
1128 rpa_parseinfo_marklooppath(dbex, info->startrec);
1132 * Mark the non-loop branches.
/* Pass 2: only ALTBRANCH START records without RPA_LOOP_PATH are considered; 'p' is the parent START record. */
1134 for (i = 0; i < r_array_length(dbex->records); i++) {
1135 rparecord_t *prec = (rparecord_t *)r_array_slot(dbex->records, i);
1136 if (prec->type == RPA_RECORD_START &&
1137 (prec->ruleuid == RPA_PRODUCTION_ALTBRANCH) &&
1138 (prec->usertype & RPA_LOOP_PATH) == 0) {
1139 p = rpa_recordtree_parent(dbex->records, i, RPA_RECORD_START);
/* From here on 'prec' refers to the parent record, not to record 'i'. */
1141 prec = (rparecord_t *)r_array_slot(dbex->records, p);
1142 if (prec && (prec->usertype & RPA_LOOP_PATH))
1143 rpa_record_setusertype(dbex->records, i, RPA_NONLOOP_PATH, RVALSET_OR);
/*
 * Rebuild dbex->rules from dbex->records. The records are scanned in
 * order and one rpa_ruleinfo_t entry is added for every named rule,
 * anonymous rule and directive START record found.
 */
1150 static void rpa_dbex_buildruleinfo(rpadbex_t *dbex)
1153 rpa_ruleinfo_t info;
1154 unsigned int nrecords;
1156 const char *name = NULL;
1157 rsize_t namesize = 0;
/* Drop the previous rule table and start over with an empty one. */
1160 r_object_destroy((robject_t *)dbex->rules);
1163 dbex->rules = r_harray_create(sizeof(rpa_ruleinfo_t));
1165 for (i = 0, nrecords = r_array_length(dbex->records); i < nrecords; i++) {
1166 if (!(rec = rpa_dbex_record(dbex, i)))
/* Named rule: stored under the name obtained from rpa_dbex_rulename(). */
1168 if ((rec->ruleuid == RPA_PRODUCTION_NAMEDRULE) && (rec->type & RPA_RECORD_START)) {
1169 r_memset(&info, 0, sizeof(info));
1170 info.type = RPA_RULEINFO_NAMEDRULE;
1172 info.sizerecs = rpa_recordtree_size(dbex->records, i);
1173 if (info.sizerecs < 0)
1175 if (rpa_dbex_rulename(dbex, i, &name, &namesize) < 0) {
1178 r_harray_add(dbex->rules, name, namesize, &info);
/* Jump to the last record of this rule; the loop increment then moves past it. */
1179 i += info.sizerecs - 1;
/* Anonymous rule: it has no name, so the input text of its END record is used as the key. */
1180 } else if ((rec->ruleuid == RPA_PRODUCTION_ANONYMOUSRULE) && (rec->type & RPA_RECORD_START)) {
1181 r_memset(&info, 0, sizeof(info));
1182 info.type = RPA_RULEINFO_ANONYMOUSRULE;
1184 info.sizerecs = rpa_recordtree_size(dbex->records, i);
1185 if (info.sizerecs < 0)
1187 if ((rec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, i, RPA_RECORD_END))))
1188 r_harray_add(dbex->rules, rec->input, rec->inputsiz, &info);
1189 i += info.sizerecs - 1;
/* Directive: any production id in the range DIRECTIVEEMIT..DIRECTIVEEMITID; keyed like an anonymous rule. */
1190 } else if ((rec->type & RPA_RECORD_START) && (rec->ruleuid >= RPA_PRODUCTION_DIRECTIVEEMIT) && (rec->ruleuid <= RPA_PRODUCTION_DIRECTIVEEMITID)) {
1191 r_memset(&info, 0, sizeof(info));
1192 info.type = RPA_RULEINFO_DIRECTIVE;
1194 info.sizerecs = rpa_recordtree_size(dbex->records, i);
1195 if (info.sizerecs < 0)
1197 if ((rec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, i, RPA_RECORD_END))))
1198 r_harray_add(dbex->rules, rec->input, rec->inputsiz, &info);
1199 i += info.sizerecs - 1;
/*
 * Record-tree walk callback used by rpa_dbex_copyrecords(). 'records'
 * is the array being walked, 'rec' the current record index and
 * 'userdata' the rpadbex_t that receives the copies in dbex->records.
 * OCCURENCE records are not copied; their meaning is folded into the
 * usertype of the last record already copied.
 */
1206 static long rpa_dbex_copy_handler(rarray_t *records, long rec, rpointer userdata)
1208 rpadbex_t *dbex = (rpadbex_t *)userdata;
1211 rparecord_t *prec = (rparecord_t *)r_array_slot(records, rec);
1212 if (prec->ruleuid == RPA_PRODUCTION_OCCURENCE && (prec->type & RPA_RECORD_START)) {
1216 } else if (prec->ruleuid == RPA_PRODUCTION_OCCURENCE && (prec->type & (RPA_RECORD_END))) {
1217 ruint32 usertype = RPA_MATCH_NONE;
1220 * Don't copy it but set the usertype of the previous record accordingly.
/* The first input character of the OCCURENCE record selects the match type. */
1222 switch (*prec->input) {
1224 usertype = RPA_MATCH_OPTIONAL;
1227 usertype = RPA_MATCH_MULTIPLE;
1230 usertype = RPA_MATCH_MULTIOPT;
1233 usertype = RPA_MATCH_NONE;
/* Apply the flag to the most recently copied record. */
1235 lastrec = r_array_length(dbex->records) - 1;
1237 rpa_record_setusertype(dbex->records, lastrec, usertype, RVALSET_OR);
/* Any other record with a non-zero ruleuid is copied as is. */
1238 } else if (prec->ruleuid) {
1239 index = r_array_add(dbex->records, prec);
1241 * Optimizations. Lets apply the optimizations while we copy the records.
1242 * This is probably not the most clean way to apply optimizations. In the future
1243 * we should probably think of optimization pass right before compiling.
/* The OROP optimization runs once the END record of an OROP has been copied, starting from the matching START record. */
1245 if (dbex->optimizations) {
1246 if (prec->ruleuid == RPA_PRODUCTION_OROP && (prec->type & RPA_RECORD_END)) {
1247 rpa_optimiztion_orop(dbex->records, rpa_recordtree_get(dbex->records, index, RPA_RECORD_START));
/*
 * Copy the freshly parsed records from dbex->temprecords into the
 * database by walking every top-level record tree with
 * rpa_dbex_copy_handler().
 */
1256 static void rpa_dbex_copyrecords(rpadbex_t *dbex)
1259 rarray_t *records = dbex->temprecords;
/* Start at the START record of record 0 and advance with rpa_recordtree_next() until it returns a negative index. */
1261 for (i = rpa_recordtree_get(records, 0, RPA_RECORD_START); i >= 0; i = rpa_recordtree_next(records, i, RPA_RECORD_START))
1262 rpa_recordtree_walk(records, i, 0, rpa_dbex_copy_handler, dbex);
1266 static rparecord_t *rpa_dbex_record(rpadbex_t *dbex, long rec)
1270 if (!dbex || !dbex->rules)
1272 if (rec < 0 || rec >= r_array_length(dbex->records))
1274 prec = (rparecord_t *)r_array_slot(dbex->records, rec);
1280 static rparecord_t *rpa_dbex_rulerecord(rpadbex_t *dbex, rparule_t rid)
1283 rpa_ruleinfo_t *info;
1286 if (!dbex || !dbex->rules)
1288 info = r_harray_get(dbex->rules, rid);
1291 rec = info->startrec + info->sizerecs - 1;
1292 if (rec < 0 || rec >= r_array_length(dbex->records))
1294 prec = (rparecord_t *)r_array_slot(dbex->records, rec);
1299 static int rpa_dbex_rulename(rpadbex_t *dbex, long rec, const char **name, rsize_t *namesize)
1301 rparecord_t *pnamerec = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_START), RPA_RECORD_END));
1302 if (!pnamerec || !(pnamerec->ruleuid & RPA_PRODUCTION_RULENAME))
1304 *name = pnamerec->input;
1305 *namesize = pnamerec->inputsiz;
/*
 * Put the database into the "open" state. The visible part of the
 * implementation discards the current rule table; the other functions
 * in this file report RPA_E_NOTCLOSED while the rule table is absent.
 */
1310 int rpa_dbex_open(rpadbex_t *dbex)
/* Destroy the rule table built by the last rpa_dbex_close(). */
1315 r_object_destroy((robject_t *)dbex->rules);
1323 void rpa_dbex_close(rpadbex_t *dbex)
1327 rpa_dbex_buildruleinfo(dbex);
1328 rpa_dbex_buildloopinfo(dbex);
1329 rpa_dbex_buildbitmapinfo(dbex);
/*
 * Parse 'size' bytes of rule text from 'rules' and append the resulting
 * records to the database. Reports RPA_E_NOTOPEN when the database is
 * not in the open state and RPA_E_SYNTAXERROR (with offset and line
 * number) when the text cannot be parsed.
 */
1333 long rpa_dbex_load(rpadbex_t *dbex, const char *rules, rsize_t size)
1344 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTOPEN);
/* Work on a private copy of the text; the copy is stored in dbex->text and released by rpa_dbex_destroy(). */
1348 text = r_strndup(rules, size);
1350 r_array_add(dbex->text, &text);
/* The parser writes into the scratch array, which is emptied first. */
1351 r_array_setlength(dbex->temprecords, 0);
1352 if ((ret = rpa_parser_load(dbex->pa, text, size, dbex->temprecords)) < 0) {
/* Backward scan over the text towards its start, used to derive the line number (presumably by counting newlines - TODO confirm). */
1360 for (line = 1; ptext >= text; --ptext) {
1364 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAXERROR);
1365 RPA_DBEX_SETERRINFO_OFFSET(dbex, ret);
1366 RPA_DBEX_SETERRINFO_LINE(dbex, line);
/* Move the parsed records from the scratch array into dbex->records. */
1369 rpa_dbex_copyrecords(dbex);
1374 long rpa_dbex_load_s(rpadbex_t *dbex, const char *rules)
1376 return rpa_dbex_load(dbex, rules, r_strlen(rules));
/*
 * Print one line of the rule tree dump for record 'rec': indentation
 * according to 'level', then 'rulelabel' with the optional/loop marker
 * characters, then the (possibly truncated) input text of the record.
 */
1380 void rpa_dbex_dumpindented(rpadbex_t *dbex, long rec, int level, const char *rulelabel)
/* The END record is used for the printed flags and input text. */
1384 rparecord_t *prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END));
1388 r_memset(buffer, 0, sizeof(buffer));
1389 for (i = 0; i < level + 1; i++)
1392 r_printf("%s, %c, %c", rulelabel, rpa_record_optchar(prec, 'x'), rpa_record_loopchar(prec, 'x'));
/* Copy at most sizeof(buffer) - 1 bytes so the zeroed buffer stays NUL terminated. */
1394 size = R_MIN(prec->inputsiz, sizeof(buffer) - 1);
1395 r_strncpy(buffer, prec->input, size);
/* A completely filled buffer gets a trailing " ..." marker. */
1397 if (size == (sizeof(buffer) - 1))
1398 r_printf(" %s ...\n", buffer);
1400 r_printf(" %s\n", buffer);
1405 static long rpa_dbex_firstinlined(rpadbex_t *dbex)
1407 long ret = r_array_empty(dbex->inlinestack) ? -1 : r_array_index(dbex->inlinestack, 0, long);
/*
 * Search dbex->inlinestack for the record index 'startrec'. The caller
 * in rpa_dbex_dumptree_do() treats a zero result as "not on the stack".
 */
1412 static int rpa_dbex_findinlined(rpadbex_t *dbex, long startrec)
/* Linear scan; the stack elements are of type long. */
1415 for (i = 0; i < r_array_length(dbex->inlinestack); i++) {
1416 if (r_array_index(dbex->inlinestack, i, long) == startrec)
/*
 * Recursive worker of rpa_dbex_dumptree(): prints the record 'rec' at
 * indentation 'level' followed by its children one level deeper.
 * Rule references (AREF/CREF) are resolved by name; the referenced rule
 * is either dumped in place (inlined) or printed as a "loopref" line.
 */
1423 static void rpa_dbex_dumptree_do(rpadbex_t *dbex, long rec, int level)
1425 rparecord_t *prec = rpa_dbex_record(dbex, rec);
1426 if (prec && prec->ruleuid == RPA_PRODUCTION_RULENAME)
1428 if (prec && (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF)) {
1429 const char *name = NULL;
1430 rsize_t namesize = 0;
1432 rpa_ruleinfo_t *info;
1434 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) >= 0) {
/* The loop check is made against the rule at the bottom of the inline stack, i.e. the rule the dump started from. */
1435 loop = rpa_parseinfo_rule_checkforloop(dbex, name, namesize, rpa_dbex_firstinlined(dbex));
1436 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
/* Inline the referenced rule only if it is not already being inlined. */
1438 if (!rpa_dbex_findinlined(dbex, info->startrec)) {
1440 * Temporary set the quantitative flags for the inlined rule to the parent
1441 * reference, so they are printed correctly. After the printing is done
1442 * restore the original flags.
1444 rparecord_t *prulestart = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_START));
1445 rparecord_t *pruleend = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_END));
/* Remember whether the rule itself was optional before the flag of the reference is OR-ed in. */
1446 unsigned long optional = (prulestart->usertype & RPA_MATCH_OPTIONAL);
1447 prulestart->usertype |= (prec->usertype & RPA_MATCH_OPTIONAL);
1448 pruleend->usertype |= (prec->usertype & RPA_MATCH_OPTIONAL);
/* Push the rule while it is being dumped so nested references to it are detected by rpa_dbex_findinlined(). */
1449 r_array_add(dbex->inlinestack, &info->startrec);
1450 rpa_dbex_dumptree_do(dbex, info->startrec, level);
1451 r_array_removelast(dbex->inlinestack);
/* Restore step: clears the optional flag again (presumably only when 'optional' saved above was 0 - TODO confirm). */
1453 prulestart->usertype &= ~RPA_MATCH_OPTIONAL;
1454 pruleend->usertype &= ~RPA_MATCH_OPTIONAL;
1457 rpa_dbex_dumpindented(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END), level, "loopref");
/* Regular record: print it under its own rule label, then recurse into the child START records. */
1463 rpa_dbex_dumpindented(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END), level, prec->rule);
1464 for (rec = rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_START); rec >= 0; rec = rpa_recordtree_next(dbex->records, rec, RPA_RECORD_START)) {
1465 rpa_dbex_dumptree_do(dbex, rec, level + 1);
/*
 * Print the record tree of the rule with id 'rid'. Error codes recorded
 * here: RPA_E_PARAM, RPA_E_NOTCLOSED, and RPA_E_NOTFOUND when 'rid'
 * does not name a rule.
 */
1470 int rpa_dbex_dumptree(rpadbex_t *dbex, rparule_t rid)
1472 rpa_ruleinfo_t *info;
1477 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_PARAM);
1481 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1484 if (!(info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid))) {
1485 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
/* The dumped rule is the bottom entry of the inline stack for the duration of the dump. */
1488 r_array_add(dbex->inlinestack, &info->startrec);
1489 rpa_dbex_dumptree_do(dbex, info->startrec, 0);
1490 r_array_removelast(dbex->inlinestack);
/*
 * Print the text of every rule in the database, one rule per line.
 * Reports RPA_E_NOTCLOSED through the error info.
 */
1495 int rpa_dbex_dumpproductions(rpadbex_t *dbex)
1504 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
/* Iterate over all rule ids with rpa_dbex_first()/rpa_dbex_next(). */
1507 for (rid = rpa_dbex_first(dbex); rid >= 0; rid = rpa_dbex_next(dbex, rid)) {
1508 ret = rpa_dbex_strncpy(dbex, buffer, rid, sizeof(buffer));
/* A return value equal to the buffer size is treated as truncated text and marked with " ...". */
1510 if (ret == sizeof(buffer))
1511 r_printf(" %s ...\n", buffer);
1513 r_printf(" %s\n", buffer);
/*
 * Dump every record of dbex->records with rpa_record_dump().
 * Reports RPA_E_NOTCLOSED through the error info.
 */
1521 int rpa_dbex_dumprecords(rpadbex_t *dbex)
1528 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1531 for (i = 0; i < r_array_length(dbex->records); i++) {
1532 rpa_record_dump(dbex->records, i);
/*
 * Print one summary line per rule table entry: start record, number of
 * records, code offset, code size and the key the entry is stored under.
 * Reports RPA_E_NOTCLOSED through the error info.
 */
1538 int rpa_dbex_dumpinfo(rpadbex_t *dbex)
1541 rpa_ruleinfo_t *info;
1546 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1549 for (i = 0; i < r_array_length(dbex->rules->names); i++) {
1550 rstr_t *name = r_array_index(dbex->rules->names, i, rstr_t*);
/* NOTE(review): 'info' is dereferenced right away, whereas rpa_dbex_buildloopinfo() checks the same lookup for NULL - confirm it cannot be NULL here. */
1551 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
/* The entry type selects what is printed in front of the summary. */
1552 switch (info->type) {
1553 case RPA_RULEINFO_NAMEDRULE:
1556 case RPA_RULEINFO_ANONYMOUSRULE:
1559 case RPA_RULEINFO_DIRECTIVE:
/* NOTE(review): startrec/sizerecs are printed with %d while codeoff/codesiz use %ld; startrec is stored into arrays of long elsewhere in this file, so %ld may be required - verify against rpa_ruleinfo_t. */
1566 r_printf("(%7d, %4d, code: %7ld, %5ld) : %s\n", info->startrec, info->sizerecs, info->codeoff, info->codesiz, name->str);
/*
 * Print a "#define <alias> <number>" line for every emitid directive
 * that carries an alias name. Reports RPA_E_NOTCLOSED through the
 * error info.
 */
1572 int rpa_dbex_dumpuids(rpadbex_t *dbex)
1576 rpa_ruleinfo_t *info;
/* Scratch buffer for the NUL terminated alias name; it is resized per alias below. */
/* NOTE(review): the buffer is allocated before the RPA_E_NOTCLOSED check - confirm the early error returns release it. */
1577 char *buffer = r_zmalloc(32 * sizeof(char));
1582 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1585 for (i = 0; i < r_array_length(dbex->rules->names); i++) {
1586 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
1587 if (info->type == RPA_RULEINFO_DIRECTIVE) {
/* NOTE(review): rpa_dbex_record() has NULL-returning guards, but 'prec' is dereferenced unchecked here and again below. */
1588 rparecord_t *prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_END));
/* Only non-empty emitid directives are of interest. */
1589 if (prec->ruleuid == RPA_PRODUCTION_DIRECTIVEEMITID && prec->inputsiz) {
1590 rec = rpa_recordtree_firstchild(dbex->records, info->startrec, RPA_RECORD_START);
1592 prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END));
1593 if (prec->ruleuid == RPA_PRODUCTION_ALIASNAME) {
/* The record following the alias name holds the numeric value. */
1595 if (rpa_record2long(rpa_dbex_record(dbex, rpa_recordtree_next(dbex->records, rec, RPA_RECORD_END)), &dec) < 0)
/* Resize to inputsiz + 1 and zero the buffer so the copied alias is NUL terminated. */
/* NOTE(review): the r_realloc() result overwrites 'buffer' directly; a failed reallocation would lose the old pointer. */
1597 buffer = r_realloc(buffer, prec->inputsiz + 1);
1598 r_memset(buffer, 0, prec->inputsiz + 1);
1599 r_memcpy(buffer, prec->input, prec->inputsiz);
1600 r_printf("#define %s %d\n", buffer, dec);
/* Advance to the next child START record of the directive. */
1603 rec = rpa_recordtree_next(dbex->records, rec, RPA_RECORD_START);
/*
 * Disassemble the code generated for the rule 'rid' with rvm_asm_dump().
 * Error codes recorded here: RPA_E_PARAM and RPA_E_NOTCLOSED.
 */
1613 int rpa_dbex_dumpcode(rpadbex_t* dbex, rparule_t rid)
1615 rpa_ruleinfo_t *info;
1619 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_PARAM);
1623 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1626 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid);
/* codeoff/codesiz are filled in by rpa_dbex_compile_rule(). */
1629 rvm_asm_dump(rvm_codegen_getcode(dbex->co->cg, info->codeoff), info->codesiz);
/*
 * Length of the text of the rule 'rid', taken from the input size of
 * the rule's last record. Reports RPA_E_NOTFOUND when the rule record
 * cannot be obtained.
 */
1634 rsize_t rpa_dbex_strlen(rpadbex_t *dbex, rparule_t rid)
1641 if ((prec = rpa_dbex_rulerecord(dbex, rid)) == NULL) {
1642 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1645 size = prec->inputsiz;
/*
 * Copy the text of the rule 'rid' into 'dst', a buffer of 'n' bytes.
 * Reports RPA_E_NOTFOUND when the rule record cannot be obtained.
 */
1650 rsize_t rpa_dbex_strncpy(rpadbex_t *dbex, char *dst, rparule_t rid, rsize_t n)
1657 if ((prec = rpa_dbex_rulerecord(dbex, rid)) == NULL) {
1658 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1661 size = prec->inputsiz;
/* The whole destination is zeroed first, so the result is NUL terminated whenever fewer than 'n' bytes are copied. */
/* NOTE(review): the clamping of 'size' against 'n' is not visible in this excerpt - confirm that size < n holds at the copy. */
1664 r_memset(dst, 0, n);
1665 r_strncpy(dst, prec->input, size);
1670 const char *rpa_dbex_name(rpadbex_t *dbex, rparule_t rid)
1677 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1680 if (rid >= r_array_length(dbex->rules->members))
1682 name = r_array_index(dbex->rules->names, rid, rstr_t*);
/*
 * Start an iteration over the rule ids. Reports RPA_E_NOTCLOSED, or
 * RPA_E_NOTFOUND when the rule table is empty.
 */
1687 rparule_t rpa_dbex_first(rpadbex_t *dbex)
1692 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1696 if (r_array_length(dbex->rules->members) <= 0) {
1697 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
/*
 * Return the id of the last rule, i.e. the number of rule table members
 * minus one. Reports RPA_E_NOTCLOSED, or RPA_E_NOTFOUND when the rule
 * table is empty.
 */
1704 rparule_t rpa_dbex_last(rpadbex_t *dbex)
1709 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1713 if (r_array_length(dbex->rules->members) <= 0) {
1714 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1717 return r_array_length(dbex->rules->members) - 1;
/*
 * Find the id of the rule stored under 'name' (of length 'namesize').
 * Reports RPA_E_NOTCLOSED, or RPA_E_NOTFOUND when the lookup fails.
 */
1721 rparule_t rpa_dbex_lookup(rpadbex_t *dbex, const char *name, rsize_t namesize)
1729 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
/* Uses the "tail" lookup of the hash array (presumably so the most recently added entry with this name wins - TODO confirm). */
1733 ret = (rparule_t) r_harray_taillookup(dbex->rules, name, namesize);
1735 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1741 rparule_t rpa_dbex_lookup_s(rpadbex_t *dbex, const char *name)
1743 return rpa_dbex_lookup(dbex, name, r_strlen(name));
/*
 * Advance an iteration over the rule ids started with rpa_dbex_first().
 * Callers in this file stop iterating when the result is negative.
 * Reports RPA_E_NOTCLOSED through the error info.
 */
1747 rparule_t rpa_dbex_next(rpadbex_t *dbex, rparule_t rid)
1752 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
/* Range check of the id against the number of rule table members. */
1757 if (rid < r_array_length(dbex->rules->members))
/*
 * Counterpart of rpa_dbex_next() for stepping to the previous rule id
 * (judging by the name; the stepping logic is not visible here).
 * Reports RPA_E_NOTCLOSED through the error info.
 */
1763 rparule_t rpa_dbex_prev(rpadbex_t *dbex, rparule_t rid)
1768 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
/*
 * Return the error code most recently stored in dbex->err by the
 * RPA_DBEX_SETERRINFO_CODE() macro.
 */
1778 long rpa_dbex_lasterror(rpadbex_t *dbex)
1782 return dbex->err.code;
/*
 * Copy the complete error information (dbex->err) into '*errinfo'.
 * Both pointers are checked for NULL before the copy.
 */
1786 long rpa_dbex_lasterrorinfo(rpadbex_t *dbex, rpa_errinfo_t *errinfo)
1788 if (!dbex || !errinfo)
1790 r_memcpy(errinfo, &dbex->err, sizeof(rpa_errinfo_t));
/*
 * Return a version string (presumably that of the library - the body is
 * not visible here).
 * NOTE(review): the empty parameter list "()" leaves the parameters
 * unspecified in C; "(void)" would state that there are none.
 */
1795 const char *rpa_dbex_version()
/*
 * Compile the single rule 'rid' and record where its code lives:
 * info->codeoff is the code size before compiling, info->codesiz the
 * amount of code added by the rule.
 */
1801 static int rpa_dbex_compile_rule(rpadbex_t *dbex, rparule_t rid)
1804 rpa_ruleinfo_t *info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid);
/* Remember the current end of the generated code; the rule's code starts there. */
1808 codeoff = rvm_codegen_getcodesize(dbex->co->cg);
1810 * Set the rid in the rulepref, so the compiler associates this rule
1811 * with the correct rid.
1813 rpa_compiler_rulepref_set_ruleid_s(dbex->co, rpa_dbex_name(dbex, rid), rid);
/* Code generation proper: replay the rule's records starting at its first record. */
1814 if (rpa_dbex_playrecord(dbex, info->startrec) < 0)
1816 info->codeoff = codeoff;
1817 info->codesiz = rvm_codegen_getcodesize(dbex->co->cg) - codeoff;
/*
 * Compile every rule of the database into one code image and resolve
 * the code labels. Error codes recorded here: RPA_E_NOTCLOSED,
 * RPA_E_COMPILE when a rule fails to compile, and
 * RPA_E_UNRESOLVEDSYMBOL (with the label name) when relocation fails.
 */
1822 int rpa_dbex_compile(rpadbex_t *dbex)
1825 rvm_codelabel_t *labelerr;
1830 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1834 * By default all production rules emit
/* Every compilation starts from a fresh compiler object. */
1837 rpa_compiler_destroy(dbex->co);
1838 dbex->co = rpa_compiler_create();
1839 rpa_dbex_setemit(dbex, TRUE);
1841 for (rid = rpa_dbex_first(dbex); rid >= 0; rid = rpa_dbex_next(dbex, rid)) {
1842 if (rpa_dbex_compile_rule(dbex, rid) < 0) {
1843 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_COMPILE);
/* On failure 'labelerr' identifies the label that could not be resolved; its name is copied into the error info. */
1848 if (rvm_codegen_relocate(dbex->co->cg, &labelerr) < 0) {
1849 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_UNRESOLVEDSYMBOL);
1850 RPA_DBEX_SETERRINFO_NAME(dbex, labelerr->name->str, labelerr->name->size);
/*
 * Return a pointer to the first instruction of the compiled code.
 * Reports RPA_E_NOTCLOSED, or RPA_E_NOTCOMPILED when the database has
 * not been compiled or the compiled code is empty.
 */
1858 rvm_asmins_t *rpa_dbex_executable(rpadbex_t *dbex)
1863 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1866 if (!dbex->compiled || rvm_codegen_getcodesize(dbex->co->cg) == 0) {
1867 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCOMPILED);
1870 return rvm_codegen_getcode(dbex->co->cg, 0);
/*
 * Return the offset of the code of the rule 'rid' within the compiled
 * code (info->codeoff, set by rpa_dbex_compile_rule()). Error codes
 * recorded here: RPA_E_NOTCLOSED, RPA_E_NOTCOMPILED and RPA_E_NOTFOUND.
 */
1874 long rpa_dbex_executableoffset(rpadbex_t *dbex, rparule_t rid)
1876 rpa_ruleinfo_t *info;
1881 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1884 if (!dbex->compiled) {
1885 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCOMPILED);
1888 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid);
1890 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1893 return info->codeoff;
/*
 * Set the configuration option 'cfg' to 'val'. Two options are handled:
 * RPA_DBEXCFG_OPTIMIZATIONS and RPA_DBEXCFG_DEBUG.
 */
1897 long rpa_dbex_cfgset(rpadbex_t *dbex, unsigned long cfg, unsigned long val)
/* The optimizations setting is consulted by rpa_dbex_copy_handler() while records are copied. */
1901 if (cfg == RPA_DBEXCFG_OPTIMIZATIONS) {
1902 dbex->optimizations = val;
1904 } else if(cfg == RPA_DBEXCFG_DEBUG) {
1912 long rpa_dbex_cfgget(rpadbex_t *dbex, unsigned long cfg)
1916 if (cfg == RPA_DBEXCFG_OPTIMIZATIONS) {
1917 return dbex->optimizations;
1918 } else if(cfg == RPA_DBEXCFG_DEBUG) {