RPA Toolkit
added --dump-alias. Work on the RJS.
[rpatk.git] / rpa2 / rpadbex.c
1 #include "rpacompiler.h"
2 #include "rpadbex.h"
3 #include "rpaerror.h"
4 #include "rpaparser.h"
5 #include "rpaoptimization.h"
6 #include "rmem.h"
7 #include "rutf.h"
8
/*
 * Callback type for the per-production record handlers stored in
 * rpadbex_s.handlers. A handler receives the database and the index of the
 * record being played; the handlers in this file return 0 on success and -1
 * on failure.
 */
9 typedef rint (*rpa_dbex_recordhandler)(rpadbex_t *dbex, rlong rec);
10
/* Values stored in rpa_ruleinfo_t.type to tell what kind of entry it is. */
11 #define RPA_RULEINFO_NONE 0
12 #define RPA_RULEINFO_NAMEDRULE 1
13 #define RPA_RULEINFO_ANONYMOUSRULE 2
14 #define RPA_RULEINFO_DIRECTIVE 3
15
/*
 * Helpers that store one piece of error information in (__d__)->err and set
 * the matching RPA_ERRINFO_* bit in err.mask so the field is known to be valid.
 */
#define RPA_DBEX_SETERRINFO_CODE(__d__, __e__) \
        do { \
                (__d__)->err.code = (__e__); \
                (__d__)->err.mask |= RPA_ERRINFO_CODE; \
        } while (0)

#define RPA_DBEX_SETERRINFO_OFFSET(__d__, __o__) \
        do { \
                (__d__)->err.offset = (__o__); \
                (__d__)->err.mask |= RPA_ERRINFO_OFFSET; \
        } while (0)

#define RPA_DBEX_SETERRINFO_RULEID(__d__, __r__) \
        do { \
                (__d__)->err.ruleid = (__r__); \
                (__d__)->err.mask |= RPA_ERRINFO_RULEID; \
        } while (0)

/*
 * The name buffer is zeroed first and at most sizeof(name) - 1 bytes are
 * copied, so the stored name always ends up NUL terminated.
 */
#define RPA_DBEX_SETERRINFO_NAME(__d__, __n__, __s__) \
        do { \
                (__d__)->err.mask |= RPA_ERRINFO_NAME; \
                r_memset((__d__)->err.name, 0, sizeof((__d__)->err.name)); \
                r_strncpy((__d__)->err.name, (__n__), R_MIN((__s__), (sizeof((__d__)->err.name) - 1))); \
        } while (0)
23
24
/*
 * Per-rule bookkeeping entry kept in rpadbex_s.rules.
 */
25 typedef struct rpa_ruleinfo_s {
26         rlong startrec;      /* index of the rule's first record in rpadbex_s.records */
27         rlong sizerecs;      /* number of records that make up the rule */
28         rlong codeoff;       /* presumably the offset of the rule's generated code - TODO confirm */
29         rlong codesiz;       /* presumably the size of the rule's generated code - TODO confirm */
30         rulong type;         /* one of the RPA_RULEINFO_* values */
31 } rpa_ruleinfo_t;
32
33
/*
 * State of one BNF database: the parsed records, the rule table built from
 * them and the compiler used to generate code for the rules.
 */
34 struct rpadbex_s {
35         rpa_compiler_t *co;                  /* compiler, created by rpa_compiler_create() */
36         rpa_parser_t *pa;                    /* parser, created by rpa_parser_create() */
37         rarray_t *records;                   /* array of rparecord_t */
38         rharray_t *rules;                    /* hashed array of rpa_ruleinfo_t */
39         rarray_t *recstack;                  /* records currently being visited by the loop detection */
40         rarray_t *inlinestack;               /* record indexes of the named rules currently being compiled */
41         rpa_dbex_recordhandler *handlers;    /* handler table indexed by RPA_PRODUCTION_* */
42         rpa_errinfo_t err;                   /* details of the last error, see RPA_DBEX_SETERRINFO_* */
43         rulong optimizations:1;              /* presumably set through RPA_DBEXCFG_OPTIMIZATIONS - TODO confirm */
44 };
45
/*
 * Forward declarations of file-local helpers that the record handlers below
 * call before the helpers' definitions appear.
 */
46 static rparecord_t *rpa_dbex_rulerecord(rpadbex_t *dbex, rparule_t rid);
47 static rparecord_t *rpa_dbex_record(rpadbex_t *dbex, rlong rec);
48 static rint rpa_dbex_rulename(rpadbex_t *dbex, rlong rec, const rchar **name, rsize_t *namesize);
49 static rint rpa_parseinfo_loopdetect(rpadbex_t *dbex, rlong parent, rlong loopto);
50 static rlong rpa_dbex_play_recordhandler(rpadbex_t *dbex, rlong rec);
51 static rlong rpa_dbex_play_recordhandlers(rpadbex_t *dbex, rlong rec, rlong nrecs);
52 static rlong rpa_dbex_firstinlined(rpadbex_t *dbex);
53 static rint rpa_dbex_findinlined(rpadbex_t *dbex, rlong startrec);
54
55
56 static rlong rpa_dbex_getmatchchr(rulong matchtype)
57 {
58         switch (matchtype & RPA_MATCH_MASK) {
59         default:
60         case RPA_MATCH_NONE:
61                 return RPA_MATCHCHR_NAN;
62                 break;
63         case RPA_MATCH_MULTIPLE:
64                 return RPA_MATCHCHR_MUL;
65                 break;
66         case RPA_MATCH_OPTIONAL:
67                 return RPA_MATCHCHR_OPT;
68                 break;
69         case RPA_MATCH_MULTIOPT:
70                 return RPA_MATCHCHR_MOP;
71                 break;
72         };
73         return RPA_MATCHCHR_NAN;
74 }
75
76
77 static rlong rpa_dbex_getmatchspecialchr(rulong matchtype)
78 {
79         switch (matchtype & RPA_MATCH_MASK) {
80         default:
81         case RPA_MATCH_NONE:
82                 return RPA_MATCHSPCHR_NAN;
83                 break;
84         case RPA_MATCH_MULTIPLE:
85                 return RPA_MATCHSPCHR_MUL;
86                 break;
87         case RPA_MATCH_OPTIONAL:
88                 return RPA_MATCHSPCHR_OPT;
89                 break;
90         case RPA_MATCH_MULTIOPT:
91                 return RPA_MATCHSPCHR_MOP;
92                 break;
93         };
94         return RPA_MATCHSPCHR_NAN;
95 }
96
97
98 static rint rpa_record2long(rparecord_t *prec, ruint32 *num)
99 {
100         rchar *endptr = NULL;
101         rchar buffer[64];
102
103         if (!prec || !num || prec->inputsiz == 0 || prec->inputsiz >= sizeof(buffer))
104                 return -1;
105         r_memset(buffer, 0, sizeof(buffer));
106         r_memcpy(buffer, prec->input, prec->inputsiz);
107         if (prec->ruleuid == RPA_PRODUCTION_HEX) {
108                 *num = (ruint32)r_strtoul(prec->input, &endptr, 16);
109         } else if (prec->ruleuid == RPA_PRODUCTION_DEC) {
110                 *num = (ruint32)r_strtoul(prec->input, &endptr, 10);
111         } else {
112                 return -1;
113         }
114         return 0;
115 }
116
117
118 static rint rpa_dbex_rh_uid(rpadbex_t *dbex, rlong rec)
119 {
120         const rchar *name = NULL;
121         rsize_t namesize;
122         ruint32 uid = 0;
123         rparecord_t *pnumrec;
124         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
125
126         if (prec->type & RPA_RECORD_START) {
127                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
128                         RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAX_ERROR);
129                         return -1;
130                 }
131                 pnumrec = rpa_dbex_record(dbex, rpa_recordtree_lastchild(dbex->records, rec, RPA_RECORD_END));
132                 if (!pnumrec) {
133                         RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAX_ERROR);
134                         return -1;
135                 }
136                 if (rpa_record2long(pnumrec, &uid) < 0) {
137                         RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAX_ERROR);
138                         return -1;
139                 }
140                 rpa_compiler_rulepref_set_ruleuid(dbex->co, name, namesize, uid);
141         } else if (prec->type & RPA_RECORD_END) {
142
143         }
144         return 0;
145 }
146
147
148 static rint rpa_dbex_rh_emit(rpadbex_t *dbex, rlong rec)
149 {
150         const rchar *name = NULL;
151         rsize_t namesize;
152         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
153
154         if (prec->type & RPA_RECORD_START) {
155                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
156                         return -1;
157                 }
158                 rpa_compiler_rulepref_set_flag(dbex->co, name, namesize, RPA_RFLAG_EMITRECORD);
159         } else if (prec->type & RPA_RECORD_END) {
160
161         }
162         return 0;
163 }
164
165
166 static rint rpa_dbex_rh_noemit(rpadbex_t *dbex, rlong rec)
167 {
168         const rchar *name = NULL;
169         rsize_t namesize;
170         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
171
172         if (prec->type & RPA_RECORD_START) {
173                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
174                         return -1;
175                 }
176                 rpa_compiler_rulepref_clear_flag(dbex->co, name, namesize, RPA_RFLAG_EMITRECORD);
177         } else if (prec->type & RPA_RECORD_END) {
178
179         }
180         return 0;
181 }
182
183
184 static rint rpa_dbex_setemit(rpadbex_t *dbex, rboolean emit)
185 {
186         rlong i;
187         rpa_ruleinfo_t *info;
188
189         for (i = 0; i < r_array_length(dbex->rules->names); i++) {
190                 rstr_t *name = r_array_index(dbex->rules->names, i, rstr_t*);
191                 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
192                 if (info->type == RPA_RULEINFO_NAMEDRULE) {
193                         if (emit) {
194                                 rpa_compiler_rulepref_set_flag(dbex->co, name->str, name->size, RPA_RFLAG_EMITRECORD);
195                         } else {
196                                 rpa_compiler_rulepref_clear_flag(dbex->co, name->str, name->size, RPA_RFLAG_EMITRECORD);
197                         }
198                 }
199         }
200         return 0;
201 }
202
203
204 static rint rpa_dbex_rh_emitall(rpadbex_t *dbex, rlong rec)
205 {
206         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
207
208         if (prec->type & RPA_RECORD_START) {
209                 rpa_dbex_setemit(dbex, TRUE);
210         } else if (prec->type & RPA_RECORD_END) {
211
212         }
213         return 0;
214 }
215
216
217 static rint rpa_dbex_rh_emitnone(rpadbex_t *dbex, rlong rec)
218 {
219         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
220
221         if (prec->type & RPA_RECORD_START) {
222                 rpa_dbex_setemit(dbex, FALSE);
223         } else if (prec->type & RPA_RECORD_END) {
224
225         }
226         return 0;
227 }
228
229
230 static rint rpa_dbex_rh_namedrule(rpadbex_t *dbex, rlong rec)
231 {
232         const rchar *name = NULL;
233         rsize_t namesize;
234         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
235
236         if (prec->type & RPA_RECORD_START) {
237                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
238
239                         return -1;
240                 }
241                 if (!r_array_empty(dbex->inlinestack)) {
242                         rpa_compiler_inlinerule_begin(dbex->co, name, namesize, 0);
243                 } else {
244                         rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EMITHEAD, XX, XX, XX, 0));
245                         rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_SHIFT, XX, XX, XX, 0));
246                         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_BL, DA, XX, XX, 3));
247                         rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EMITTAIL, XX, XX, XX, 0));
248                         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_EXT, XX, XX, XX, 0));
249
250                         if ((prec->usertype & RPA_LOOP_PATH)) {
251                                 rpa_compiler_loop_begin(dbex->co, name, namesize);
252                         } else {
253                                 rpa_compiler_rule_begin(dbex->co, name, namesize);
254                         }
255                 }
256                 r_array_add(dbex->inlinestack, &rec);
257         } else if (prec->type & RPA_RECORD_END) {
258                 r_array_removelast(dbex->inlinestack);
259
260                 if (!r_array_empty(dbex->inlinestack)) {
261                         rpa_compiler_inlinerule_end(dbex->co);
262                 } else {
263                         if ((prec->usertype & RPA_LOOP_PATH)) {
264                                 rpa_compiler_loop_end(dbex->co);
265                         } else {
266                                 rpa_compiler_rule_end(dbex->co);
267                         }
268                 }
269         }
270         return 0;
271 }
272
273
274 static rint rpa_dbex_rh_anonymousrule(rpadbex_t *dbex, rlong rec)
275 {
276         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
277
278         if (prec->type & RPA_RECORD_START) {
279                 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EMITHEAD, XX, XX, XX, 0));
280                 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_SHIFT, XX, XX, XX, 0));
281                 rpa_compiler_exp_begin(dbex->co, RPA_MATCH_NONE);
282
283         } else if (prec->type & RPA_RECORD_END) {
284                 rpa_compiler_exp_end(dbex->co);
285                 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EMITTAIL, XX, XX, XX, 0));
286                 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_EXT, XX, XX, XX, 0));
287         }
288
289         return 0;
290 }
291
292
293 static rint rpa_dbex_rh_char(rpadbex_t *dbex, rlong rec)
294 {
295         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
296
297         if (prec->type & RPA_RECORD_END) {
298                 ruint32 wc = 0;
299                 if (r_utf8_mbtowc(&wc, (const ruchar*) prec->input, (const ruchar*)prec->input + prec->inputsiz) < 0) {
300
301                         return -1;
302                 }
303                 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
304                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
305         }
306
307         return 0;
308 }
309
310
311 static rint rpa_dbex_rh_specialchar(rpadbex_t *dbex, rlong rec)
312 {
313         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
314
315         if (prec->type & RPA_RECORD_END) {
316                 ruint32 wc = 0;
317                 if (r_utf8_mbtowc(&wc, (const ruchar*) prec->input, (const ruchar*)prec->input + prec->inputsiz) < 0) {
318
319                         return -1;
320                 }
321                 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchspecialchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
322                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
323         }
324
325         return 0;
326 }
327
328
329 static rint rpa_dbex_rh_cls(rpadbex_t *dbex, rlong rec)
330 {
331         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
332
333         if (prec->type & RPA_RECORD_START) {
334                 rpa_compiler_class_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
335
336         } else if (prec->type & RPA_RECORD_END) {
337                 rpa_compiler_class_end(dbex->co);
338                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
339         }
340
341         return 0;
342 }
343
344
345 static rint rpa_dbex_rh_clschar(rpadbex_t *dbex, rlong rec)
346 {
347         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
348
349         if (prec->type & RPA_RECORD_END) {
350                 ruint32 wc = 0;
351                 if (r_utf8_mbtowc(&wc, (const ruchar*) prec->input, (const ruchar*)prec->input + prec->inputsiz) < 0) {
352
353                         return -1;
354                 }
355                 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, wc));
356                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
357         }
358
359         return 0;
360 }
361
362
363 static rint rpa_dbex_rh_exp(rpadbex_t *dbex, rlong rec)
364 {
365         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
366
367         if (prec->type & RPA_RECORD_START) {
368                 rpa_compiler_exp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
369
370         } else if (prec->type & RPA_RECORD_END) {
371                 rpa_compiler_exp_end(dbex->co);
372                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
373         }
374
375         return 0;
376 }
377
378
379 static rint rpa_dbex_rh_orop(rpadbex_t *dbex, rlong rec)
380 {
381         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
382
383         if (prec->type & RPA_RECORD_START) {
384                 rpa_compiler_altexp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
385
386         } else if (prec->type & RPA_RECORD_END) {
387                 rpa_compiler_altexp_end(dbex->co);
388                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
389         }
390
391         return 0;
392 }
393
394
395 static rint rpa_dbex_rh_norop(rpadbex_t *dbex, rlong rec)
396 {
397         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
398
399         if (prec->type & RPA_RECORD_START) {
400                 rpa_compiler_altexp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
401
402         } else if (prec->type & RPA_RECORD_END) {
403                 rpa_compiler_altexp_end(dbex->co);
404                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
405         }
406
407         return 0;
408 }
409
410
411 static rint rpa_dbex_rh_notop(rpadbex_t *dbex, rlong rec)
412 {
413         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
414
415         if (prec->type & RPA_RECORD_START) {
416                 rpa_compiler_notexp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
417
418         } else if (prec->type & RPA_RECORD_END) {
419                 rpa_compiler_notexp_end(dbex->co);
420                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
421         }
422
423         return 0;
424 }
425
426
427 static rint rpa_dbex_rh_range(rpadbex_t *dbex, rlong rec)
428 {
429         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
430
431         if (prec->type & RPA_RECORD_START) {
432                 dbex->co->currange.p1 = 0;
433                 dbex->co->currange.p2 = 0;
434         } else if (prec->type & RPA_RECORD_END) {
435                 if (dbex->co->currange.p1 < dbex->co->currange.p2)
436                         rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p1, dbex->co->currange.p2));
437                 else
438                         rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p2, dbex->co->currange.p1));
439                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
440         }
441
442         return 0;
443 }
444
445
446 static rint rpa_dbex_rh_numrange(rpadbex_t *dbex, rlong rec)
447 {
448         rparecord_t *prec = (rparecord_t *) rpa_dbex_record(dbex, rec);
449
450         if (!prec)
451                 return -1;
452
453         if (prec->type & RPA_RECORD_START) {
454                 rparecord_t *child;
455                 child = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_END));
456                 if (rpa_record2long(child, &dbex->co->currange.p1) < 0)
457                         return -1;
458                 child = rpa_dbex_record(dbex, rpa_recordtree_lastchild(dbex->records, rec, RPA_RECORD_END));
459                 if (rpa_record2long(child, &dbex->co->currange.p2) < 0)
460                         return -1;
461         } else if (prec->type & RPA_RECORD_END) {
462                 if (dbex->co->currange.p1 < dbex->co->currange.p2)
463                         rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p1, dbex->co->currange.p2));
464                 else
465                         rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p2, dbex->co->currange.p1));
466                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
467         }
468
469         return 0;
470 }
471
472
473 static rint rpa_dbex_rh_clsnum(rpadbex_t *dbex, rlong rec)
474 {
475         rparecord_t *prec = (rparecord_t *) rpa_dbex_record(dbex, rec);
476
477         if (!prec)
478                 return -1;
479         if (prec->type & RPA_RECORD_START) {
480
481         } else if (prec->type & RPA_RECORD_END) {
482                 ruint32 wc;
483                 rparecord_t *child;
484                 child = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_END));
485                 if (rpa_record2long(child, &wc) < 0)
486                         return -1;
487                 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
488                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
489         }
490
491         return 0;
492 }
493
494
495 static rint rpa_dbex_rh_beginchar(rpadbex_t *dbex, rlong rec)
496 {
497         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
498
499         if (prec->type & RPA_RECORD_START) {
500
501         } else if (prec->type & RPA_RECORD_END) {
502                 ruint32 wc = 0;
503                 if (r_utf8_mbtowc(&wc, (const ruchar*) prec->input, (const ruchar*)prec->input + prec->inputsiz) < 0) {
504
505                         return -1;
506                 }
507                 dbex->co->currange.p1 = wc;
508         }
509
510         return 0;
511 }
512
513
514 static rint rpa_dbex_rh_endchar(rpadbex_t *dbex, rlong rec)
515 {
516         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
517
518         if (prec->type & RPA_RECORD_START) {
519
520         } else if (prec->type & RPA_RECORD_END) {
521                 ruint32 wc = 0;
522                 if (r_utf8_mbtowc(&wc, (const ruchar*) prec->input, (const ruchar*)prec->input + prec->inputsiz) < 0) {
523
524                         return -1;
525                 }
526                 dbex->co->currange.p2 = wc;
527         }
528
529         return 0;
530 }
531
532
533 static rint rpa_dbex_rh_branch(rpadbex_t *dbex, rlong rec)
534 {
535         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
536
537         if (prec->type & RPA_RECORD_START) {
538                 if (prec->usertype & RPA_NONLOOP_PATH) {
539                         rpa_compiler_nonloopybranch_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
540                 } else {
541                         rpa_compiler_branch_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
542                 }
543         } else if (prec->type & RPA_RECORD_END) {
544
545                 if (prec->usertype & RPA_NONLOOP_PATH) {
546                         rpa_compiler_nonloopybranch_end(dbex->co);
547                 } else {
548                         rpa_compiler_branch_end(dbex->co);
549                 }
550
551         }
552
553         return 0;
554 }
555
556
557 static void rpa_dbex_rh_loopref(rpadbex_t *dbex, rparecord_t *prec)
558 {
559         /*
560          * We ignore, it doesn't make sense for loops:
561          * RPA_MATCH_MULTIPLE
562          */
563         rpa_compiler_exp_begin(dbex->co, (prec->usertype & RPA_MATCH_OPTIONAL));
564         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_CMP, R_LOO, DA, XX, 0));
565         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, 3));
566         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_MOVS, R0, DA, XX, -1));
567         rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
568         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_ADD, R_TOP, R_TOP, R_LOO, 0));
569         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_MOVS, R0, R_LOO, XX, 0));
570         rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
571         rpa_compiler_exp_end(dbex->co);
572 //      rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
573
574 }
575
576
577 static rint rpa_dbex_rh_aref(rpadbex_t *dbex, rlong rec)
578 {
579         const rchar *name = NULL;
580         rsize_t namesize;
581         rpa_ruleinfo_t *info;
582         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
583
584         if (prec->type & RPA_RECORD_START) {
585                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
586
587                         return -1;
588                 }
589
590                 if ((prec->usertype & RPA_LOOP_PATH) && rpa_parseinfo_loopdetect(dbex, rec, rpa_dbex_firstinlined(dbex))) {
591                         info = (rpa_ruleinfo_t *) r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
592                         if (!info) {
593                                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_UNRESOLVED_SYMBOL);
594                                 RPA_DBEX_SETERRINFO_NAME(dbex, name, namesize);
595                                 return -1;
596                         }
597                         if (rpa_dbex_findinlined(dbex, info->startrec)) {
598                                 rpa_dbex_rh_loopref(dbex, prec);
599                         } else {
600                                 if (prec->usertype & RPA_MATCH_OPTIONAL) {
601                                         /*
602                                          * Most probably this is useless case - loop refs shouldn't have quantitative modifiers
603                                          * but in case they do we wrap the inlined production rule in quantitative expression.
604                                          * The inlined named rule can take the quantitative argument, but I just don't have
605                                          * a clean way to pass it from here - so, lets play the records inside an expression that
606                                          * has the right quantitative argument.
607                                          * We ignore, it doesn't make sense for loops:
608                                          * RPA_MATCH_MULTIPLE
609                                          */
610                                         rpa_compiler_exp_begin(dbex->co, RPA_MATCH_OPTIONAL);
611                                         rpa_dbex_play_recordhandlers(dbex, info->startrec, info->sizerecs);
612                                         rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
613                                         rpa_compiler_exp_end(dbex->co);
614                                 } else {
615                                         rpa_dbex_play_recordhandlers(dbex, info->startrec, info->sizerecs);
616                                 }
617                         }
618                 } else {
619                         rpa_compiler_reference(dbex->co, name, namesize, (prec->usertype & RPA_MATCH_MASK));
620                 }
621                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
622
623         } else if (prec->type & RPA_RECORD_END) {
624
625         }
626         return 0;
627 }
628
629
630 rpadbex_t *rpa_dbex_create(void)
631 {
632         rpadbex_t *dbex = (rpadbex_t *) r_zmalloc(sizeof(*dbex));
633
634         dbex->co = rpa_compiler_create();
635         dbex->pa = rpa_parser_create();
636         dbex->records = r_array_create(sizeof(rparecord_t));
637         dbex->rules = r_harray_create(sizeof(rpa_ruleinfo_t));
638         dbex->recstack = r_array_create(sizeof(rulong));
639         dbex->inlinestack = r_array_create(sizeof(rulong));
640         dbex->handlers = r_zmalloc(sizeof(rpa_dbex_recordhandler) * RPA_PRODUCTION_COUNT);
641         rpa_dbex_cfgset(dbex, RPA_DBEXCFG_OPTIMIZATIONS, 1);
642
643         dbex->handlers[RPA_PRODUCTION_NAMEDRULE] = rpa_dbex_rh_namedrule;
644         dbex->handlers[RPA_PRODUCTION_ANONYMOUSRULE] = rpa_dbex_rh_anonymousrule;
645         dbex->handlers[RPA_PRODUCTION_CLS] = rpa_dbex_rh_cls;
646         dbex->handlers[RPA_PRODUCTION_CHAR] = rpa_dbex_rh_char;
647         dbex->handlers[RPA_PRODUCTION_SPECIALCHAR] = rpa_dbex_rh_specialchar;
648         dbex->handlers[RPA_PRODUCTION_CLSCHAR] = rpa_dbex_rh_clschar;
649         dbex->handlers[RPA_PRODUCTION_AREF] = rpa_dbex_rh_aref;
650         dbex->handlers[RPA_PRODUCTION_CREF] = rpa_dbex_rh_aref;
651         dbex->handlers[RPA_PRODUCTION_BRACKETEXP] = rpa_dbex_rh_exp;
652         dbex->handlers[RPA_PRODUCTION_OROP] = rpa_dbex_rh_orop;
653         dbex->handlers[RPA_PRODUCTION_NOTOP] = rpa_dbex_rh_notop;
654         dbex->handlers[RPA_PRODUCTION_ALTBRANCH] = rpa_dbex_rh_branch;
655         dbex->handlers[RPA_PRODUCTION_NEGBRANCH] = rpa_dbex_rh_branch;
656         dbex->handlers[RPA_PRODUCTION_CHARRNG] = rpa_dbex_rh_range;
657         dbex->handlers[RPA_PRODUCTION_NUMRNG] = rpa_dbex_rh_numrange;
658         dbex->handlers[RPA_PRODUCTION_CLSNUM] = rpa_dbex_rh_clsnum;
659         dbex->handlers[RPA_PRODUCTION_BEGINCHAR] = rpa_dbex_rh_beginchar;
660         dbex->handlers[RPA_PRODUCTION_ENDCHAR] = rpa_dbex_rh_endchar;
661         dbex->handlers[RPA_PRODUCTION_NOROP] = rpa_dbex_rh_norop;
662         dbex->handlers[RPA_PRODUCTION_REQOP] = rpa_dbex_rh_exp;
663         dbex->handlers[RPA_PRODUCTION_MINOP] = rpa_dbex_rh_exp;
664         dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMIT] = rpa_dbex_rh_emit;
665         dbex->handlers[RPA_PRODUCTION_DIRECTIVENOEMIT] = rpa_dbex_rh_noemit;
666         dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMITALL] = rpa_dbex_rh_emitall;
667         dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMITNONE] = rpa_dbex_rh_emitnone;
668         dbex->handlers[RPA_PRODUCTION_DIRECTIVEUID] = rpa_dbex_rh_uid;
669
670         return dbex;
671 }
672
673
674 void rpa_dbex_destroy(rpadbex_t *dbex)
675 {
676         if (dbex) {
677                 rpa_compiler_destroy(dbex->co);
678                 rpa_parser_destroy(dbex->pa);
679                 r_harray_destroy(dbex->rules);
680                 r_array_destroy(dbex->records);
681                 r_array_destroy(dbex->recstack);
682                 r_array_destroy(dbex->inlinestack);
683                 r_free(dbex->handlers);
684                 r_free(dbex);
685         }
686 }
687
688
689 static rint rpa_parseinfo_loopdetect_do(rpadbex_t *dbex, rlong parent, rlong loopto, rint inderction)
690 {
691         rsize_t namesiz;
692         const rchar *name;
693         rlong i;
694         rint ret = 0;
695         rparecord_t *prec;
696
697         if (parent == loopto && inderction > 0)
698                 return 1;
699         for (i = 0; i < r_array_length(dbex->recstack); i++) {
700                 if (parent == r_array_index(dbex->recstack, i, rlong))
701                         return 0;
702         }
703         r_array_add(dbex->recstack, &parent);
704
705         if (!(prec = (rparecord_t *)r_array_slot(dbex->records, parent)))
706                 return 0;
707         if (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF)
708                 i = parent;
709         else
710                 i = rpa_recordtree_firstchild(dbex->records, parent, RPA_RECORD_START);
711         for (; i >= 0; i = rpa_recordtree_next(dbex->records, i, RPA_RECORD_START)) {
712                 prec = (rparecord_t *)r_array_slot(dbex->records, i);
713                 if (prec->ruleuid == RPA_PRODUCTION_RULENAME)
714                         continue;
715                 if (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF) {
716                         rpa_ruleinfo_t *info;
717                         if ((inderction > 0 || i != parent) && i == loopto) {
718                                 /*
719                                  * We found what we are looking for
720                                  */
721                                 ret = 1;
722                                 break;
723                         }
724                         if (rpa_dbex_rulename(dbex, i, &name, &namesiz) < 0)
725                                 R_ASSERT(0);
726                         info = (rpa_ruleinfo_t *) r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesiz));
727                         if (!info)
728                                 continue;
729                         if ((ret = rpa_parseinfo_loopdetect_do(dbex, info->startrec, loopto, inderction + 1)) > 0)
730                                 break;
731                 } else {
732                         if ((ret = rpa_parseinfo_loopdetect_do(dbex, i, loopto, inderction + 1)) > 0)
733                                 break;
734                 }
735
736                 if ((prec->usertype & RPA_MATCH_OPTIONAL) == 0 && (prec->ruleuid == RPA_PRODUCTION_CREF || prec->ruleuid == RPA_PRODUCTION_AREF ||
737                                 prec->ruleuid == RPA_PRODUCTION_CHAR || prec->ruleuid == RPA_PRODUCTION_CLS || prec->ruleuid == RPA_PRODUCTION_SPECIALCHAR))
738                         break;
739
740         }
741
742         r_array_removelast(dbex->recstack);
743         return ret;
744 }
745
746
747 static rint rpa_parseinfo_loopdetect(rpadbex_t *dbex, rlong parent, rlong loopto)
748 {
749         if (parent != loopto) {
750                 /*
751                  * Make sure we are dealing with a loop first
752                  */
753                 if (!rpa_parseinfo_loopdetect_do(dbex, loopto, parent, 0))
754                         return 0;
755         }
756
757         return (rpa_parseinfo_loopdetect_do(dbex, parent, loopto, 0)) ? 1 : 0;
758 }
759
760
761 static void rpa_parseinfo_marklooppath(rpadbex_t *dbex, rlong parent)
762 {
763         rlong i;
764
765         if (rpa_parseinfo_loopdetect(dbex, parent, parent) > 0) {
766                 rpa_record_setusertype(dbex->records, parent, RPA_LOOP_PATH, RVALSET_OR);
767                 for (i = rpa_recordtree_firstchild(dbex->records, parent, RPA_RECORD_START); i >= 0; i = rpa_recordtree_next(dbex->records, i, RPA_RECORD_START)) {
768                         rpa_parseinfo_marklooppath(dbex, i);
769                 }
770         }
771 }
772
773
774 static rint rpa_parseinfo_rule_checkforloop(rpadbex_t *dbex, const char *name, rsize_t namesize, rlong loopto)
775 {
776         rpa_ruleinfo_t *info = info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
777
778         if (!info)
779                 return 0;
780         return rpa_parseinfo_loopdetect(dbex, info->startrec, loopto);
781 }
782
783
784 static void rpa_dbex_buildloopinfo(rpadbex_t *dbex)
785 {
786         ruint i, p;
787         rharray_t *rules = dbex->rules;
788         rpa_ruleinfo_t *info;
789
790         for (i = 0; i < r_array_length(rules->members); i++) {
791                 if ((info = (rpa_ruleinfo_t *)r_harray_get(rules, i)) != NULL)
792                         rpa_parseinfo_marklooppath(dbex, info->startrec);
793         }
794
795         /*
796          * Mark the non-loop branches.
797          */
798         for (i = 0; i < r_array_length(dbex->records); i++) {
799                 rparecord_t *prec = (rparecord_t *)r_array_slot(dbex->records, i);
800                 if (prec->type == RPA_RECORD_START &&
801                         (prec->ruleuid == RPA_PRODUCTION_ALTBRANCH) &&
802                         (prec->usertype & RPA_LOOP_PATH) == 0) {
803                         p = rpa_recordtree_parent(dbex->records, i, RPA_RECORD_START);
804                         if (p >= 0) {
805                                 prec = (rparecord_t *)r_array_slot(dbex->records, p);
806                                 if (prec && (prec->usertype & RPA_LOOP_PATH))
807                                         rpa_record_setusertype(dbex->records, i, RPA_NONLOOP_PATH, RVALSET_OR);
808                         }
809                 }
810         }
811 }
812
813
814 static void rpa_dbex_buildruleinfo(rpadbex_t *dbex)
815 {
816         rparecord_t *rec;
817         rpa_ruleinfo_t info;
818         ruint nrecords;
819         rlong i;
820         const rchar *name = NULL;
821         rsize_t namesize = 0;
822
823         if (dbex->rules) {
824                 r_object_destroy((robject_t *)dbex->rules);
825                 dbex->rules = NULL;
826         }
827         dbex->rules = r_harray_create(sizeof(rpa_ruleinfo_t));
828
829         for (i = 0, nrecords = r_array_length(dbex->records); i < nrecords; i++) {
830                 if (!(rec = rpa_dbex_record(dbex, i)))
831                         continue;
832                 if ((rec->ruleuid == RPA_PRODUCTION_NAMEDRULE) && (rec->type & RPA_RECORD_START)) {
833                         r_memset(&info, 0, sizeof(info));
834                         info.type = RPA_RULEINFO_NAMEDRULE;
835                         info.startrec = i;
836                         info.sizerecs = rpa_recordtree_size(dbex->records, i);
837                         if (info.sizerecs < 0)
838                                 continue;
839                         if (rpa_dbex_rulename(dbex, i, &name, &namesize) < 0) {
840                                 continue;
841                         }
842                         r_harray_add(dbex->rules, name, namesize, &info);
843                         i += info.sizerecs - 1;
844                 } else if ((rec->ruleuid == RPA_PRODUCTION_ANONYMOUSRULE) && (rec->type & RPA_RECORD_START)) {
845                         r_memset(&info, 0, sizeof(info));
846                         info.type = RPA_RULEINFO_ANONYMOUSRULE;
847                         info.startrec = i;
848                         info.sizerecs = rpa_recordtree_size(dbex->records, i);
849                         if (info.sizerecs < 0)
850                                 continue;
851                         if ((rec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, i, RPA_RECORD_END))))
852                                 r_harray_add(dbex->rules, rec->input, rec->inputsiz, &info);
853                         i += info.sizerecs - 1;
854                 } else if ((rec->type & RPA_RECORD_START) && (rec->ruleuid >= RPA_PRODUCTION_DIRECTIVEEMIT) && (rec->ruleuid <= RPA_PRODUCTION_DIRECTIVEUID)) {
855                         r_memset(&info, 0, sizeof(info));
856                         info.type = RPA_RULEINFO_DIRECTIVE;
857                         info.startrec = i;
858                         info.sizerecs = rpa_recordtree_size(dbex->records, i);
859                         if (info.sizerecs < 0)
860                                 continue;
861                         if ((rec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, i, RPA_RECORD_END))))
862                                 r_harray_add(dbex->rules, rec->input, rec->inputsiz, &info);
863                         i += info.sizerecs - 1;
864                 }
865
866         }
867 }
868
869
870 static rlong rpa_dbex_copy_handler(rarray_t *records, rlong rec, rpointer userdata)
871 {
872         rpadbex_t *dbex = (rpadbex_t *)userdata;
873         rlong index;
874
875         rparecord_t *prec = (rparecord_t *)r_array_slot(records, rec);
876         if (prec->ruleuid == RPA_PRODUCTION_OCCURENCE && (prec->type & RPA_RECORD_START)) {
877                 /*
878                  * Ignore it
879                  */
880         } else if (prec->ruleuid == RPA_PRODUCTION_OCCURENCE && (prec->type & (RPA_RECORD_MATCH | RPA_RECORD_END))) {
881                 ruint32 usertype = RPA_MATCH_NONE;
882                 rlong lastrec = 0;
883                 /*
884                  * Don't copy it but set the usertype of the previous record accordingly.
885                  */
886                 switch (*prec->input) {
887                 case '?':
888                         usertype = RPA_MATCH_OPTIONAL;
889                         break;
890                 case '+':
891                         usertype = RPA_MATCH_MULTIPLE;
892                         break;
893                 case '*':
894                         usertype = RPA_MATCH_MULTIOPT;
895                         break;
896                 default:
897                         usertype = RPA_MATCH_NONE;
898                 };
899                 lastrec = r_array_length(dbex->records) - 1;
900                 if (lastrec >= 0)
901                         rpa_record_setusertype(dbex->records, lastrec, usertype, RVALSET_OR);
902         } else if (prec->ruleuid != RPA_RECORD_INVALID_UID) {
903                 index = r_array_add(dbex->records, prec);
904                 /*
905                  * Optimizations. Lets apply the optimizations while we copy the records.
906                  * This is probably not the most clean way to apply optimizations, in the future
907                  * we should probably think of optimization pass right before compiling.
908                  */
909                 if (dbex->optimizations) {
910                         if (prec->ruleuid == RPA_PRODUCTION_OROP && (prec->type & RPA_RECORD_END)) {
911                                 rpa_optimiztion_orop(dbex->records, rpa_recordtree_get(dbex->records, index, RPA_RECORD_START));
912                         }
913                 }
914         }
915
916         return 0;
917 }
918
919
920 static void rpa_dbex_copyrecords(rpadbex_t *dbex, rarray_t *records)
921 {
922         rint i;
923         for (i = rpa_recordtree_get(records, 0, RPA_RECORD_START); i >= 0; i = rpa_recordtree_next(records, i, RPA_RECORD_START))
924                 rpa_recordtree_walk(records, i, 0, rpa_dbex_copy_handler, dbex);
925 }
926
927
928 static rparecord_t *rpa_dbex_record(rpadbex_t *dbex, rlong rec)
929 {
930         rparecord_t *prec;
931
932         if (!dbex || !dbex->rules)
933                 return NULL;
934         if (rec < 0 || rec >= r_array_length(dbex->records))
935                 return NULL;
936         prec = (rparecord_t *)r_array_slot(dbex->records, rec);
937         return prec;
938
939 }
940
941
942 static rparecord_t *rpa_dbex_rulerecord(rpadbex_t *dbex, rparule_t rid)
943 {
944         rparecord_t *prec;
945         rpa_ruleinfo_t *info;
946         rlong rec;
947
948         if (!dbex || !dbex->rules)
949                 return NULL;
950         info = r_harray_get(dbex->rules, rid);
951         if (!info)
952                 return NULL;
953         rec = info->startrec + info->sizerecs - 1;
954         if (rec < 0 || rec >= r_array_length(dbex->records))
955                 return NULL;
956         prec = (rparecord_t *)r_array_slot(dbex->records, rec);
957         return prec;
958 }
959
960
961 static rint rpa_dbex_rulename(rpadbex_t *dbex, rlong rec, const rchar **name, rsize_t *namesize)
962 {
963         rparecord_t *pnamerec = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_START), RPA_RECORD_END));
964         if (!pnamerec || !(pnamerec->ruleuid & RPA_PRODUCTION_RULENAME))
965                 return -1;
966         *name = pnamerec->input;
967         *namesize = pnamerec->inputsiz;
968         return 0;
969 }
970
971
972 rint rpa_dbex_open(rpadbex_t *dbex)
973 {
974         if (!dbex)
975                 return -1;
976         if (dbex->rules) {
977                 r_object_destroy((robject_t *)dbex->rules);
978                 dbex->rules = NULL;
979         }
980         return 0;
981 }
982
983
984 void rpa_dbex_close(rpadbex_t *dbex)
985 {
986         if (!dbex)
987                 return;
988         rpa_dbex_buildruleinfo(dbex);
989         rpa_dbex_buildloopinfo(dbex);
990 }
991
992
993 rlong rpa_dbex_load(rpadbex_t *dbex, const rchar *rules, rsize_t size)
994 {
995         rlong ret;
996
997         if (!dbex)
998                 return -1;
999         if (dbex->rules) {
1000                 /*
1001                  * Dbex is not open
1002                  */
1003                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTOPEN);
1004                 return -1;
1005         }
1006         if ((ret = rpa_parser_load(dbex->pa, rules, size)) < 0) {
1007
1008                 return -1;
1009         }
1010         if (ret != size) {
1011                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAX_ERROR);
1012                 RPA_DBEX_SETERRINFO_OFFSET(dbex, ret);
1013                 return -1;
1014         }
1015         rpa_dbex_copyrecords(dbex, dbex->pa->stat->records);
1016         return ret;
1017 }
1018
1019
1020 rlong rpa_dbex_load_s(rpadbex_t *dbex, const rchar *rules)
1021 {
1022         return rpa_dbex_load(dbex, rules, r_strlen(rules));
1023 }
1024
1025
1026 void rpa_dbex_dumpindented(rpadbex_t *dbex, rlong rec, rint level, const rchar *rulelabel)
1027 {
1028         rchar buffer[1024];
1029         rint i, size;
1030         rparecord_t *prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END));
1031
1032         if (!prec)
1033                 return;
1034         r_memset(buffer, 0, sizeof(buffer));
1035         for (i = 0; i < level + 1; i++)
1036                 r_printf("   ");
1037         r_printf("(");
1038         r_printf("%s, %c, %c", rulelabel, rpa_record_optchar(prec, 'x'), rpa_record_loopchar(prec, 'x'));
1039         r_printf(")");
1040         size = R_MIN(prec->inputsiz, sizeof(buffer) - 1);
1041         r_strncpy(buffer, prec->input, size);
1042
1043         if (size == (sizeof(buffer) - 1))
1044                 r_printf(" %s ...\n", buffer);
1045         else
1046                 r_printf(" %s\n", buffer);
1047         return;
1048 }
1049
1050
1051 static rlong rpa_dbex_firstinlined(rpadbex_t *dbex)
1052 {
1053         rlong ret = r_array_empty(dbex->inlinestack) ? -1 : r_array_index(dbex->inlinestack, 0, rlong);
1054         return ret;
1055 }
1056
1057
1058 static rint rpa_dbex_findinlined(rpadbex_t *dbex, rlong startrec)
1059 {
1060         rlong i;
1061         for (i = 0; i < r_array_length(dbex->inlinestack); i++) {
1062                 if (r_array_index(dbex->inlinestack, i, rlong) == startrec)
1063                         return 1;
1064         }
1065         return 0;
1066 }
1067
1068
1069 static void rpa_dbex_dumptree_do(rpadbex_t *dbex, rlong rec, rint level)
1070 {
1071         rparecord_t *prec = rpa_dbex_record(dbex, rec);
1072         if (prec && prec->ruleuid == RPA_PRODUCTION_RULENAME)
1073                 return;
1074         if (prec && (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF)) {
1075                 const rchar *name = NULL;
1076                 rsize_t namesize = 0;
1077                 rint loop = 0;
1078                 rpa_ruleinfo_t *info;
1079
1080                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) >= 0) {
1081                         loop = rpa_parseinfo_rule_checkforloop(dbex, name, namesize, rpa_dbex_firstinlined(dbex));
1082                         info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
1083                         if (loop && info){
1084                                 if (!rpa_dbex_findinlined(dbex, info->startrec)) {
1085                                         /*
1086                                          * Temporary set the quantitative flags for the inlined rule to the parent
1087                                          * reference, so they are printed correctly. After the printing is done
1088                                          * restore the original flags.
1089                                          */
1090                                         rparecord_t *prulestart = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_START));
1091                                         rparecord_t *pruleend = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_END));
1092                                         rulong optional = (prulestart->usertype & RPA_MATCH_OPTIONAL);
1093                                         prulestart->usertype |= (prec->usertype & RPA_MATCH_OPTIONAL);
1094                                         pruleend->usertype |= (prec->usertype & RPA_MATCH_OPTIONAL);
1095                                         r_array_add(dbex->inlinestack, &info->startrec);
1096                                         rpa_dbex_dumptree_do(dbex, info->startrec, level);
1097                                         r_array_removelast(dbex->inlinestack);
1098                                         if (!optional) {
1099                                                 prulestart->usertype &= ~RPA_MATCH_OPTIONAL;
1100                                                 pruleend->usertype &= ~RPA_MATCH_OPTIONAL;
1101                                         }
1102                                 } else {
1103                                         rpa_dbex_dumpindented(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END), level, "loopref");
1104                                 }
1105                                 return;
1106                         }
1107                 }
1108         }
1109         rpa_dbex_dumpindented(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END), level, prec->rule);
1110         for (rec = rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_START); rec >= 0; rec = rpa_recordtree_next(dbex->records, rec, RPA_RECORD_START)) {
1111                 rpa_dbex_dumptree_do(dbex, rec, level + 1);
1112         }
1113 }
1114
1115
1116 rint rpa_dbex_dumptree(rpadbex_t *dbex, const rchar *rulename, rsize_t namesize, rint level)
1117 {
1118         rpa_ruleinfo_t *info;
1119
1120         if (!dbex)
1121                 return -1;
1122         if (!dbex->rules) {
1123                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1124                 return -1;
1125         }
1126         if (!(info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, rulename, namesize)))) {
1127                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1128                 return -1;
1129         }
1130         r_array_add(dbex->inlinestack, &info->startrec);
1131         rpa_dbex_dumptree_do(dbex, info->startrec, level);
1132         r_array_removelast(dbex->inlinestack);
1133         return 0;
1134 }
1135
1136
1137 rint rpa_dbex_dumptree_s(rpadbex_t *dbex, const rchar *rulename, rint level)
1138 {
1139         return rpa_dbex_dumptree(dbex, rulename, r_strlen(rulename), level);
1140 }
1141
1142
1143 rint rpa_dbex_dumprules(rpadbex_t *dbex)
1144 {
1145         rint ret = 0;
1146         rparule_t rid;
1147         rchar buffer[512];
1148
1149         if (!dbex)
1150                 return -1;
1151         if (!dbex->rules) {
1152                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1153                 return -1;
1154         }
1155         for (rid = rpa_dbex_first(dbex); rid >= 0; rid = rpa_dbex_next(dbex, rid)) {
1156                 ret = rpa_dbex_copy(dbex, rid, buffer, sizeof(buffer));
1157                 if ( ret >= 0) {
1158                         if (ret == sizeof(buffer))
1159                                 r_printf("   %s ...\n", buffer);
1160                         else
1161                                 r_printf("   %s\n", buffer);
1162                 }
1163
1164         }
1165         return ret;
1166 }
1167
1168
1169 rint rpa_dbex_dumprecords(rpadbex_t *dbex)
1170 {
1171         rlong i;
1172
1173         if (!dbex)
1174                 return -1;
1175         if (!dbex->rules) {
1176                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1177                 return -1;
1178         }
1179         for (i = 0; i < r_array_length(dbex->records); i++) {
1180                 rpa_record_dump(dbex->records, i);
1181         }
1182         return 0;
1183 }
1184
1185
1186 rint rpa_dbex_dumpinfo(rpadbex_t *dbex)
1187 {
1188         rlong i;
1189         rpa_ruleinfo_t *info;
1190
1191         if (!dbex)
1192                 return -1;
1193         if (!dbex->rules) {
1194                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1195                 return -1;
1196         }
1197         for (i = 0; i < r_array_length(dbex->rules->names); i++) {
1198                 rstr_t *name = r_array_index(dbex->rules->names, i, rstr_t*);
1199                 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
1200                 switch (info->type) {
1201                 case RPA_RULEINFO_NAMEDRULE:
1202                         r_printf("N ");
1203                         break;
1204                 case RPA_RULEINFO_ANONYMOUSRULE:
1205                         r_printf("A ");
1206                         break;
1207                 case RPA_RULEINFO_DIRECTIVE:
1208                         r_printf("D ");
1209                         break;
1210                 default:
1211                         r_printf("  ");
1212                         break;
1213                 };
1214                 r_printf("(%7d, %4d, code: %7ld, %5ld) : %s\n", info->startrec, info->sizerecs, info->codeoff, info->codesiz, name->str);
1215         }
1216         return 0;
1217 }
1218
1219
1220 rint rpa_dbex_dumpalias(rpadbex_t *dbex)
1221 {
1222         rlong i;
1223         rlong rec;
1224         rpa_ruleinfo_t *info;
1225         rchar *buffer = r_zmalloc(32 * sizeof(rchar));
1226
1227         if (!dbex)
1228                 return -1;
1229         if (!dbex->rules) {
1230                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1231                 return -1;
1232         }
1233         for (i = 0; i < r_array_length(dbex->rules->names); i++) {
1234                 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
1235                 if (info->type == RPA_RULEINFO_DIRECTIVE) {
1236                         rparecord_t *prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_END));
1237                         if (prec->ruleuid == RPA_PRODUCTION_DIRECTIVEUID && prec->inputsiz) {
1238                                 rec = rpa_recordtree_firstchild(dbex->records, info->startrec, RPA_RECORD_START);
1239                                 while (rec >= 0) {
1240                                         prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END));
1241                                         if (prec->ruleuid == RPA_PRODUCTION_ALIASNAME) {
1242                                                 ruint32 dec;
1243                                                 if (rpa_record2long(rpa_dbex_record(dbex, rpa_recordtree_next(dbex->records, rec, RPA_RECORD_END)), &dec) < 0)
1244                                                         break;
1245                                                 buffer = r_realloc(buffer, prec->inputsiz + 1);
1246                                                 r_memset(buffer, 0, prec->inputsiz + 1);
1247                                                 r_memcpy(buffer, prec->input, prec->inputsiz);
1248                                                 r_printf("#define %s %d\n", buffer, dec);
1249                                                 break;
1250                                         }
1251                                         rec = rpa_recordtree_next(dbex->records, rec, RPA_RECORD_START);
1252                                 }
1253                         }
1254                 }
1255         }
1256         r_free(buffer);
1257         return 0;
1258 }
1259
1260
1261 rint rpa_dbex_dumpcode(rpadbex_t* dbex, const rchar *rulename)
1262 {
1263         rpa_ruleinfo_t *info;
1264         if (!dbex)
1265                 return -1;
1266         if (!dbex->rules) {
1267                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1268                 return -1;
1269         }
1270         info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rpa_dbex_lookup_s(dbex, rulename));
1271         if (!info)
1272                 return -1;
1273         rvm_asm_dump(rvm_codegen_getcode(dbex->co->cg, info->codeoff), info->codesiz);
1274         return 0;
1275 }
1276
1277 rsize_t rpa_dbex_copy(rpadbex_t *dbex, rparule_t rid, rchar *buf, rsize_t bufsize)
1278 {
1279         rparecord_t *prec;
1280         rsize_t size;
1281
1282         if (!dbex)
1283                 return -1;
1284         if ((prec = rpa_dbex_rulerecord(dbex, rid)) == NULL) {
1285                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1286                 return -1;
1287         }
1288         size = prec->inputsiz;
1289         if (bufsize <= size)
1290                 size = bufsize - 1;
1291         r_memset(buf, 0, bufsize);
1292         r_strncpy(buf, prec->input, size);
1293         return size + 1;
1294 }
1295
1296
1297 rparule_t rpa_dbex_first(rpadbex_t *dbex)
1298 {
1299         if (!dbex || !dbex->rules)
1300                 return -1;
1301
1302         if (r_array_length(dbex->rules->members) > 0)
1303                 return 0;
1304         return -1;
1305 }
1306
1307
1308 rparule_t rpa_dbex_last(rpadbex_t *dbex)
1309 {
1310         if (!dbex || !dbex->rules)
1311                 return -1;
1312
1313         if (r_array_length(dbex->rules->members) > 0)
1314                 return r_array_length(dbex->rules->members) - 1;
1315         return -1;
1316 }
1317
1318
1319 rparule_t rpa_dbex_default(rpadbex_t *dbex)
1320 {
1321         return rpa_dbex_last(dbex);
1322 }
1323
1324
1325 rparule_t rpa_dbex_lookup(rpadbex_t *dbex, const rchar *name, rsize_t namesize)
1326 {
1327         if (!dbex) {
1328                 return -1;
1329         }
1330
1331         return r_harray_taillookup(dbex->rules, name, namesize);
1332 }
1333
1334
1335 rparule_t rpa_dbex_lookup_s(rpadbex_t *dbex, const rchar *name)
1336 {
1337         return rpa_dbex_lookup(dbex, name, r_strlen(name));
1338 }
1339
1340
1341 rparule_t rpa_dbex_next(rpadbex_t *dbex, rparule_t rid)
1342 {
1343         if (!dbex || !dbex->rules)
1344                 return -1;
1345         ++rid;
1346         if (rid < r_array_length(dbex->rules->members))
1347                 return rid;
1348         return -1;
1349 }
1350
1351
1352 rparule_t rpa_dbex_prev(rpadbex_t *dbex, rparule_t rid)
1353 {
1354         if (!dbex || !dbex->rules)
1355                 return -1;
1356         --rid;
1357         if (rid >= 0)
1358                 return rid;
1359         return -1;
1360 }
1361
1362
1363 rlong rpa_dbex_getlasterror(rpadbex_t *dbex)
1364 {
1365         if (!dbex)
1366                 return -1;
1367         return dbex->err.code;
1368 }
1369
1370
1371 rlong rpa_dbex_getlasterrinfo(rpadbex_t *dbex, rpa_errinfo_t *errinfo)
1372 {
1373         if (!dbex || !errinfo)
1374                 return -1;
1375         r_memcpy(errinfo, &dbex->err, sizeof(rpa_errinfo_t));
1376         return 0;
1377 }
1378
1379
1380 const rchar *rpa_dbex_version()
1381 {
1382         return "2.0";
1383 }
1384
1385
1386 static rlong rpa_dbex_play_recordhandler(rpadbex_t *dbex, rlong rec)
1387 {
1388         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
1389         rpa_dbex_recordhandler handler = dbex->handlers[prec->ruleuid];
1390         if (handler) {
1391                 if (handler(dbex, rec) < 0)
1392                         return -1;
1393         }
1394         return 0;
1395 }
1396
1397
1398 static rlong rpa_dbex_play_recordhandlers(rpadbex_t *dbex, rlong rec, rlong nrecs)
1399 {
1400         rparecord_t *prec;
1401         rlong i, res = 0;
1402
1403         for (i = rec; i < rec + nrecs; i++) {
1404                 prec = (rparecord_t *) r_array_slot(dbex->records, i);
1405
1406                 if (prec->ruleuid == RPA_PRODUCTION_MINOP && (prec->type & RPA_RECORD_START)) {
1407                         rlong lastchild = rpa_recordtree_lastchild(dbex->records, i, RPA_RECORD_START);
1408                         rlong firstchild = rpa_recordtree_firstchild(dbex->records, i, RPA_RECORD_START);
1409                         if (firstchild < 0 || lastchild < 0 || firstchild == lastchild)
1410                                 return -1;
1411                         if ((res = rpa_dbex_play_recordhandler(dbex, i)) < 0)
1412                                 return -1;
1413                         if ((res = rpa_dbex_play_recordhandlers(dbex, lastchild, rpa_recordtree_size(dbex->records, lastchild))) < 0)
1414                                 return -1;
1415                         if ((res = rpa_dbex_play_recordhandlers(dbex, firstchild, lastchild - firstchild)) < 0)
1416                                 return -1;
1417                         if ((res = rpa_dbex_play_recordhandler(dbex, rpa_recordtree_get(dbex->records, i, RPA_RECORD_END))) < 0)
1418                                 return -1;
1419                         i += rpa_recordtree_size(dbex->records, i) - 1;
1420                         continue;
1421                 }
1422
1423                 if (rpa_dbex_play_recordhandler(dbex, i) < 0)
1424                         return -1;
1425         }
1426
1427         return i;
1428 }
1429
1430
1431 static rint rpa_dbex_compile_rule(rpadbex_t *dbex, rparule_t rid)
1432 {
1433         rlong codeoff;
1434         rpa_ruleinfo_t *info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid);
1435
1436         if (!info)
1437                 return -1;
1438         codeoff = rvm_codegen_getcodesize(dbex->co->cg);
1439         if (rpa_dbex_play_recordhandlers(dbex, info->startrec, info->sizerecs) < 0)
1440                 return -1;
1441         info->codeoff = codeoff;
1442         info->codesiz = rvm_codegen_getcodesize(dbex->co->cg) - codeoff;
1443
1444         return 0;
1445 }
1446
1447
1448 rint rpa_dbex_compile(rpadbex_t *dbex)
1449 {
1450         rparule_t rid;
1451         rvm_codelabel_t *labelerr;
1452
1453         if (!dbex)
1454                 return -1;
1455         if (!dbex->rules) {
1456                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1457                 return -1;
1458         }
1459         /*
1460          * By default all production rules emit
1461          */
1462         if (dbex->co)
1463                 rpa_compiler_destroy(dbex->co);
1464         dbex->co = rpa_compiler_create();
1465         rpa_dbex_setemit(dbex, TRUE);
1466
1467         for (rid = rpa_dbex_first(dbex); rid >= 0; rid = rpa_dbex_next(dbex, rid)) {
1468                 if (rpa_dbex_compile_rule(dbex, rid) < 0) {
1469                         return -1;
1470                 }
1471         }
1472
1473         if (rvm_codegen_relocate(dbex->co->cg, &labelerr) < 0) {
1474                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_UNRESOLVED_SYMBOL);
1475                 RPA_DBEX_SETERRINFO_NAME(dbex, labelerr->name->str, labelerr->name->size);
1476                 return -1;
1477         }
1478
1479         return 0;
1480 }
1481
1482
1483 rvm_asmins_t *rvm_dbex_getexecutable(rpadbex_t *dbex)
1484 {
1485         if (!dbex)
1486                 return NULL;
1487         if (!dbex->rules) {
1488                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1489                 return NULL;
1490         }
1491         return rvm_codegen_getcode(dbex->co->cg, 0);
1492 }
1493
1494
1495 rlong rvm_dbex_executableoffset(rpadbex_t *dbex, rparule_t rid)
1496 {
1497         rpa_ruleinfo_t *info;
1498
1499         if (!dbex)
1500                 return -1;
1501         if (!dbex->rules) {
1502                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1503                 return -1;
1504         }
1505         info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid);
1506         if (!info) {
1507                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1508                 return -1;
1509         }
1510         return info->codeoff;
1511 }
1512
1513
1514 rlong rpa_dbex_cfgset(rpadbex_t *dbex, rulong cfg, rulong val)
1515 {
1516         if (!dbex)
1517                 return -1;
1518         if (cfg == RPA_DBEXCFG_OPTIMIZATIONS) {
1519                 dbex->optimizations = val;
1520                 return 0;
1521         }
1522         return -1;
1523 }
1524
1525
1526 rlong rpa_dbex_cfgget(rpadbex_t *dbex, rulong cfg)
1527 {
1528         if (!dbex)
1529                 return -1;
1530         if (cfg == RPA_DBEXCFG_OPTIMIZATIONS) {
1531                 return dbex->optimizations;
1532         }
1533         return -1;
1534 }
1535