RPA Toolkit
Added more documentation files. Added rpastatpriv.h, containing the private interface...
[rpatk.git] / rpa / rpadbex.c
1 /**
2  *\file rpadbex.c
3  */
4
5 #include "rpacompiler.h"
6 #include "rpadbex.h"
7 #include "rpastatpriv.h"
8 #include "rpaparser.h"
9 #include "rpaoptimization.h"
10 #include "rmem.h"
11 #include "rutf.h"
12
/*
 * Signature of a per-production record handler. Handlers are stored in
 * rpadbex_s.handlers (indexed by RPA_PRODUCTION_* id) and receive the
 * database plus the index of the record being processed. The handlers in
 * this file return 0 on success and -1 on failure.
 */
typedef rint (*rpa_dbex_recordhandler)(rpadbex_t *dbex, rlong rec);
14
/*
 * Values stored in rpa_ruleinfo_t.type to classify an entry of the
 * rules table.
 */
#define RPA_RULEINFO_NONE 0
#define RPA_RULEINFO_NAMEDRULE 1
#define RPA_RULEINFO_ANONYMOUSRULE 2
#define RPA_RULEINFO_DIRECTIVE 3
19
/*
 * Helpers that record error details in (__d__)->err and flag the
 * corresponding field as valid in err.mask.
 *
 * Every macro argument is parenthesized at its point of use so that
 * arbitrary expressions can be passed safely.
 *
 * RPA_DBEX_SETERRINFO_NAME zero-fills err.name and then copies at most
 * sizeof(err.name) - 1 characters, so the stored name always stays
 * NUL-terminated (relies on err.name being an array, since sizeof is
 * applied to it).
 */
#define RPA_DBEX_SETERRINFO_CODE(__d__, __e__) do { (__d__)->err.code = (__e__); (__d__)->err.mask |= RPA_ERRINFO_CODE; } while (0)
#define RPA_DBEX_SETERRINFO_OFFSET(__d__, __o__) do { (__d__)->err.offset = (__o__); (__d__)->err.mask |= RPA_ERRINFO_OFFSET; } while (0)
#define RPA_DBEX_SETERRINFO_RULEID(__d__, __r__) do { (__d__)->err.ruleid = (__r__); (__d__)->err.mask |= RPA_ERRINFO_RULEID; } while (0)
#define RPA_DBEX_SETERRINFO_NAME(__d__, __n__, __s__) do { \
        (__d__)->err.mask |= RPA_ERRINFO_NAME; \
        r_memset((__d__)->err.name, 0, sizeof((__d__)->err.name)); \
        r_strncpy((__d__)->err.name, (__n__), R_MIN((__s__), (sizeof((__d__)->err.name) - 1)));  } while (0)
27
28
/*
 * Bookkeeping entry kept in rpadbex_s.rules for every rule.
 */
typedef struct rpa_ruleinfo_s {
        rlong startrec;         /* index of the rule's first record in the records array */
        rlong sizerecs;         /* number of records to replay for this rule, starting at startrec */
        rlong codeoff;          /* NOTE(review): presumably the offset of the compiled code - confirm */
        rlong codesiz;          /* NOTE(review): presumably the size of the compiled code - confirm */
        rulong type;            /* one of the RPA_RULEINFO_* values */
} rpa_ruleinfo_t;
36
37
/*
 * State of a rule database (the private definition behind rpadbex_t).
 */
struct rpadbex_s {
        rpa_compiler_t *co;                     /* compiler that receives the generated instructions */
        rpa_parser_t *pa;                       /* parser instance owned by the database */
        rarray_t *records;                      /* array of rparecord_t elements */
        rharray_t *rules;                       /* table of rpa_ruleinfo_t entries */
        rarray_t *recstack;                     /* record indexes currently being visited by loop detection */
        rarray_t *inlinestack;                  /* named-rule records currently open (nested ones are inlined) */
        rpa_dbex_recordhandler *handlers;       /* handler table indexed by RPA_PRODUCTION_* id */
        rpa_errinfo_t err;                      /* last error, filled via RPA_DBEX_SETERRINFO_* */
        rulong optimizations:1;                 /* NOTE(review): presumably toggled via RPA_DBEXCFG_OPTIMIZATIONS - confirm */
};
49
/*
 * Forward declarations of file-local helpers that are used by the
 * record handlers before their definitions appear.
 */
static rparecord_t *rpa_dbex_rulerecord(rpadbex_t *dbex, rparule_t rid);
static rparecord_t *rpa_dbex_record(rpadbex_t *dbex, rlong rec);
static rint rpa_dbex_rulename(rpadbex_t *dbex, rlong rec, const rchar **name, rsize_t *namesize);
static rint rpa_parseinfo_loopdetect(rpadbex_t *dbex, rlong parent, rlong loopto);
static rlong rpa_dbex_play_recordhandler(rpadbex_t *dbex, rlong rec);
static rlong rpa_dbex_play_recordhandlers(rpadbex_t *dbex, rlong rec, rlong nrecs);
static rlong rpa_dbex_firstinlined(rpadbex_t *dbex);
static rint rpa_dbex_findinlined(rpadbex_t *dbex, rlong startrec);
58
59
60 static rlong rpa_dbex_getmatchchr(rulong matchtype)
61 {
62         switch (matchtype & RPA_MATCH_MASK) {
63         default:
64         case RPA_MATCH_NONE:
65                 return RPA_MATCHCHR_NAN;
66                 break;
67         case RPA_MATCH_MULTIPLE:
68                 return RPA_MATCHCHR_MUL;
69                 break;
70         case RPA_MATCH_OPTIONAL:
71                 return RPA_MATCHCHR_OPT;
72                 break;
73         case RPA_MATCH_MULTIOPT:
74                 return RPA_MATCHCHR_MOP;
75                 break;
76         };
77         return RPA_MATCHCHR_NAN;
78 }
79
80
81 static rlong rpa_dbex_getmatchspecialchr(rulong matchtype)
82 {
83         switch (matchtype & RPA_MATCH_MASK) {
84         default:
85         case RPA_MATCH_NONE:
86                 return RPA_MATCHSPCHR_NAN;
87                 break;
88         case RPA_MATCH_MULTIPLE:
89                 return RPA_MATCHSPCHR_MUL;
90                 break;
91         case RPA_MATCH_OPTIONAL:
92                 return RPA_MATCHSPCHR_OPT;
93                 break;
94         case RPA_MATCH_MULTIOPT:
95                 return RPA_MATCHSPCHR_MOP;
96                 break;
97         };
98         return RPA_MATCHSPCHR_NAN;
99 }
100
101
102 static rint rpa_record2long(rparecord_t *prec, ruint32 *num)
103 {
104         rchar *endptr = NULL;
105         rchar buffer[64];
106
107         if (!prec || !num || prec->inputsiz == 0 || prec->inputsiz >= sizeof(buffer))
108                 return -1;
109         r_memset(buffer, 0, sizeof(buffer));
110         r_memcpy(buffer, prec->input, prec->inputsiz);
111         if (prec->ruleuid == RPA_PRODUCTION_HEX) {
112                 *num = (ruint32)r_strtoul(prec->input, &endptr, 16);
113         } else if (prec->ruleuid == RPA_PRODUCTION_DEC) {
114                 *num = (ruint32)r_strtoul(prec->input, &endptr, 10);
115         } else {
116                 return -1;
117         }
118         return 0;
119 }
120
121
122 static rint rpa_dbex_rh_uid(rpadbex_t *dbex, rlong rec)
123 {
124         const rchar *name = NULL;
125         rsize_t namesize;
126         ruint32 uid = 0;
127         rparecord_t *pnumrec;
128         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
129
130         if (prec->type & RPA_RECORD_START) {
131                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
132                         RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAX_ERROR);
133                         return -1;
134                 }
135                 pnumrec = rpa_dbex_record(dbex, rpa_recordtree_lastchild(dbex->records, rec, RPA_RECORD_END));
136                 if (!pnumrec) {
137                         RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAX_ERROR);
138                         return -1;
139                 }
140                 if (rpa_record2long(pnumrec, &uid) < 0) {
141                         RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAX_ERROR);
142                         return -1;
143                 }
144                 rpa_compiler_rulepref_set_ruleuid(dbex->co, name, namesize, uid);
145                 rpa_compiler_rulepref_set_flag(dbex->co, name, namesize, RPA_RFLAG_EMITRECORD);
146         } else if (prec->type & RPA_RECORD_END) {
147
148         }
149         return 0;
150 }
151
152
153 static rint rpa_dbex_rh_emit(rpadbex_t *dbex, rlong rec)
154 {
155         const rchar *name = NULL;
156         rsize_t namesize;
157         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
158
159         if (prec->type & RPA_RECORD_START) {
160                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
161                         return -1;
162                 }
163                 rpa_compiler_rulepref_set_flag(dbex->co, name, namesize, RPA_RFLAG_EMITRECORD);
164         } else if (prec->type & RPA_RECORD_END) {
165
166         }
167         return 0;
168 }
169
170
171 static rint rpa_dbex_rh_noemit(rpadbex_t *dbex, rlong rec)
172 {
173         const rchar *name = NULL;
174         rsize_t namesize;
175         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
176
177         if (prec->type & RPA_RECORD_START) {
178                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
179                         return -1;
180                 }
181                 rpa_compiler_rulepref_clear_flag(dbex->co, name, namesize, RPA_RFLAG_EMITRECORD);
182         } else if (prec->type & RPA_RECORD_END) {
183
184         }
185         return 0;
186 }
187
188
189 static rint rpa_dbex_setemit(rpadbex_t *dbex, rboolean emit)
190 {
191         rlong i;
192         rpa_ruleinfo_t *info;
193
194         for (i = 0; i < r_array_length(dbex->rules->names); i++) {
195                 rstr_t *name = r_array_index(dbex->rules->names, i, rstr_t*);
196                 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
197                 if (info->type == RPA_RULEINFO_NAMEDRULE) {
198                         if (emit) {
199                                 rpa_compiler_rulepref_set_flag(dbex->co, name->str, name->size, RPA_RFLAG_EMITRECORD);
200                         } else {
201                                 rpa_compiler_rulepref_clear_flag(dbex->co, name->str, name->size, RPA_RFLAG_EMITRECORD);
202                         }
203                 }
204         }
205         return 0;
206 }
207
208
209 static rint rpa_dbex_rh_emitall(rpadbex_t *dbex, rlong rec)
210 {
211         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
212
213         if (prec->type & RPA_RECORD_START) {
214                 rpa_dbex_setemit(dbex, TRUE);
215         } else if (prec->type & RPA_RECORD_END) {
216
217         }
218         return 0;
219 }
220
221
222 static rint rpa_dbex_rh_emitnone(rpadbex_t *dbex, rlong rec)
223 {
224         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
225
226         if (prec->type & RPA_RECORD_START) {
227                 rpa_dbex_setemit(dbex, FALSE);
228         } else if (prec->type & RPA_RECORD_END) {
229
230         }
231         return 0;
232 }
233
234
235 static rint rpa_dbex_rh_namedrule(rpadbex_t *dbex, rlong rec)
236 {
237         const rchar *name = NULL;
238         rsize_t namesize;
239         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
240
241         if (prec->type & RPA_RECORD_START) {
242                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
243
244                         return -1;
245                 }
246                 if (!r_array_empty(dbex->inlinestack)) {
247                         rpa_compiler_inlinerule_begin(dbex->co, name, namesize, 0);
248                 } else {
249                         rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_SHIFT, XX, XX, XX, 0));
250                         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_BL, DA, XX, XX, 3));
251                         rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EMITTAIL, XX, XX, XX, 0));
252                         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_EXT, XX, XX, XX, 0));
253
254                         if ((prec->usertype & RPA_LOOP_PATH)) {
255                                 rpa_compiler_loop_begin(dbex->co, name, namesize);
256                         } else {
257                                 rpa_compiler_rule_begin(dbex->co, name, namesize);
258                         }
259                 }
260                 r_array_add(dbex->inlinestack, &rec);
261         } else if (prec->type & RPA_RECORD_END) {
262                 r_array_removelast(dbex->inlinestack);
263
264                 if (!r_array_empty(dbex->inlinestack)) {
265                         rpa_compiler_inlinerule_end(dbex->co);
266                 } else {
267                         if ((prec->usertype & RPA_LOOP_PATH)) {
268                                 rpa_compiler_loop_end(dbex->co);
269                         } else {
270                                 rpa_compiler_rule_end(dbex->co);
271                         }
272                 }
273         }
274         return 0;
275 }
276
277
278 static rint rpa_dbex_rh_anonymousrule(rpadbex_t *dbex, rlong rec)
279 {
280         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
281
282         if (prec->type & RPA_RECORD_START) {
283                 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_SHIFT, XX, XX, XX, 0));
284                 rpa_compiler_exp_begin(dbex->co, RPA_MATCH_NONE);
285
286         } else if (prec->type & RPA_RECORD_END) {
287                 rpa_compiler_exp_end(dbex->co);
288                 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_EMITTAIL, XX, XX, XX, 0));
289                 rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_EXT, XX, XX, XX, 0));
290         }
291
292         return 0;
293 }
294
295
296 static rint rpa_dbex_rh_char(rpadbex_t *dbex, rlong rec)
297 {
298         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
299
300         if (prec->type & RPA_RECORD_END) {
301                 ruint32 wc = 0;
302                 if (r_utf8_mbtowc(&wc, (const ruchar*) prec->input, (const ruchar*)prec->input + prec->inputsiz) < 0) {
303
304                         return -1;
305                 }
306                 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
307                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
308         }
309
310         return 0;
311 }
312
313
314 static rint rpa_dbex_rh_specialchar(rpadbex_t *dbex, rlong rec)
315 {
316         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
317
318         if (prec->type & RPA_RECORD_END) {
319                 ruint32 wc = 0;
320                 if (r_utf8_mbtowc(&wc, (const ruchar*) prec->input, (const ruchar*)prec->input + prec->inputsiz) < 0) {
321
322                         return -1;
323                 }
324                 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchspecialchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
325                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
326         }
327
328         return 0;
329 }
330
331
332 static rint rpa_dbex_rh_cls(rpadbex_t *dbex, rlong rec)
333 {
334         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
335
336         if (prec->type & RPA_RECORD_START) {
337                 rpa_compiler_class_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
338
339         } else if (prec->type & RPA_RECORD_END) {
340                 rpa_compiler_class_end(dbex->co);
341                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
342         }
343
344         return 0;
345 }
346
347
348 static rint rpa_dbex_rh_clschar(rpadbex_t *dbex, rlong rec)
349 {
350         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
351
352         if (prec->type & RPA_RECORD_END) {
353                 ruint32 wc = 0;
354                 if (r_utf8_mbtowc(&wc, (const ruchar*) prec->input, (const ruchar*)prec->input + prec->inputsiz) < 0) {
355
356                         return -1;
357                 }
358                 rvm_codegen_addins(dbex->co->cg, rvm_asm(RPA_MATCHCHR_NAN, DA, XX, XX, wc));
359                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
360         }
361
362         return 0;
363 }
364
365
366 static rint rpa_dbex_rh_exp(rpadbex_t *dbex, rlong rec)
367 {
368         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
369
370         if (prec->type & RPA_RECORD_START) {
371                 rpa_compiler_exp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
372
373         } else if (prec->type & RPA_RECORD_END) {
374                 rpa_compiler_exp_end(dbex->co);
375                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
376         }
377
378         return 0;
379 }
380
381
382 static rint rpa_dbex_rh_orop(rpadbex_t *dbex, rlong rec)
383 {
384         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
385
386         if (prec->type & RPA_RECORD_START) {
387                 rpa_compiler_altexp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
388
389         } else if (prec->type & RPA_RECORD_END) {
390                 rpa_compiler_altexp_end(dbex->co);
391                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
392         }
393
394         return 0;
395 }
396
397
398 static rint rpa_dbex_rh_norop(rpadbex_t *dbex, rlong rec)
399 {
400         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
401
402         if (prec->type & RPA_RECORD_START) {
403                 rpa_compiler_altexp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
404
405         } else if (prec->type & RPA_RECORD_END) {
406                 rpa_compiler_altexp_end(dbex->co);
407                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
408         }
409
410         return 0;
411 }
412
413
414 static rint rpa_dbex_rh_notop(rpadbex_t *dbex, rlong rec)
415 {
416         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
417
418         if (prec->type & RPA_RECORD_START) {
419                 rpa_compiler_notexp_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
420
421         } else if (prec->type & RPA_RECORD_END) {
422                 rpa_compiler_notexp_end(dbex->co);
423                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
424         }
425
426         return 0;
427 }
428
429
430 static rint rpa_dbex_rh_range(rpadbex_t *dbex, rlong rec)
431 {
432         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
433
434         if (prec->type & RPA_RECORD_START) {
435                 dbex->co->currange.p1 = 0;
436                 dbex->co->currange.p2 = 0;
437         } else if (prec->type & RPA_RECORD_END) {
438                 if (dbex->co->currange.p1 < dbex->co->currange.p2)
439                         rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p1, dbex->co->currange.p2));
440                 else
441                         rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p2, dbex->co->currange.p1));
442                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
443         }
444
445         return 0;
446 }
447
448
449 static rint rpa_dbex_rh_numrange(rpadbex_t *dbex, rlong rec)
450 {
451         rparecord_t *prec = (rparecord_t *) rpa_dbex_record(dbex, rec);
452
453         if (!prec)
454                 return -1;
455
456         if (prec->type & RPA_RECORD_START) {
457                 rparecord_t *child;
458                 child = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_END));
459                 if (rpa_record2long(child, &dbex->co->currange.p1) < 0)
460                         return -1;
461                 child = rpa_dbex_record(dbex, rpa_recordtree_lastchild(dbex->records, rec, RPA_RECORD_END));
462                 if (rpa_record2long(child, &dbex->co->currange.p2) < 0)
463                         return -1;
464         } else if (prec->type & RPA_RECORD_END) {
465                 if (dbex->co->currange.p1 < dbex->co->currange.p2)
466                         rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p1, dbex->co->currange.p2));
467                 else
468                         rvm_codegen_addins(dbex->co->cg, rvm_asm2(RPA_MATCHRNG_NAN, DA, XX, XX, dbex->co->currange.p2, dbex->co->currange.p1));
469                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
470         }
471
472         return 0;
473 }
474
475
476 static rint rpa_dbex_rh_clsnum(rpadbex_t *dbex, rlong rec)
477 {
478         rparecord_t *prec = (rparecord_t *) rpa_dbex_record(dbex, rec);
479
480         if (!prec)
481                 return -1;
482         if (prec->type & RPA_RECORD_START) {
483
484         } else if (prec->type & RPA_RECORD_END) {
485                 ruint32 wc;
486                 rparecord_t *child;
487                 child = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_END));
488                 if (rpa_record2long(child, &wc) < 0)
489                         return -1;
490                 rvm_codegen_addins(dbex->co->cg, rvm_asm(rpa_dbex_getmatchchr(prec->usertype & RPA_MATCH_MASK), DA, XX, XX, wc));
491                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BGRE, DA, XX, XX, 0));
492         }
493
494         return 0;
495 }
496
497
498 static rint rpa_dbex_rh_beginchar(rpadbex_t *dbex, rlong rec)
499 {
500         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
501
502         if (prec->type & RPA_RECORD_START) {
503
504         } else if (prec->type & RPA_RECORD_END) {
505                 ruint32 wc = 0;
506                 if (r_utf8_mbtowc(&wc, (const ruchar*) prec->input, (const ruchar*)prec->input + prec->inputsiz) < 0) {
507
508                         return -1;
509                 }
510                 dbex->co->currange.p1 = wc;
511         }
512
513         return 0;
514 }
515
516
517 static rint rpa_dbex_rh_endchar(rpadbex_t *dbex, rlong rec)
518 {
519         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
520
521         if (prec->type & RPA_RECORD_START) {
522
523         } else if (prec->type & RPA_RECORD_END) {
524                 ruint32 wc = 0;
525                 if (r_utf8_mbtowc(&wc, (const ruchar*) prec->input, (const ruchar*)prec->input + prec->inputsiz) < 0) {
526
527                         return -1;
528                 }
529                 dbex->co->currange.p2 = wc;
530         }
531
532         return 0;
533 }
534
535
536 static rint rpa_dbex_rh_branch(rpadbex_t *dbex, rlong rec)
537 {
538         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
539
540         if (prec->type & RPA_RECORD_START) {
541                 if (prec->usertype & RPA_NONLOOP_PATH) {
542                         rpa_compiler_nonloopybranch_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
543                 } else {
544                         rpa_compiler_branch_begin(dbex->co, prec->usertype & RPA_MATCH_MASK);
545                 }
546         } else if (prec->type & RPA_RECORD_END) {
547
548                 if (prec->usertype & RPA_NONLOOP_PATH) {
549                         rpa_compiler_nonloopybranch_end(dbex->co);
550                 } else {
551                         rpa_compiler_branch_end(dbex->co);
552                 }
553
554         }
555
556         return 0;
557 }
558
559
560 static void rpa_dbex_rh_loopref(rpadbex_t *dbex, rparecord_t *prec)
561 {
562         /*
563          * We ignore, it doesn't make sense for loops:
564          * RPA_MATCH_MULTIPLE
565          */
566         rpa_compiler_exp_begin(dbex->co, (prec->usertype & RPA_MATCH_OPTIONAL));
567         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_CMP, R_LOO, DA, XX, 0));
568         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_BGRE, DA, XX, XX, 3));
569         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_MOVS, R0, DA, XX, -1));
570         rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
571         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_ADD, R_TOP, R_TOP, R_LOO, 0));
572         rvm_codegen_addins(dbex->co->cg, rvm_asm(RVM_MOVS, R0, R_LOO, XX, 0));
573         rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
574         rpa_compiler_exp_end(dbex->co);
575 //      rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
576
577 }
578
579
580 static rint rpa_dbex_rh_aref(rpadbex_t *dbex, rlong rec)
581 {
582         const rchar *name = NULL;
583         rsize_t namesize;
584         rpa_ruleinfo_t *info;
585         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
586
587         if (prec->type & RPA_RECORD_START) {
588                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) < 0) {
589
590                         return -1;
591                 }
592
593                 if ((prec->usertype & RPA_LOOP_PATH) && rpa_parseinfo_loopdetect(dbex, rec, rpa_dbex_firstinlined(dbex))) {
594                         info = (rpa_ruleinfo_t *) r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
595                         if (!info) {
596                                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_UNRESOLVED_SYMBOL);
597                                 RPA_DBEX_SETERRINFO_NAME(dbex, name, namesize);
598                                 return -1;
599                         }
600                         if (rpa_dbex_findinlined(dbex, info->startrec)) {
601                                 rpa_dbex_rh_loopref(dbex, prec);
602                         } else {
603                                 if (prec->usertype & RPA_MATCH_OPTIONAL) {
604                                         /*
605                                          * Most probably this is useless case - loop refs shouldn't have quantitative modifiers
606                                          * but in case they do we wrap the inlined production rule in quantitative expression.
607                                          * The inlined named rule can take the quantitative argument, but I just don't have
608                                          * a clean way to pass it from here - so, lets play the records inside an expression that
609                                          * has the right quantitative argument.
610                                          * We ignore, it doesn't make sense for loops:
611                                          * RPA_MATCH_MULTIPLE
612                                          */
613                                         rpa_compiler_exp_begin(dbex->co, RPA_MATCH_OPTIONAL);
614                                         rpa_dbex_play_recordhandlers(dbex, info->startrec, info->sizerecs);
615                                         rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
616                                         rpa_compiler_exp_end(dbex->co);
617                                 } else {
618                                         rpa_dbex_play_recordhandlers(dbex, info->startrec, info->sizerecs);
619                                 }
620                         }
621                 } else {
622                         rpa_compiler_reference(dbex->co, name, namesize, (prec->usertype & RPA_MATCH_MASK));
623                 }
624                 rvm_codegen_index_addrelocins(dbex->co->cg, RVM_RELOC_BRANCH, RPA_COMPILER_CURRENTEXP(dbex->co)->endidx, rvm_asm(RVM_BLES, DA, XX, XX, 0));
625
626         } else if (prec->type & RPA_RECORD_END) {
627
628         }
629         return 0;
630 }
631
632
633 rpadbex_t *rpa_dbex_create(void)
634 {
635         rpadbex_t *dbex = (rpadbex_t *) r_zmalloc(sizeof(*dbex));
636
637         dbex->co = rpa_compiler_create();
638         dbex->pa = rpa_parser_create();
639         dbex->records = r_array_create(sizeof(rparecord_t));
640         dbex->rules = r_harray_create(sizeof(rpa_ruleinfo_t));
641         dbex->recstack = r_array_create(sizeof(rulong));
642         dbex->inlinestack = r_array_create(sizeof(rulong));
643         dbex->handlers = r_zmalloc(sizeof(rpa_dbex_recordhandler) * RPA_PRODUCTION_COUNT);
644         rpa_dbex_cfgset(dbex, RPA_DBEXCFG_OPTIMIZATIONS, 1);
645
646         dbex->handlers[RPA_PRODUCTION_NAMEDRULE] = rpa_dbex_rh_namedrule;
647         dbex->handlers[RPA_PRODUCTION_ANONYMOUSRULE] = rpa_dbex_rh_anonymousrule;
648         dbex->handlers[RPA_PRODUCTION_CLS] = rpa_dbex_rh_cls;
649         dbex->handlers[RPA_PRODUCTION_CHAR] = rpa_dbex_rh_char;
650         dbex->handlers[RPA_PRODUCTION_SPECIALCHAR] = rpa_dbex_rh_specialchar;
651         dbex->handlers[RPA_PRODUCTION_CLSCHAR] = rpa_dbex_rh_clschar;
652         dbex->handlers[RPA_PRODUCTION_AREF] = rpa_dbex_rh_aref;
653         dbex->handlers[RPA_PRODUCTION_CREF] = rpa_dbex_rh_aref;
654         dbex->handlers[RPA_PRODUCTION_BRACKETEXP] = rpa_dbex_rh_exp;
655         dbex->handlers[RPA_PRODUCTION_OROP] = rpa_dbex_rh_orop;
656         dbex->handlers[RPA_PRODUCTION_NOTOP] = rpa_dbex_rh_notop;
657         dbex->handlers[RPA_PRODUCTION_ALTBRANCH] = rpa_dbex_rh_branch;
658         dbex->handlers[RPA_PRODUCTION_NEGBRANCH] = rpa_dbex_rh_branch;
659         dbex->handlers[RPA_PRODUCTION_CHARRNG] = rpa_dbex_rh_range;
660         dbex->handlers[RPA_PRODUCTION_NUMRNG] = rpa_dbex_rh_numrange;
661         dbex->handlers[RPA_PRODUCTION_CLSNUM] = rpa_dbex_rh_clsnum;
662         dbex->handlers[RPA_PRODUCTION_BEGINCHAR] = rpa_dbex_rh_beginchar;
663         dbex->handlers[RPA_PRODUCTION_ENDCHAR] = rpa_dbex_rh_endchar;
664         dbex->handlers[RPA_PRODUCTION_NOROP] = rpa_dbex_rh_norop;
665         dbex->handlers[RPA_PRODUCTION_REQOP] = rpa_dbex_rh_exp;
666         dbex->handlers[RPA_PRODUCTION_MINOP] = rpa_dbex_rh_exp;
667         dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMIT] = rpa_dbex_rh_emit;
668         dbex->handlers[RPA_PRODUCTION_DIRECTIVENOEMIT] = rpa_dbex_rh_noemit;
669         dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMITALL] = rpa_dbex_rh_emitall;
670         dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMITNONE] = rpa_dbex_rh_emitnone;
671         dbex->handlers[RPA_PRODUCTION_DIRECTIVEEMITID] = rpa_dbex_rh_uid;
672
673         return dbex;
674 }
675
676
677 void rpa_dbex_destroy(rpadbex_t *dbex)
678 {
679         if (dbex) {
680                 rpa_compiler_destroy(dbex->co);
681                 rpa_parser_destroy(dbex->pa);
682                 r_harray_destroy(dbex->rules);
683                 r_array_destroy(dbex->records);
684                 r_array_destroy(dbex->recstack);
685                 r_array_destroy(dbex->inlinestack);
686                 r_free(dbex->handlers);
687                 r_free(dbex);
688         }
689 }
690
691
692 static rint rpa_parseinfo_loopdetect_do(rpadbex_t *dbex, rlong parent, rlong loopto, rint inderction)
693 {
694         rsize_t namesiz;
695         const rchar *name;
696         rlong i;
697         rint ret = 0;
698         rparecord_t *prec;
699
700         if (parent == loopto && inderction > 0)
701                 return 1;
702         for (i = 0; i < r_array_length(dbex->recstack); i++) {
703                 if (parent == r_array_index(dbex->recstack, i, rlong))
704                         return 0;
705         }
706         r_array_add(dbex->recstack, &parent);
707
708         if (!(prec = (rparecord_t *)r_array_slot(dbex->records, parent)))
709                 return 0;
710         if (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF)
711                 i = parent;
712         else
713                 i = rpa_recordtree_firstchild(dbex->records, parent, RPA_RECORD_START);
714         for (; i >= 0; i = rpa_recordtree_next(dbex->records, i, RPA_RECORD_START)) {
715                 prec = (rparecord_t *)r_array_slot(dbex->records, i);
716                 if (prec->ruleuid == RPA_PRODUCTION_RULENAME)
717                         continue;
718                 if (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF) {
719                         rpa_ruleinfo_t *info;
720                         if ((inderction > 0 || i != parent) && i == loopto) {
721                                 /*
722                                  * We found what we are looking for
723                                  */
724                                 ret = 1;
725                                 break;
726                         }
727                         if (rpa_dbex_rulename(dbex, i, &name, &namesiz) < 0)
728                                 R_ASSERT(0);
729                         info = (rpa_ruleinfo_t *) r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesiz));
730                         if (!info)
731                                 continue;
732                         if ((ret = rpa_parseinfo_loopdetect_do(dbex, info->startrec, loopto, inderction + 1)) > 0)
733                                 break;
734                 } else {
735                         if ((ret = rpa_parseinfo_loopdetect_do(dbex, i, loopto, inderction + 1)) > 0)
736                                 break;
737                 }
738
739                 if ((prec->usertype & RPA_MATCH_OPTIONAL) == 0 && (prec->ruleuid == RPA_PRODUCTION_CREF || prec->ruleuid == RPA_PRODUCTION_AREF ||
740                                 prec->ruleuid == RPA_PRODUCTION_CHAR || prec->ruleuid == RPA_PRODUCTION_CLS || prec->ruleuid == RPA_PRODUCTION_SPECIALCHAR))
741                         break;
742
743         }
744
745         r_array_removelast(dbex->recstack);
746         return ret;
747 }
748
749
750 static rint rpa_parseinfo_loopdetect(rpadbex_t *dbex, rlong parent, rlong loopto)
751 {
752         if (parent != loopto) {
753                 /*
754                  * Make sure we are dealing with a loop first
755                  */
756                 if (!rpa_parseinfo_loopdetect_do(dbex, loopto, parent, 0))
757                         return 0;
758         }
759
760         return (rpa_parseinfo_loopdetect_do(dbex, parent, loopto, 0)) ? 1 : 0;
761 }
762
763
764 static void rpa_parseinfo_marklooppath(rpadbex_t *dbex, rlong parent)
765 {
766         rlong i;
767
768         if (rpa_parseinfo_loopdetect(dbex, parent, parent) > 0) {
769                 rpa_record_setusertype(dbex->records, parent, RPA_LOOP_PATH, RVALSET_OR);
770                 for (i = rpa_recordtree_firstchild(dbex->records, parent, RPA_RECORD_START); i >= 0; i = rpa_recordtree_next(dbex->records, i, RPA_RECORD_START)) {
771                         rpa_parseinfo_marklooppath(dbex, i);
772                 }
773         }
774 }
775
776
777 static rint rpa_parseinfo_rule_checkforloop(rpadbex_t *dbex, const char *name, rsize_t namesize, rlong loopto)
778 {
779         rpa_ruleinfo_t *info = info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
780
781         if (!info)
782                 return 0;
783         return rpa_parseinfo_loopdetect(dbex, info->startrec, loopto);
784 }
785
786
787 static void rpa_dbex_buildloopinfo(rpadbex_t *dbex)
788 {
789         ruint i, p;
790         rharray_t *rules = dbex->rules;
791         rpa_ruleinfo_t *info;
792
793         for (i = 0; i < r_array_length(rules->members); i++) {
794                 if ((info = (rpa_ruleinfo_t *)r_harray_get(rules, i)) != NULL)
795                         rpa_parseinfo_marklooppath(dbex, info->startrec);
796         }
797
798         /*
799          * Mark the non-loop branches.
800          */
801         for (i = 0; i < r_array_length(dbex->records); i++) {
802                 rparecord_t *prec = (rparecord_t *)r_array_slot(dbex->records, i);
803                 if (prec->type == RPA_RECORD_START &&
804                         (prec->ruleuid == RPA_PRODUCTION_ALTBRANCH) &&
805                         (prec->usertype & RPA_LOOP_PATH) == 0) {
806                         p = rpa_recordtree_parent(dbex->records, i, RPA_RECORD_START);
807                         if (p >= 0) {
808                                 prec = (rparecord_t *)r_array_slot(dbex->records, p);
809                                 if (prec && (prec->usertype & RPA_LOOP_PATH))
810                                         rpa_record_setusertype(dbex->records, i, RPA_NONLOOP_PATH, RVALSET_OR);
811                         }
812                 }
813         }
814 }
815
816
817 static void rpa_dbex_buildruleinfo(rpadbex_t *dbex)
818 {
819         rparecord_t *rec;
820         rpa_ruleinfo_t info;
821         ruint nrecords;
822         rlong i;
823         const rchar *name = NULL;
824         rsize_t namesize = 0;
825
826         if (dbex->rules) {
827                 r_object_destroy((robject_t *)dbex->rules);
828                 dbex->rules = NULL;
829         }
830         dbex->rules = r_harray_create(sizeof(rpa_ruleinfo_t));
831
832         for (i = 0, nrecords = r_array_length(dbex->records); i < nrecords; i++) {
833                 if (!(rec = rpa_dbex_record(dbex, i)))
834                         continue;
835                 if ((rec->ruleuid == RPA_PRODUCTION_NAMEDRULE) && (rec->type & RPA_RECORD_START)) {
836                         r_memset(&info, 0, sizeof(info));
837                         info.type = RPA_RULEINFO_NAMEDRULE;
838                         info.startrec = i;
839                         info.sizerecs = rpa_recordtree_size(dbex->records, i);
840                         if (info.sizerecs < 0)
841                                 continue;
842                         if (rpa_dbex_rulename(dbex, i, &name, &namesize) < 0) {
843                                 continue;
844                         }
845                         r_harray_add(dbex->rules, name, namesize, &info);
846                         i += info.sizerecs - 1;
847                 } else if ((rec->ruleuid == RPA_PRODUCTION_ANONYMOUSRULE) && (rec->type & RPA_RECORD_START)) {
848                         r_memset(&info, 0, sizeof(info));
849                         info.type = RPA_RULEINFO_ANONYMOUSRULE;
850                         info.startrec = i;
851                         info.sizerecs = rpa_recordtree_size(dbex->records, i);
852                         if (info.sizerecs < 0)
853                                 continue;
854                         if ((rec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, i, RPA_RECORD_END))))
855                                 r_harray_add(dbex->rules, rec->input, rec->inputsiz, &info);
856                         i += info.sizerecs - 1;
857                 } else if ((rec->type & RPA_RECORD_START) && (rec->ruleuid >= RPA_PRODUCTION_DIRECTIVEEMIT) && (rec->ruleuid <= RPA_PRODUCTION_DIRECTIVEEMITID)) {
858                         r_memset(&info, 0, sizeof(info));
859                         info.type = RPA_RULEINFO_DIRECTIVE;
860                         info.startrec = i;
861                         info.sizerecs = rpa_recordtree_size(dbex->records, i);
862                         if (info.sizerecs < 0)
863                                 continue;
864                         if ((rec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, i, RPA_RECORD_END))))
865                                 r_harray_add(dbex->rules, rec->input, rec->inputsiz, &info);
866                         i += info.sizerecs - 1;
867                 }
868
869         }
870 }
871
872
873 static rlong rpa_dbex_copy_handler(rarray_t *records, rlong rec, rpointer userdata)
874 {
875         rpadbex_t *dbex = (rpadbex_t *)userdata;
876         rlong index;
877
878         rparecord_t *prec = (rparecord_t *)r_array_slot(records, rec);
879         if (prec->ruleuid == RPA_PRODUCTION_OCCURENCE && (prec->type & RPA_RECORD_START)) {
880                 /*
881                  * Ignore it
882                  */
883         } else if (prec->ruleuid == RPA_PRODUCTION_OCCURENCE && (prec->type & (RPA_RECORD_MATCH | RPA_RECORD_END))) {
884                 ruint32 usertype = RPA_MATCH_NONE;
885                 rlong lastrec = 0;
886                 /*
887                  * Don't copy it but set the usertype of the previous record accordingly.
888                  */
889                 switch (*prec->input) {
890                 case '?':
891                         usertype = RPA_MATCH_OPTIONAL;
892                         break;
893                 case '+':
894                         usertype = RPA_MATCH_MULTIPLE;
895                         break;
896                 case '*':
897                         usertype = RPA_MATCH_MULTIOPT;
898                         break;
899                 default:
900                         usertype = RPA_MATCH_NONE;
901                 };
902                 lastrec = r_array_length(dbex->records) - 1;
903                 if (lastrec >= 0)
904                         rpa_record_setusertype(dbex->records, lastrec, usertype, RVALSET_OR);
905         } else if (prec->ruleuid != RPA_RECORD_INVALID_UID) {
906                 index = r_array_add(dbex->records, prec);
907                 /*
908                  * Optimizations. Lets apply the optimizations while we copy the records.
909                  * This is probably not the most clean way to apply optimizations, in the future
910                  * we should probably think of optimization pass right before compiling.
911                  */
912                 if (dbex->optimizations) {
913                         if (prec->ruleuid == RPA_PRODUCTION_OROP && (prec->type & RPA_RECORD_END)) {
914                                 rpa_optimiztion_orop(dbex->records, rpa_recordtree_get(dbex->records, index, RPA_RECORD_START));
915                         }
916                 }
917         }
918
919         return 0;
920 }
921
922
923 static void rpa_dbex_copyrecords(rpadbex_t *dbex, rarray_t *records)
924 {
925         rint i;
926         for (i = rpa_recordtree_get(records, 0, RPA_RECORD_START); i >= 0; i = rpa_recordtree_next(records, i, RPA_RECORD_START))
927                 rpa_recordtree_walk(records, i, 0, rpa_dbex_copy_handler, dbex);
928 }
929
930
931 static rparecord_t *rpa_dbex_record(rpadbex_t *dbex, rlong rec)
932 {
933         rparecord_t *prec;
934
935         if (!dbex || !dbex->rules)
936                 return NULL;
937         if (rec < 0 || rec >= r_array_length(dbex->records))
938                 return NULL;
939         prec = (rparecord_t *)r_array_slot(dbex->records, rec);
940         return prec;
941
942 }
943
944
945 static rparecord_t *rpa_dbex_rulerecord(rpadbex_t *dbex, rparule_t rid)
946 {
947         rparecord_t *prec;
948         rpa_ruleinfo_t *info;
949         rlong rec;
950
951         if (!dbex || !dbex->rules)
952                 return NULL;
953         info = r_harray_get(dbex->rules, rid);
954         if (!info)
955                 return NULL;
956         rec = info->startrec + info->sizerecs - 1;
957         if (rec < 0 || rec >= r_array_length(dbex->records))
958                 return NULL;
959         prec = (rparecord_t *)r_array_slot(dbex->records, rec);
960         return prec;
961 }
962
963
964 static rint rpa_dbex_rulename(rpadbex_t *dbex, rlong rec, const rchar **name, rsize_t *namesize)
965 {
966         rparecord_t *pnamerec = rpa_dbex_record(dbex, rpa_recordtree_firstchild(dbex->records, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_START), RPA_RECORD_END));
967         if (!pnamerec || !(pnamerec->ruleuid & RPA_PRODUCTION_RULENAME))
968                 return -1;
969         *name = pnamerec->input;
970         *namesize = pnamerec->inputsiz;
971         return 0;
972 }
973
974
975 rint rpa_dbex_open(rpadbex_t *dbex)
976 {
977         if (!dbex)
978                 return -1;
979         if (dbex->rules) {
980                 r_object_destroy((robject_t *)dbex->rules);
981                 dbex->rules = NULL;
982         }
983         return 0;
984 }
985
986
987 void rpa_dbex_close(rpadbex_t *dbex)
988 {
989         if (!dbex)
990                 return;
991         rpa_dbex_buildruleinfo(dbex);
992         rpa_dbex_buildloopinfo(dbex);
993 }
994
995
996 rlong rpa_dbex_load(rpadbex_t *dbex, const rchar *rules, rsize_t size)
997 {
998         rlong ret;
999
1000         if (!dbex)
1001                 return -1;
1002         if (dbex->rules) {
1003                 /*
1004                  * Dbex is not open
1005                  */
1006                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTOPEN);
1007                 return -1;
1008         }
1009         if ((ret = rpa_parser_load(dbex->pa, rules, size)) < 0) {
1010
1011                 return -1;
1012         }
1013         if (ret != size) {
1014                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_SYNTAX_ERROR);
1015                 RPA_DBEX_SETERRINFO_OFFSET(dbex, ret);
1016                 return -1;
1017         }
1018         rpa_dbex_copyrecords(dbex, dbex->pa->stat->records);
1019         return ret;
1020 }
1021
1022
1023 rlong rpa_dbex_load_s(rpadbex_t *dbex, const rchar *rules)
1024 {
1025         return rpa_dbex_load(dbex, rules, r_strlen(rules));
1026 }
1027
1028
1029 void rpa_dbex_dumpindented(rpadbex_t *dbex, rlong rec, rint level, const rchar *rulelabel)
1030 {
1031         rchar buffer[1024];
1032         rint i, size;
1033         rparecord_t *prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END));
1034
1035         if (!prec)
1036                 return;
1037         r_memset(buffer, 0, sizeof(buffer));
1038         for (i = 0; i < level + 1; i++)
1039                 r_printf("   ");
1040         r_printf("(");
1041         r_printf("%s, %c, %c", rulelabel, rpa_record_optchar(prec, 'x'), rpa_record_loopchar(prec, 'x'));
1042         r_printf(")");
1043         size = R_MIN(prec->inputsiz, sizeof(buffer) - 1);
1044         r_strncpy(buffer, prec->input, size);
1045
1046         if (size == (sizeof(buffer) - 1))
1047                 r_printf(" %s ...\n", buffer);
1048         else
1049                 r_printf(" %s\n", buffer);
1050         return;
1051 }
1052
1053
1054 static rlong rpa_dbex_firstinlined(rpadbex_t *dbex)
1055 {
1056         rlong ret = r_array_empty(dbex->inlinestack) ? -1 : r_array_index(dbex->inlinestack, 0, rlong);
1057         return ret;
1058 }
1059
1060
1061 static rint rpa_dbex_findinlined(rpadbex_t *dbex, rlong startrec)
1062 {
1063         rlong i;
1064         for (i = 0; i < r_array_length(dbex->inlinestack); i++) {
1065                 if (r_array_index(dbex->inlinestack, i, rlong) == startrec)
1066                         return 1;
1067         }
1068         return 0;
1069 }
1070
1071
1072 static void rpa_dbex_dumptree_do(rpadbex_t *dbex, rlong rec, rint level)
1073 {
1074         rparecord_t *prec = rpa_dbex_record(dbex, rec);
1075         if (prec && prec->ruleuid == RPA_PRODUCTION_RULENAME)
1076                 return;
1077         if (prec && (prec->ruleuid == RPA_PRODUCTION_AREF || prec->ruleuid == RPA_PRODUCTION_CREF)) {
1078                 const rchar *name = NULL;
1079                 rsize_t namesize = 0;
1080                 rint loop = 0;
1081                 rpa_ruleinfo_t *info;
1082
1083                 if (rpa_dbex_rulename(dbex, rec, &name, &namesize) >= 0) {
1084                         loop = rpa_parseinfo_rule_checkforloop(dbex, name, namesize, rpa_dbex_firstinlined(dbex));
1085                         info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rpa_dbex_lookup(dbex, name, namesize));
1086                         if (loop && info){
1087                                 if (!rpa_dbex_findinlined(dbex, info->startrec)) {
1088                                         /*
1089                                          * Temporary set the quantitative flags for the inlined rule to the parent
1090                                          * reference, so they are printed correctly. After the printing is done
1091                                          * restore the original flags.
1092                                          */
1093                                         rparecord_t *prulestart = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_START));
1094                                         rparecord_t *pruleend = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_END));
1095                                         rulong optional = (prulestart->usertype & RPA_MATCH_OPTIONAL);
1096                                         prulestart->usertype |= (prec->usertype & RPA_MATCH_OPTIONAL);
1097                                         pruleend->usertype |= (prec->usertype & RPA_MATCH_OPTIONAL);
1098                                         r_array_add(dbex->inlinestack, &info->startrec);
1099                                         rpa_dbex_dumptree_do(dbex, info->startrec, level);
1100                                         r_array_removelast(dbex->inlinestack);
1101                                         if (!optional) {
1102                                                 prulestart->usertype &= ~RPA_MATCH_OPTIONAL;
1103                                                 pruleend->usertype &= ~RPA_MATCH_OPTIONAL;
1104                                         }
1105                                 } else {
1106                                         rpa_dbex_dumpindented(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END), level, "loopref");
1107                                 }
1108                                 return;
1109                         }
1110                 }
1111         }
1112         rpa_dbex_dumpindented(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END), level, prec->rule);
1113         for (rec = rpa_recordtree_firstchild(dbex->records, rec, RPA_RECORD_START); rec >= 0; rec = rpa_recordtree_next(dbex->records, rec, RPA_RECORD_START)) {
1114                 rpa_dbex_dumptree_do(dbex, rec, level + 1);
1115         }
1116 }
1117
1118
1119 rint rpa_dbex_dumptree(rpadbex_t *dbex, rparule_t rid)
1120 {
1121         rpa_ruleinfo_t *info;
1122
1123         if (!dbex)
1124                 return -1;
1125         if (rid < 0) {
1126                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_PARAM);
1127                 return -1;
1128         }
1129         if (!dbex->rules) {
1130                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1131                 return -1;
1132         }
1133         if (!(info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid))) {
1134                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1135                 return -1;
1136         }
1137         r_array_add(dbex->inlinestack, &info->startrec);
1138         rpa_dbex_dumptree_do(dbex, info->startrec, 0);
1139         r_array_removelast(dbex->inlinestack);
1140         return 0;
1141 }
1142
1143
1144 rint rpa_dbex_dumpproductions(rpadbex_t *dbex)
1145 {
1146         rint ret = 0;
1147         rparule_t rid;
1148         rchar buffer[512];
1149
1150         if (!dbex)
1151                 return -1;
1152         if (!dbex->rules) {
1153                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1154                 return -1;
1155         }
1156         for (rid = rpa_dbex_first(dbex); rid >= 0; rid = rpa_dbex_next(dbex, rid)) {
1157                 ret = rpa_dbex_strncpy(dbex, buffer, rid, sizeof(buffer));
1158                 if ( ret >= 0) {
1159                         if (ret == sizeof(buffer))
1160                                 r_printf("   %s ...\n", buffer);
1161                         else
1162                                 r_printf("   %s\n", buffer);
1163                 }
1164
1165         }
1166         return ret;
1167 }
1168
1169
1170 rint rpa_dbex_dumprecords(rpadbex_t *dbex)
1171 {
1172         rlong i;
1173
1174         if (!dbex)
1175                 return -1;
1176         if (!dbex->rules) {
1177                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1178                 return -1;
1179         }
1180         for (i = 0; i < r_array_length(dbex->records); i++) {
1181                 rpa_record_dump(dbex->records, i);
1182         }
1183         return 0;
1184 }
1185
1186
1187 rint rpa_dbex_dumpinfo(rpadbex_t *dbex)
1188 {
1189         rlong i;
1190         rpa_ruleinfo_t *info;
1191
1192         if (!dbex)
1193                 return -1;
1194         if (!dbex->rules) {
1195                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1196                 return -1;
1197         }
1198         for (i = 0; i < r_array_length(dbex->rules->names); i++) {
1199                 rstr_t *name = r_array_index(dbex->rules->names, i, rstr_t*);
1200                 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
1201                 switch (info->type) {
1202                 case RPA_RULEINFO_NAMEDRULE:
1203                         r_printf("N ");
1204                         break;
1205                 case RPA_RULEINFO_ANONYMOUSRULE:
1206                         r_printf("A ");
1207                         break;
1208                 case RPA_RULEINFO_DIRECTIVE:
1209                         r_printf("D ");
1210                         break;
1211                 default:
1212                         r_printf("  ");
1213                         break;
1214                 };
1215                 r_printf("(%7d, %4d, code: %7ld, %5ld) : %s\n", info->startrec, info->sizerecs, info->codeoff, info->codesiz, name->str);
1216         }
1217         return 0;
1218 }
1219
1220
1221 rint rpa_dbex_dumpuids(rpadbex_t *dbex)
1222 {
1223         rlong i;
1224         rlong rec;
1225         rpa_ruleinfo_t *info;
1226         rchar *buffer = r_zmalloc(32 * sizeof(rchar));
1227
1228         if (!dbex)
1229                 return -1;
1230         if (!dbex->rules) {
1231                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1232                 return -1;
1233         }
1234         for (i = 0; i < r_array_length(dbex->rules->names); i++) {
1235                 info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, i);
1236                 if (info->type == RPA_RULEINFO_DIRECTIVE) {
1237                         rparecord_t *prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, info->startrec, RPA_RECORD_END));
1238                         if (prec->ruleuid == RPA_PRODUCTION_DIRECTIVEEMITID && prec->inputsiz) {
1239                                 rec = rpa_recordtree_firstchild(dbex->records, info->startrec, RPA_RECORD_START);
1240                                 while (rec >= 0) {
1241                                         prec = rpa_dbex_record(dbex, rpa_recordtree_get(dbex->records, rec, RPA_RECORD_END));
1242                                         if (prec->ruleuid == RPA_PRODUCTION_ALIASNAME) {
1243                                                 ruint32 dec;
1244                                                 if (rpa_record2long(rpa_dbex_record(dbex, rpa_recordtree_next(dbex->records, rec, RPA_RECORD_END)), &dec) < 0)
1245                                                         break;
1246                                                 buffer = r_realloc(buffer, prec->inputsiz + 1);
1247                                                 r_memset(buffer, 0, prec->inputsiz + 1);
1248                                                 r_memcpy(buffer, prec->input, prec->inputsiz);
1249                                                 r_printf("#define %s %d\n", buffer, dec);
1250                                                 break;
1251                                         }
1252                                         rec = rpa_recordtree_next(dbex->records, rec, RPA_RECORD_START);
1253                                 }
1254                         }
1255                 }
1256         }
1257         r_free(buffer);
1258         return 0;
1259 }
1260
1261
1262 rint rpa_dbex_dumpcode(rpadbex_t* dbex, rparule_t rid)
1263 {
1264         rpa_ruleinfo_t *info;
1265         if (!dbex)
1266                 return -1;
1267         if (rid < 0) {
1268                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_PARAM);
1269                 return -1;
1270         }
1271         if (!dbex->rules) {
1272                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1273                 return -1;
1274         }
1275         info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid);
1276         if (!info)
1277                 return -1;
1278         rvm_asm_dump(rvm_codegen_getcode(dbex->co->cg, info->codeoff), info->codesiz);
1279         return 0;
1280 }
1281
1282
1283 rsize_t rpa_dbex_strlen(rpadbex_t *dbex, rparule_t rid)
1284 {
1285         rparecord_t *prec;
1286         rsize_t size;
1287
1288         if (!dbex)
1289                 return -1;
1290         if ((prec = rpa_dbex_rulerecord(dbex, rid)) == NULL) {
1291                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1292                 return -1;
1293         }
1294         size = prec->inputsiz;
1295         return size;
1296 }
1297
1298
1299 rsize_t rpa_dbex_strncpy(rpadbex_t *dbex, rchar *dst, rparule_t rid, rsize_t n)
1300 {
1301         rparecord_t *prec;
1302         rsize_t size;
1303
1304         if (!dbex)
1305                 return -1;
1306         if ((prec = rpa_dbex_rulerecord(dbex, rid)) == NULL) {
1307                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1308                 return -1;
1309         }
1310         size = prec->inputsiz;
1311         if (n <= size)
1312                 size = n - 1;
1313         r_memset(dst, 0, n);
1314         r_strncpy(dst, prec->input, size);
1315         return size + 1;
1316 }
1317
1318
1319 rparule_t rpa_dbex_first(rpadbex_t *dbex)
1320 {
1321         if (!dbex || !dbex->rules)
1322                 return -1;
1323
1324         if (r_array_length(dbex->rules->members) > 0)
1325                 return 0;
1326         return -1;
1327 }
1328
1329
1330 rparule_t rpa_dbex_last(rpadbex_t *dbex)
1331 {
1332         if (!dbex || !dbex->rules)
1333                 return -1;
1334
1335         if (r_array_length(dbex->rules->members) > 0)
1336                 return r_array_length(dbex->rules->members) - 1;
1337         return -1;
1338 }
1339
1340
1341 rparule_t rpa_dbex_lookup(rpadbex_t *dbex, const rchar *name, rsize_t namesize)
1342 {
1343         if (!dbex) {
1344                 return -1;
1345         }
1346
1347         return r_harray_taillookup(dbex->rules, name, namesize);
1348 }
1349
1350
1351 rparule_t rpa_dbex_lookup_s(rpadbex_t *dbex, const rchar *name)
1352 {
1353         return rpa_dbex_lookup(dbex, name, r_strlen(name));
1354 }
1355
1356
1357 rparule_t rpa_dbex_next(rpadbex_t *dbex, rparule_t rid)
1358 {
1359         if (!dbex || !dbex->rules)
1360                 return -1;
1361         ++rid;
1362         if (rid < r_array_length(dbex->rules->members))
1363                 return rid;
1364         return -1;
1365 }
1366
1367
1368 rparule_t rpa_dbex_prev(rpadbex_t *dbex, rparule_t rid)
1369 {
1370         if (!dbex || !dbex->rules)
1371                 return -1;
1372         --rid;
1373         if (rid >= 0)
1374                 return rid;
1375         return -1;
1376 }
1377
1378
1379 rlong rpa_dbex_lasterror(rpadbex_t *dbex)
1380 {
1381         if (!dbex)
1382                 return -1;
1383         return dbex->err.code;
1384 }
1385
1386
1387 rlong rpa_dbex_lasterrorinfo(rpadbex_t *dbex, rpa_errinfo_t *errinfo)
1388 {
1389         if (!dbex || !errinfo)
1390                 return -1;
1391         r_memcpy(errinfo, &dbex->err, sizeof(rpa_errinfo_t));
1392         return 0;
1393 }
1394
1395
1396 const rchar *rpa_dbex_version()
1397 {
1398         return "2.0";
1399 }
1400
1401
1402 static rlong rpa_dbex_play_recordhandler(rpadbex_t *dbex, rlong rec)
1403 {
1404         rparecord_t *prec = (rparecord_t *) r_array_slot(dbex->records, rec);
1405         rpa_dbex_recordhandler handler = dbex->handlers[prec->ruleuid];
1406         if (handler) {
1407                 if (handler(dbex, rec) < 0)
1408                         return -1;
1409         }
1410         return 0;
1411 }
1412
1413
1414 static rlong rpa_dbex_play_recordhandlers(rpadbex_t *dbex, rlong rec, rlong nrecs)
1415 {
1416         rparecord_t *prec;
1417         rlong i, res = 0;
1418
1419         for (i = rec; i < rec + nrecs; i++) {
1420                 prec = (rparecord_t *) r_array_slot(dbex->records, i);
1421
1422                 if (prec->ruleuid == RPA_PRODUCTION_MINOP && (prec->type & RPA_RECORD_START)) {
1423                         rlong lastchild = rpa_recordtree_lastchild(dbex->records, i, RPA_RECORD_START);
1424                         rlong firstchild = rpa_recordtree_firstchild(dbex->records, i, RPA_RECORD_START);
1425                         if (firstchild < 0 || lastchild < 0 || firstchild == lastchild)
1426                                 return -1;
1427                         if ((res = rpa_dbex_play_recordhandler(dbex, i)) < 0)
1428                                 return -1;
1429                         if ((res = rpa_dbex_play_recordhandlers(dbex, lastchild, rpa_recordtree_size(dbex->records, lastchild))) < 0)
1430                                 return -1;
1431                         if ((res = rpa_dbex_play_recordhandlers(dbex, firstchild, lastchild - firstchild)) < 0)
1432                                 return -1;
1433                         if ((res = rpa_dbex_play_recordhandler(dbex, rpa_recordtree_get(dbex->records, i, RPA_RECORD_END))) < 0)
1434                                 return -1;
1435                         i += rpa_recordtree_size(dbex->records, i) - 1;
1436                         continue;
1437                 }
1438
1439                 if (rpa_dbex_play_recordhandler(dbex, i) < 0)
1440                         return -1;
1441         }
1442
1443         return i;
1444 }
1445
1446
1447 static rint rpa_dbex_compile_rule(rpadbex_t *dbex, rparule_t rid)
1448 {
1449         rlong codeoff;
1450         rpa_ruleinfo_t *info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid);
1451
1452         if (!info)
1453                 return -1;
1454         codeoff = rvm_codegen_getcodesize(dbex->co->cg);
1455         if (rpa_dbex_play_recordhandlers(dbex, info->startrec, info->sizerecs) < 0)
1456                 return -1;
1457         info->codeoff = codeoff;
1458         info->codesiz = rvm_codegen_getcodesize(dbex->co->cg) - codeoff;
1459
1460         return 0;
1461 }
1462
1463
1464 rint rpa_dbex_compile(rpadbex_t *dbex)
1465 {
1466         rparule_t rid;
1467         rvm_codelabel_t *labelerr;
1468
1469         if (!dbex)
1470                 return -1;
1471         if (!dbex->rules) {
1472                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1473                 return -1;
1474         }
1475         /*
1476          * By default all production rules emit
1477          */
1478         if (dbex->co)
1479                 rpa_compiler_destroy(dbex->co);
1480         dbex->co = rpa_compiler_create();
1481         rpa_dbex_setemit(dbex, TRUE);
1482
1483         for (rid = rpa_dbex_first(dbex); rid >= 0; rid = rpa_dbex_next(dbex, rid)) {
1484                 if (rpa_dbex_compile_rule(dbex, rid) < 0) {
1485                         return -1;
1486                 }
1487         }
1488
1489         if (rvm_codegen_relocate(dbex->co->cg, &labelerr) < 0) {
1490                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_UNRESOLVED_SYMBOL);
1491                 RPA_DBEX_SETERRINFO_NAME(dbex, labelerr->name->str, labelerr->name->size);
1492                 return -1;
1493         }
1494
1495         return 0;
1496 }
1497
1498
1499 rvm_asmins_t *rpa_dbex_executable(rpadbex_t *dbex)
1500 {
1501         if (!dbex)
1502                 return NULL;
1503         if (!dbex->rules) {
1504                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1505                 return NULL;
1506         }
1507         return rvm_codegen_getcode(dbex->co->cg, 0);
1508 }
1509
1510
1511 rlong rpa_dbex_executableoffset(rpadbex_t *dbex, rparule_t rid)
1512 {
1513         rpa_ruleinfo_t *info;
1514
1515         if (!dbex)
1516                 return -1;
1517         if (!dbex->rules) {
1518                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTCLOSED);
1519                 return -1;
1520         }
1521         info = (rpa_ruleinfo_t *)r_harray_get(dbex->rules, rid);
1522         if (!info) {
1523                 RPA_DBEX_SETERRINFO_CODE(dbex, RPA_E_NOTFOUND);
1524                 return -1;
1525         }
1526         return info->codeoff;
1527 }
1528
1529
1530 rlong rpa_dbex_cfgset(rpadbex_t *dbex, rulong cfg, rulong val)
1531 {
1532         if (!dbex)
1533                 return -1;
1534         if (cfg == RPA_DBEXCFG_OPTIMIZATIONS) {
1535                 dbex->optimizations = val;
1536                 return 0;
1537         }
1538         return -1;
1539 }
1540
1541
1542 rlong rpa_dbex_cfgget(rpadbex_t *dbex, rulong cfg)
1543 {
1544         if (!dbex)
1545                 return -1;
1546         if (cfg == RPA_DBEXCFG_OPTIMIZATIONS) {
1547                 return dbex->optimizations;
1548         }
1549         return -1;
1550 }
1551