RPA Toolkit
af5dbc41d95c5b5d2459cfbcf6bfe227d2fecae9
[rpatk.git] / rpa / rpabitmap.c
1 /*
2  *  Regular Pattern Analyzer (RPA)
3  *  Copyright (c) 2009-2010 Martin Stoilov
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  *  Martin Stoilov <martin@rpasearch.com>
19  */
20
21 #include "rpa/rpabitmap.h"
22 #include "rlib/rutf.h"
23 #include "rlib/rmem.h"
24 #include "rpa/rpaparser.h"
25 #include "rpa/rpadbexpriv.h"
26 #include "rpa/rpastatpriv.h"
27
28 static long rpa_bitmap_set(rarray_t *records, long rec, rpointer userdata);
29
30 static long rpa_bitmap_set_startrec(rarray_t *records, long rec, rpabitmap_t bitmap)
31 {
32         rparecord_t *record = rpa_record_get(records, rpa_recordtree_get(records, rec, RPA_RECORD_START));
33         record->userdata = bitmap;
34
35         return 0;
36 }
37
38
39 void rpa_dbex_buildbitmapinfo_for_rule(rpadbex_t *dbex, rparule_t rid)
40 {
41         rpa_bitmapcompiler_t bc;
42         rharray_t *rules = dbex->rules;
43         rpa_ruleinfo_t *info;
44
45         r_memset(&bc, 0, sizeof(bc));
46         bc.dbex = dbex;
47         if ((info = (rpa_ruleinfo_t *)r_harray_get(rules, rid)) != NULL) {
48                 rparecord_t *record = rpa_record_get(dbex->records, info->startrec);
49                 RPA_BITMAP_SETALL(RPA_RECORD2BITMAP(record));
50                 rpa_recordtree_walk(dbex->records, info->startrec, 0, rpa_bitmap_set, (rpointer)&bc);
51         }
52
53 }
54
55
56 rword rpa_dbex_getrulebitmap(rpadbex_t *dbex, rparule_t rid)
57 {
58         rword bitmap = 0L;
59         rharray_t *rules = dbex->rules;
60         rpa_ruleinfo_t *info;
61
62         if ((info = (rpa_ruleinfo_t *)r_harray_get(rules, rid)) != NULL) {
63                 rparecord_t *record = rpa_record_get(dbex->records, info->startrec);
64                 if (record) {
65                         if (!RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(record))) {
66                                 rpa_dbex_buildbitmapinfo_for_rule(dbex, rid);
67                         }
68                         bitmap = RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(record));
69                 }
70         }
71         return bitmap;
72 }
73
74
75 void rpa_dbex_buildbitmapinfo(rpadbex_t *dbex)
76 {
77         unsigned int i;
78         rharray_t *rules = dbex->rules;
79
80         for (i = 0; i < r_array_length(rules->members); i++) {
81                 rpa_dbex_getrulebitmap(dbex, i);
82         }
83 }
84
85
86
87 static long rpa_bitmap_set_char(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
88 {
89         ruint32 wc = 0;
90
91         if (r_utf8_mbtowc(&wc, (const unsigned char*) record->input, (const unsigned char*)record->input + record->inputsiz) < 0) {
92                 /*
93                  * Error
94                  */
95                 return -1;
96         }
97
98         RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), wc % RPA_BITMAP_BITS);
99         return 0;
100 }
101
102
103 static long rpa_bitmap_set_range(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
104 {
105         rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
106
107         if (record->type & RPA_RECORD_START) {
108                 bc->beginchar = 0;
109                 bc->endchar = 0;
110         } else {
111                 long wc1, wc2, wc;
112                 if (bc->beginchar < bc->endchar) {
113                         wc1 = bc->beginchar;
114                         wc2 = bc->endchar;
115                 } else {
116                         wc2 = bc->beginchar;
117                         wc1 = bc->endchar;
118                 }
119                 for (wc = wc1; wc <= wc2 && (wc - wc1) < RPA_BITMAP_BITS; wc++) {
120                         RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), (wc % RPA_BITMAP_BITS));
121                 }
122         }
123         return 0;
124 }
125
126
127 static long rpa_bitmap_set_beginchar(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
128 {
129         rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
130
131         if (record->type & RPA_RECORD_END) {
132                 ruint32 wc = 0;
133
134                 if (r_utf8_mbtowc(&wc, (const unsigned char*) record->input, (const unsigned char*)record->input + record->inputsiz) < 0) {
135                         /*
136                          * Error
137                          */
138                         return -1;
139                 }
140                 bc->beginchar = wc;
141         }
142         return 0;
143 }
144
145
146 static long rpa_bitmap_set_endchar(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
147 {
148         rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
149
150         if (record->type & RPA_RECORD_END) {
151                 ruint32 wc = 0;
152
153                 if (r_utf8_mbtowc(&wc, (const unsigned char*) record->input, (const unsigned char*)record->input + record->inputsiz) < 0) {
154                         /*
155                          * Error
156                          */
157                         return -1;
158                 }
159                 bc->endchar = wc;
160         }
161         return 0;
162 }
163
164
165 static long rpa_bitmap_set_clschar(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
166 {
167         if (record->type & RPA_RECORD_END) {
168                 ruint32 wc = 0;
169
170                 if (r_utf8_mbtowc(&wc, (const unsigned char*) record->input, (const unsigned char*)record->input + record->inputsiz) < 0) {
171                         /*
172                          * Error
173                          */
174                         return -1;
175                 }
176                 RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), wc % RPA_BITMAP_BITS);
177         }
178         return 0;
179 }
180
181
182 static long rpa_bitmap_set_cls(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
183 {
184         if (record->type & RPA_RECORD_END) {
185                 long child;
186
187                 for (child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END); child >= 0; child = rpa_recordtree_next(records, child, RPA_RECORD_END)) {
188                         rparecord_t *childrecord = rpa_record_get(records, child);
189                         RPA_BITMAP_ORBITS(RPA_RECORD2BITMAP(record), RPA_RECORD2BITMAP(childrecord));
190                 }
191                 return 0;
192
193         }
194         return 0;
195 }
196
197
198 static long rpa_bitmap_set_namedrule(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
199 {
200         if (record->type & RPA_RECORD_END) {
201                 long child;
202                 child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
203
204                 for (child = rpa_recordtree_next(records, child, RPA_RECORD_END); child >= 0; child = rpa_recordtree_next(records, child, RPA_RECORD_END)) {
205                         rparecord_t *childrecord = rpa_record_get(records, child);
206                         RPA_BITMAP_ORBITS(RPA_RECORD2BITMAP(record), RPA_RECORD2BITMAP(childrecord));
207                         if (!(childrecord->usertype & RPA_MATCH_OPTIONAL))
208                                 break;
209                 }
210         }
211         return 0;
212 }
213
214
215 static long rpa_bitmap_set_expression(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
216 {
217         if (record->type & RPA_RECORD_END) {
218                 long child;
219
220                 for (child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END); child >= 0; child = rpa_recordtree_next(records, child, RPA_RECORD_END)) {
221                         rparecord_t *childrecord = rpa_record_get(records, child);
222                         RPA_BITMAP_ORBITS(RPA_RECORD2BITMAP(record), RPA_RECORD2BITMAP(childrecord));
223                         if (!(childrecord->usertype & RPA_MATCH_OPTIONAL))
224                                 break;
225                 }
226                 return 0;
227
228         }
229         return 0;
230 }
231
232
233 static long rpa_bitmap_set_orop(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
234 {
235         if (record->type & RPA_RECORD_END) {
236                 long child;
237
238                 for (child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END); child >= 0; child = rpa_recordtree_next(records, child, RPA_RECORD_END)) {
239                         rparecord_t *childrecord = rpa_record_get(records, child);
240                         RPA_BITMAP_ORBITS(RPA_RECORD2BITMAP(record), RPA_RECORD2BITMAP(childrecord));
241                 }
242                 return 0;
243
244         }
245         return 0;
246 }
247
248
249 static long rpa_bitmap_set_minop(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
250 {
251         if (record->type & RPA_RECORD_END) {
252                 long child;
253
254                 child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
255                 if (child >= 0) {
256                         rparecord_t *childrecord = rpa_record_get(records, child);
257                         RPA_BITMAP_SETVAL(RPA_RECORD2BITMAP(record), RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(childrecord)));
258                 }
259                 return 0;
260
261         }
262         return 0;
263 }
264
265
266 static long rpa_bitmap_set_specialchar(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
267 {
268         ruint32 wc = 0;
269
270         if (r_utf8_mbtowc(&wc, (const unsigned char*) record->input, (const unsigned char*)record->input + record->inputsiz) < 0) {
271                 /*
272                  * Error
273                  */
274                 return -1;
275         }
276         wc = rpa_special_char(wc);
277
278         if (wc == '.') {
279                 RPA_BITMAP_SETALL(RPA_RECORD2BITMAP(record));
280         } else {
281                 RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), wc % RPA_BITMAP_BITS);
282         }
283         return 0;
284 }
285
286
287 static long rpa_bitmap_set_clsnum(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
288 {
289         if (record->type & RPA_RECORD_END) {
290                 long child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
291                 if (child >= 0) {
292                         rparecord_t *childrecord = rpa_record_get(records, child);
293                         RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), childrecord->userdata % RPA_BITMAP_BITS);
294                 }
295         }
296         return 0;
297 }
298
299
300 static long rpa_bitmap_set_numrng(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
301 {
302         if (record->type & RPA_RECORD_END) {
303                 long first = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
304                 long second = rpa_recordtree_lastchild(records, rec, RPA_RECORD_END);
305                 if (first >= 0 && second >= 0) {
306                         rword wc1, wc2, wc;
307                         rparecord_t *firstrecord = rpa_record_get(records, first);
308                         rparecord_t *secondrecord = rpa_record_get(records, second);
309                         if (firstrecord->userdata < secondrecord->userdata) {
310                                 wc1 = firstrecord->userdata;
311                                 wc2 = secondrecord->userdata;
312                         } else {
313                                 wc2 = firstrecord->userdata;
314                                 wc1 = secondrecord->userdata;
315                         }
316                         for (wc = wc1; wc <= wc2 && (wc - wc1) < RPA_BITMAP_BITS; wc++) {
317                                 RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), (wc % RPA_BITMAP_BITS));
318                         }
319                 }
320         }
321         return 0;
322 }
323
324
325 static long rpa_bitmap_set_ref(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
326 {
327         rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
328         if ((record->type & RPA_RECORD_END) && (record->usertype & RPA_LOOP_PATH) == 0) {
329                 long child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
330                 if (child >= 0) {
331                         rparecord_t *childrecord = rpa_record_get(records, child);
332                         rparule_t rid = rpa_dbex_lookup(bc->dbex, childrecord->input, childrecord->inputsiz);
333                         if (rid >= 0) {
334                                 RPA_BITMAP_SETVAL(RPA_RECORD2BITMAP(record), rpa_dbex_getrulebitmap(bc->dbex, rid));
335                         }
336
337                 }
338         }
339         return 0;
340 }
341
342
343 static long rpa_bitmap_set_long(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
344 {
345         ruint32 wc = 0;
346         if (rpa_record2long(record, &wc) < 0)
347                 return -1;
348         record->userdata = wc;
349         return 0;
350 }
351
352
353 static long rpa_bitmap_set_notop(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
354 {
355         if (record->type & RPA_RECORD_END) {
356                 long child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
357                 if (child >= 0) {
358                         RPA_BITMAP_SETALL(RPA_RECORD2BITMAP(record));
359                 }
360         }
361         return 0;
362 }
363
364
365 static long rpa_bitmap_set(rarray_t *records, long rec, rpointer userdata)
366 {
367 //      rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
368         rparecord_t *record = rpa_record_get(records, rec);
369
370         switch (record->ruleuid) {
371         case RPA_PRODUCTION_CHAR:
372                 rpa_bitmap_set_char(records, record, rec, userdata);
373                 break;
374         case RPA_PRODUCTION_CHARRNG:
375                 rpa_bitmap_set_range(records, record, rec, userdata);
376                 break;
377         case RPA_PRODUCTION_BEGINCHAR:
378                 rpa_bitmap_set_beginchar(records, record, rec, userdata);
379                 break;
380         case RPA_PRODUCTION_ENDCHAR:
381                 rpa_bitmap_set_endchar(records, record, rec, userdata);
382                 break;
383         case RPA_PRODUCTION_CLSCHAR:
384                 rpa_bitmap_set_clschar(records, record, rec, userdata);
385                 break;
386         case RPA_PRODUCTION_CLS:
387                 rpa_bitmap_set_cls(records, record, rec, userdata);
388                 break;
389         case RPA_PRODUCTION_NAMEDRULE:
390                 rpa_bitmap_set_namedrule(records, record, rec, userdata);
391                 break;
392         case RPA_PRODUCTION_REQOP:
393         case RPA_PRODUCTION_NEGBRANCH:
394         case RPA_PRODUCTION_BRACKETEXP:
395         case RPA_PRODUCTION_ALTBRANCH:
396         case RPA_PRODUCTION_ANONYMOUSRULE:
397                 rpa_bitmap_set_expression(records, record, rec, userdata);
398                 break;
399         case RPA_PRODUCTION_OROP:
400         case RPA_PRODUCTION_NOROP:
401                 rpa_bitmap_set_orop(records, record, rec, userdata);
402                 break;
403         case RPA_PRODUCTION_SPECIALCHAR:
404                 rpa_bitmap_set_specialchar(records, record, rec, userdata);
405                 break;
406         case RPA_PRODUCTION_HEX:
407         case RPA_PRODUCTION_DEC:
408                 rpa_bitmap_set_long(records, record, rec, userdata);
409                 break;
410         case RPA_PRODUCTION_CLSNUM:
411                 rpa_bitmap_set_clsnum(records, record, rec, userdata);
412                 break;
413         case RPA_PRODUCTION_NUMRNG:
414                 rpa_bitmap_set_numrng(records, record, rec, userdata);
415                 break;
416         case RPA_PRODUCTION_AREF:
417         case RPA_PRODUCTION_CREF:
418                 rpa_bitmap_set_ref(records, record, rec, userdata);
419                 break;
420         case RPA_PRODUCTION_NOTOP:
421                 rpa_bitmap_set_notop(records, record, rec, userdata);
422                 break;
423         case RPA_PRODUCTION_MINOP:
424                 rpa_bitmap_set_minop(records, record, rec, userdata);
425                 break;
426
427         default:
428                 break;
429         };
430
431         if (record->type & RPA_RECORD_END) {
432                 rpa_bitmap_set_startrec(records, rec, RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(record)));
433         }
434         return 0;
435 }