RPA Toolkit
code cleanup
[rpatk.git] / rpa / rpabitmap.c
1 /*
2  *  Regular Pattern Analyzer (RPA)
3  *  Copyright (c) 2009-2010 Martin Stoilov
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  *  Martin Stoilov <martin@rpasearch.com>
19  */
20
21 #include "rpa/rpabitmap.h"
22 #include "rlib/rutf.h"
23 #include "rlib/rmem.h"
24 #include "rpa/rpaparser.h"
25 #include "rpa/rpadbexpriv.h"
26 #include "rpa/rpastatpriv.h"
27
28 static long rpa_bitmap_set(rarray_t *records, long rec, rpointer userdata);
29
30 static long rpa_bitmap_set_startrec(rarray_t *records, long rec, rpabitmap_t bitmap)
31 {
32         rparecord_t *record = rpa_record_get(records, rpa_recordtree_get(records, rec, RPA_RECORD_START));
33         RPA_BITMAP_SETVAL(RPA_RECORD2BITMAP(record), bitmap);
34         return 0;
35 }
36
37
38 void rpa_dbex_buildbitmapinfo_for_rule(rpadbex_t *dbex, rparule_t rid)
39 {
40         rpa_bitmapcompiler_t bc;
41         rharray_t *rules = dbex->rules;
42         rpa_ruleinfo_t *info;
43
44         r_memset(&bc, 0, sizeof(bc));
45         bc.dbex = dbex;
46         if ((info = (rpa_ruleinfo_t *)r_harray_get(rules, rid)) != NULL) {
47                 rparecord_t *record = rpa_record_get(dbex->records, info->startrec);
48                 RPA_BITMAP_SETALL(RPA_RECORD2BITMAP(record));
49                 rpa_recordtree_walk(dbex->records, info->startrec, 0, rpa_bitmap_set, (rpointer)&bc);
50         }
51
52 }
53
54
55 rword rpa_dbex_getrulebitmap(rpadbex_t *dbex, rparule_t rid)
56 {
57         rword bitmap = 0L;
58         rharray_t *rules = dbex->rules;
59         rpa_ruleinfo_t *info;
60
61         if ((info = (rpa_ruleinfo_t *)r_harray_get(rules, rid)) != NULL) {
62                 rparecord_t *record = rpa_record_get(dbex->records, info->startrec);
63                 if (record) {
64                         if (!RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(record))) {
65                                 rpa_dbex_buildbitmapinfo_for_rule(dbex, rid);
66                         }
67                         bitmap = RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(record));
68                 }
69         }
70         return bitmap;
71 }
72
73
74 void rpa_dbex_buildbitmapinfo(rpadbex_t *dbex)
75 {
76         unsigned int i;
77         rharray_t *rules = dbex->rules;
78
79         for (i = 0; i < r_array_length(rules->members); i++) {
80                 rpa_dbex_getrulebitmap(dbex, i);
81         }
82 }
83
84
85
86 static long rpa_bitmap_set_char(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
87 {
88         ruint32 wc = 0;
89
90         if (r_utf8_mbtowc(&wc, (const unsigned char*) record->input, (const unsigned char*)record->input + record->inputsiz) < 0) {
91                 /*
92                  * Error
93                  */
94                 return -1;
95         }
96         RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), wc % RPA_BITMAP_BITS);
97         return 0;
98 }
99
100
101 static long rpa_bitmap_set_range(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
102 {
103         rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
104
105         if (record->type & RPA_RECORD_START) {
106                 bc->beginchar = 0;
107                 bc->endchar = 0;
108         } else {
109                 long wc1, wc2, wc;
110                 if (bc->beginchar < bc->endchar) {
111                         wc1 = bc->beginchar;
112                         wc2 = bc->endchar;
113                 } else {
114                         wc2 = bc->beginchar;
115                         wc1 = bc->endchar;
116                 }
117                 for (wc = wc1; wc <= wc2 && (wc - wc1) < RPA_BITMAP_BITS; wc++) {
118                         RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), (wc % RPA_BITMAP_BITS));
119                 }
120         }
121         return 0;
122 }
123
124
125 static long rpa_bitmap_set_beginchar(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
126 {
127         rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
128
129         if (record->type & RPA_RECORD_END) {
130                 ruint32 wc = 0;
131
132                 if (r_utf8_mbtowc(&wc, (const unsigned char*) record->input, (const unsigned char*)record->input + record->inputsiz) < 0) {
133                         /*
134                          * Error
135                          */
136                         return -1;
137                 }
138                 bc->beginchar = wc;
139         }
140         return 0;
141 }
142
143
144 static long rpa_bitmap_set_endchar(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
145 {
146         rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
147
148         if (record->type & RPA_RECORD_END) {
149                 ruint32 wc = 0;
150
151                 if (r_utf8_mbtowc(&wc, (const unsigned char*) record->input, (const unsigned char*)record->input + record->inputsiz) < 0) {
152                         /*
153                          * Error
154                          */
155                         return -1;
156                 }
157                 bc->endchar = wc;
158         }
159         return 0;
160 }
161
162
163 static long rpa_bitmap_set_clschar(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
164 {
165         if (record->type & RPA_RECORD_END) {
166                 ruint32 wc = 0;
167
168                 if (r_utf8_mbtowc(&wc, (const unsigned char*) record->input, (const unsigned char*)record->input + record->inputsiz) < 0) {
169                         /*
170                          * Error
171                          */
172                         return -1;
173                 }
174                 RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), wc % RPA_BITMAP_BITS);
175         }
176         return 0;
177 }
178
179
180 static long rpa_bitmap_set_cls(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
181 {
182         if (record->type & RPA_RECORD_END) {
183                 long child;
184
185                 for (child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END); child >= 0; child = rpa_recordtree_next(records, child, RPA_RECORD_END)) {
186                         rparecord_t *childrecord = rpa_record_get(records, child);
187                         RPA_BITMAP_ORBITS(RPA_RECORD2BITMAP(record), RPA_RECORD2BITMAP(childrecord));
188                 }
189         }
190         return 0;
191 }
192
193
194 static long rpa_bitmap_set_namedrule(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
195 {
196         if (record->type & RPA_RECORD_END) {
197                 long child;
198                 child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
199
200                 for (child = rpa_recordtree_next(records, child, RPA_RECORD_END); child >= 0; child = rpa_recordtree_next(records, child, RPA_RECORD_END)) {
201                         rparecord_t *childrecord = rpa_record_get(records, child);
202                         RPA_BITMAP_ORBITS(RPA_RECORD2BITMAP(record), RPA_RECORD2BITMAP(childrecord));
203                         if (!(childrecord->usertype & RPA_MATCH_OPTIONAL))
204                                 break;
205                 }
206         }
207         return 0;
208 }
209
210
211 static long rpa_bitmap_set_expression(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
212 {
213         if (record->type & RPA_RECORD_END) {
214                 long child;
215
216                 for (child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END); child >= 0; child = rpa_recordtree_next(records, child, RPA_RECORD_END)) {
217                         rparecord_t *childrecord = rpa_record_get(records, child);
218                         RPA_BITMAP_ORBITS(RPA_RECORD2BITMAP(record), RPA_RECORD2BITMAP(childrecord));
219                         if (!(childrecord->usertype & RPA_MATCH_OPTIONAL))
220                                 break;
221                 }
222         }
223         return 0;
224 }
225
226
227 static long rpa_bitmap_set_orop(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
228 {
229         if (record->type & RPA_RECORD_END) {
230                 long child;
231
232                 for (child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END); child >= 0; child = rpa_recordtree_next(records, child, RPA_RECORD_END)) {
233                         rparecord_t *childrecord = rpa_record_get(records, child);
234                         RPA_BITMAP_ORBITS(RPA_RECORD2BITMAP(record), RPA_RECORD2BITMAP(childrecord));
235                 }
236         }
237         return 0;
238 }
239
240
241 static long rpa_bitmap_set_minop(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
242 {
243         if (record->type & RPA_RECORD_END) {
244                 long child;
245
246                 child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
247                 if (child >= 0) {
248                         rparecord_t *childrecord = rpa_record_get(records, child);
249                         RPA_BITMAP_SETVAL(RPA_RECORD2BITMAP(record), RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(childrecord)));
250                 }
251         }
252         return 0;
253 }
254
255
256 static long rpa_bitmap_set_specialchar(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
257 {
258         ruint32 wc = 0;
259
260         if (r_utf8_mbtowc(&wc, (const unsigned char*) record->input, (const unsigned char*)record->input + record->inputsiz) < 0) {
261                 /*
262                  * Error
263                  */
264                 return -1;
265         }
266         wc = rpa_special_char(wc);
267
268         if (wc == '.') {
269                 RPA_BITMAP_SETALL(RPA_RECORD2BITMAP(record));
270         } else {
271                 RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), wc % RPA_BITMAP_BITS);
272         }
273         return 0;
274 }
275
276
277 static long rpa_bitmap_set_clsnum(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
278 {
279         if (record->type & RPA_RECORD_END) {
280                 long child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
281                 if (child >= 0) {
282                         rparecord_t *childrecord = rpa_record_get(records, child);
283                         RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), childrecord->userdata % RPA_BITMAP_BITS);
284                 }
285         }
286         return 0;
287 }
288
289
290 static long rpa_bitmap_set_numrng(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
291 {
292         if (record->type & RPA_RECORD_END) {
293                 long first = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
294                 long second = rpa_recordtree_lastchild(records, rec, RPA_RECORD_END);
295                 if (first >= 0 && second >= 0) {
296                         rword wc1, wc2, wc;
297                         rparecord_t *firstrecord = rpa_record_get(records, first);
298                         rparecord_t *secondrecord = rpa_record_get(records, second);
299                         if (firstrecord->userdata < secondrecord->userdata) {
300                                 wc1 = firstrecord->userdata;
301                                 wc2 = secondrecord->userdata;
302                         } else {
303                                 wc2 = firstrecord->userdata;
304                                 wc1 = secondrecord->userdata;
305                         }
306                         for (wc = wc1; wc <= wc2 && (wc - wc1) < RPA_BITMAP_BITS; wc++) {
307                                 RPA_BITMAP_SETBIT(RPA_RECORD2BITMAP(record), (wc % RPA_BITMAP_BITS));
308                         }
309                 }
310         }
311         return 0;
312 }
313
314
315 static long rpa_bitmap_set_ref(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
316 {
317         rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
318         if ((record->type & RPA_RECORD_END) && (record->usertype & RPA_LOOP_PATH) == 0) {
319                 long child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
320                 if (child >= 0) {
321                         rparecord_t *childrecord = rpa_record_get(records, child);
322                         rparule_t rid = rpa_dbex_lookup(bc->dbex, childrecord->input, childrecord->inputsiz);
323                         if (rid >= 0) {
324                                 RPA_BITMAP_SETVAL(RPA_RECORD2BITMAP(record), rpa_dbex_getrulebitmap(bc->dbex, rid));
325                         }
326
327                 }
328         }
329         return 0;
330 }
331
332
333 static long rpa_bitmap_set_long(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
334 {
335         ruint32 wc = 0;
336         if (rpa_record2long(record, &wc) < 0)
337                 return -1;
338         record->userdata = wc;
339         return 0;
340 }
341
342
343 static long rpa_bitmap_set_notop(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
344 {
345         if (record->type & RPA_RECORD_END) {
346                 long child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
347                 if (child >= 0) {
348                         RPA_BITMAP_SETALL(RPA_RECORD2BITMAP(record));
349                 }
350         }
351         return 0;
352 }
353
354
355 static long rpa_bitmap_set(rarray_t *records, long rec, rpointer userdata)
356 {
357 //      rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
358         rparecord_t *record = rpa_record_get(records, rec);
359
360         switch (record->ruleuid) {
361         case RPA_PRODUCTION_CHAR:
362                 rpa_bitmap_set_char(records, record, rec, userdata);
363                 break;
364         case RPA_PRODUCTION_CHARRNG:
365                 rpa_bitmap_set_range(records, record, rec, userdata);
366                 break;
367         case RPA_PRODUCTION_BEGINCHAR:
368                 rpa_bitmap_set_beginchar(records, record, rec, userdata);
369                 break;
370         case RPA_PRODUCTION_ENDCHAR:
371                 rpa_bitmap_set_endchar(records, record, rec, userdata);
372                 break;
373         case RPA_PRODUCTION_CLSCHAR:
374                 rpa_bitmap_set_clschar(records, record, rec, userdata);
375                 break;
376         case RPA_PRODUCTION_CLS:
377                 rpa_bitmap_set_cls(records, record, rec, userdata);
378                 break;
379         case RPA_PRODUCTION_NAMEDRULE:
380                 rpa_bitmap_set_namedrule(records, record, rec, userdata);
381                 break;
382         case RPA_PRODUCTION_REQOP:
383         case RPA_PRODUCTION_NEGBRANCH:
384         case RPA_PRODUCTION_BRACKETEXP:
385         case RPA_PRODUCTION_ALTBRANCH:
386         case RPA_PRODUCTION_ANONYMOUSRULE:
387                 rpa_bitmap_set_expression(records, record, rec, userdata);
388                 break;
389         case RPA_PRODUCTION_OROP:
390         case RPA_PRODUCTION_NOROP:
391                 rpa_bitmap_set_orop(records, record, rec, userdata);
392                 break;
393         case RPA_PRODUCTION_SPECIALCHAR:
394                 rpa_bitmap_set_specialchar(records, record, rec, userdata);
395                 break;
396         case RPA_PRODUCTION_HEX:
397         case RPA_PRODUCTION_DEC:
398                 rpa_bitmap_set_long(records, record, rec, userdata);
399                 break;
400         case RPA_PRODUCTION_CLSNUM:
401                 rpa_bitmap_set_clsnum(records, record, rec, userdata);
402                 break;
403         case RPA_PRODUCTION_NUMRNG:
404                 rpa_bitmap_set_numrng(records, record, rec, userdata);
405                 break;
406         case RPA_PRODUCTION_AREF:
407         case RPA_PRODUCTION_CREF:
408                 rpa_bitmap_set_ref(records, record, rec, userdata);
409                 break;
410         case RPA_PRODUCTION_NOTOP:
411                 rpa_bitmap_set_notop(records, record, rec, userdata);
412                 break;
413         case RPA_PRODUCTION_MINOP:
414                 rpa_bitmap_set_minop(records, record, rec, userdata);
415                 break;
416
417         default:
418                 /*
419                  * We should never get here!
420                  */
421                 RPA_BITMAP_SETALL(RPA_RECORD2BITMAP(record));
422                 break;
423         };
424
425         if (record->type & RPA_RECORD_END) {
426                 rpa_bitmap_set_startrec(records, rec, RPA_BITMAP_GETVAL(RPA_RECORD2BITMAP(record)));
427         }
428         return 0;
429 }