RPA Toolkit
work on bitmap compiler
author Martin Stoilov <martin@rpasearch.com>
Mon, 18 Jul 2011 05:51:05 +0000 (22:51 -0700)
committer Martin Stoilov <martin@rpasearch.com>
Wed, 20 Jul 2011 05:32:35 +0000 (22:32 -0700)
rpa/rpabitmap.c
rpa/rpabitmap.h
rpa/rpadbex.c
rpa/rpadbexpriv.h
rpa/rpaparser.h
rpa/rpastat.c
rpa/rpastatpriv.h

index c4d6faa..f7ec3a5 100644 (file)
 #include "rlib/rmem.h"
 #include "rpa/rpaparser.h"
 #include "rpa/rpadbexpriv.h"
-
+#include "rpa/rpastatpriv.h"
 
 static long rpa_bitmap_set(rarray_t *records, long rec, rpointer userdata);
 
+
 void rpa_dbex_buildbitmapinfo_for_rule(rpadbex_t *dbex, rparule_t rid)
 {
        rpa_bitmapcompiler_t bc;
@@ -35,9 +36,31 @@ void rpa_dbex_buildbitmapinfo_for_rule(rpadbex_t *dbex, rparule_t rid)
 
        r_memset(&bc, 0, sizeof(bc));
        bc.dbex = dbex;
-       if ((info = (rpa_ruleinfo_t *)r_harray_get(rules, rid)) != NULL)
+       if ((info = (rpa_ruleinfo_t *)r_harray_get(rules, rid)) != NULL) {
+               rparecord_t *record = rpa_record_get(dbex->records, info->startrec);
+               RPA_BITMAP_SETALL(record);
                rpa_recordtree_walk(dbex->records, info->startrec, 0, rpa_bitmap_set, (rpointer)&bc);
+       }
+
+}
+/*
+ * Return the bitmap kept in the userdata field of the start record of rule
+ * rid. When that field is still zero, rpa_dbex_buildbitmapinfo_for_rule()
+ * is called first and the field is read again afterwards.
+ * Returns 0 if r_harray_get() or rpa_record_get() returns NULL.
+ */
 
+rword rpa_dbex_getrulebitmap(rpadbex_t *dbex, rparule_t rid)
+{
+       rword bitmap = 0L;
+       rharray_t *rules = dbex->rules;
+       rpa_ruleinfo_t *info;
+
+       if ((info = (rpa_ruleinfo_t *)r_harray_get(rules, rid)) != NULL) {
+               rparecord_t *record = rpa_record_get(dbex->records, info->startrec);
+               if (record) {
+                       if (!record->userdata) { /* zero is taken to mean "not built yet" */
+                               rpa_dbex_buildbitmapinfo_for_rule(dbex, rid);
+                       }
+                       bitmap = record->userdata;
+               }
+       }
+       return bitmap;
 }
 
 
@@ -47,7 +70,7 @@ void rpa_dbex_buildbitmapinfo(rpadbex_t *dbex)
        rharray_t *rules = dbex->rules;
 
        for (i = 0; i < r_array_length(rules->members); i++) {
-               rpa_dbex_buildbitmapinfo_for_rule(dbex, i);
+               rpa_dbex_getrulebitmap(dbex, i);
        }
 }
 
@@ -169,6 +192,7 @@ static long rpa_bitmap_set_cls(rarray_t *records, rparecord_t *record, long rec,
 static long rpa_bitmap_set_namedrule(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
 {
        if (record->type & RPA_RECORD_END) {
+               rparecord_t *startrecord;
                long child;
                child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
 
@@ -178,8 +202,10 @@ static long rpa_bitmap_set_namedrule(rarray_t *records, rparecord_t *record, lon
                        if (!(childrecord->usertype & RPA_MATCH_OPTIONAL))
                                break;
                }
+               startrecord = rpa_record_get(records, rpa_recordtree_get(records, rec, RPA_RECORD_START));
+               if (startrecord)
+                       startrecord->userdata = record->userdata;
                return 0;
-
        }
        return 0;
 }
@@ -219,6 +245,123 @@ static long rpa_bitmap_set_orop(rarray_t *records, rparecord_t *record, long rec
 }
 
 
+static long rpa_bitmap_set_minop(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
+{       /* Bitmap handler for MINOP records; userdata is not used, always returns 0. */
+       if (record->type & RPA_RECORD_END) {
+               long child;
+               /* On an END record, copy the bitmap value of the record returned by rpa_recordtree_firstchild(). */
+               child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
+               if (child >= 0) {
+                       rparecord_t *childrecord = rpa_record_get(records, child);
+                       RPA_BITMAP_SETVAL(record, RPA_BITMAP_GETVAL(childrecord));
+               }
+               return 0;
+
+       }
+       return 0;
+}
+
+/*
+ * Bitmap handler for SPECIALCHAR records. Converts the start of
+ * record->input with r_utf8_mbtowc() and maps the result through
+ * rpa_special_char(). A mapped value of '.' sets every bit of the record's
+ * bitmap; any other value sets the single bit (wc % RPA_BITMAP_BITS).
+ * Returns 0 on success, -1 if r_utf8_mbtowc() returns a negative value.
+ */
+static long rpa_bitmap_set_specialchar(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
+{
+       ruint32 wc = 0;
+
+       if (r_utf8_mbtowc(&wc, (const unsigned char*) record->input, (const unsigned char*)record->input + record->inputsiz) < 0) {
+               /*
+                * Error
+                */
+               return -1;
+       }
+       wc = rpa_special_char(wc);
+
+       if (wc == '.') {
+               RPA_BITMAP_SETALL(record);
+       } else {
+               RPA_BITMAP_SETBIT(record, wc % RPA_BITMAP_BITS);
+       }
+       return 0;
+}
+
+/*
+ * Bitmap handler for CLSNUM records. On an END record, the userdata value of
+ * the record returned by rpa_recordtree_firstchild() is reduced modulo
+ * RPA_BITMAP_BITS and the corresponding bit is set in this record's bitmap.
+ * Always returns 0.
+ */
+static long rpa_bitmap_set_clsnum(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
+{
+       if (record->type & RPA_RECORD_END) {
+               long child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
+               if (child >= 0) {
+                       rparecord_t *childrecord = rpa_record_get(records, child);
+                       RPA_BITMAP_SETBIT(record, childrecord->userdata % RPA_BITMAP_BITS);
+               }
+       }
+       return 0;
+}
+
+/*
+ * Bitmap handler for NUMRNG records. On an END record, the userdata values of
+ * the records returned by rpa_recordtree_firstchild() and
+ * rpa_recordtree_lastchild() are taken as the two bounds of a range and
+ * ordered so that wc1 <= wc2. For each value wc in the range the bit
+ * (wc % RPA_BITMAP_BITS) is set in this record's bitmap.
+ * Always returns 0.
+ */
+static long rpa_bitmap_set_numrng(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
+{
+       if (record->type & RPA_RECORD_END) {
+               long first = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
+               long second = rpa_recordtree_lastchild(records, rec, RPA_RECORD_END);
+               if (first >= 0 && second >= 0) {
+                       rword wc1, wc2, wc;
+                       rparecord_t *firstrecord = rpa_record_get(records, first);
+                       rparecord_t *secondrecord = rpa_record_get(records, second);
+                       if (firstrecord->userdata < secondrecord->userdata) {
+                               wc1 = firstrecord->userdata;
+                               wc2 = secondrecord->userdata;
+                       } else {
+                               wc2 = firstrecord->userdata;
+                               wc1 = secondrecord->userdata;
+                       }
+                       for (wc = wc1; wc <= wc2 && (wc - wc1) < RPA_BITMAP_BITS; wc++) { /* at most RPA_BITMAP_BITS iterations; by then every bit is set */
+                               RPA_BITMAP_SETBIT(record, (wc % RPA_BITMAP_BITS));
+                       }
+               }
+       }
+       return 0;
+}
+
+/*
+ * Bitmap handler for AREF and CREF records. For an END record that does not
+ * have RPA_LOOP_PATH set in usertype, the input text of the record returned
+ * by rpa_recordtree_firstchild() is looked up with rpa_dbex_lookup(). When
+ * the lookup returns a non-negative rule id, this record's userdata is set
+ * to rpa_dbex_getrulebitmap() of that rule. userdata is cast to
+ * rpa_bitmapcompiler_t* and its dbex member is used for both calls.
+ * Always returns 0.
+ */
+static long rpa_bitmap_set_ref(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
+{
+       rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
+       if ((record->type & RPA_RECORD_END) && (record->usertype & RPA_LOOP_PATH) == 0) {
+               long child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
+               if (child >= 0) {
+                       rparecord_t *childrecord = rpa_record_get(records, child);
+                       rparule_t rid = rpa_dbex_lookup(bc->dbex, childrecord->input, childrecord->inputsiz);
+                       if (rid >= 0) {
+                               record->userdata = rpa_dbex_getrulebitmap(bc->dbex, rid);
+                       }
+
+               }
+       }
+       return 0;
+}
+
+/*
+ * Handler for HEX and DEC records. Converts the record with
+ * rpa_record2long() and stores the resulting number in record->userdata.
+ * Returns 0 on success, -1 if rpa_record2long() returns a negative value.
+ */
+static long rpa_bitmap_set_long(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
+{
+       ruint32 wc = 0;
+       if (rpa_record2long(record, &wc) < 0)
+               return -1;
+       record->userdata = wc;
+       return 0;
+}
+
+/*
+ * Bitmap handler for NOTOP records. On an END record, this record's bitmap
+ * is set to the bitwise complement of the bitmap of the record returned by
+ * rpa_recordtree_firstchild(). Always returns 0.
+ */
+static long rpa_bitmap_set_notop(rarray_t *records, rparecord_t *record, long rec, rpointer userdata)
+{
+       if (record->type & RPA_RECORD_END) {
+               long child = rpa_recordtree_firstchild(records, rec, RPA_RECORD_END);
+               if (child >= 0) {
+                       rparecord_t *childrecord = rpa_record_get(records, child);
+                       RPA_BITMAP_SETVAL(record, ~RPA_BITMAP_GETVAL(childrecord));
+               }
+       }
+       return 0;
+}
+
+
 static long rpa_bitmap_set(rarray_t *records, long rec, rpointer userdata)
 {
 //     rpa_bitmapcompiler_t *bc = (rpa_bitmapcompiler_t*)userdata;
@@ -246,21 +389,44 @@ static long rpa_bitmap_set(rarray_t *records, long rec, rpointer userdata)
        case RPA_PRODUCTION_NAMEDRULE:
                rpa_bitmap_set_namedrule(records, record, rec, userdata);
                break;
+       case RPA_PRODUCTION_REQOP:
+       case RPA_PRODUCTION_NEGBRANCH:
        case RPA_PRODUCTION_BRACKETEXP:
        case RPA_PRODUCTION_ALTBRANCH:
        case RPA_PRODUCTION_ANONYMOUSRULE:
                rpa_bitmap_set_expression(records, record, rec, userdata);
                break;
        case RPA_PRODUCTION_OROP:
+       case RPA_PRODUCTION_NOROP:
                rpa_bitmap_set_orop(records, record, rec, userdata);
                break;
+       case RPA_PRODUCTION_SPECIALCHAR:
+               rpa_bitmap_set_specialchar(records, record, rec, userdata);
+               break;
+       case RPA_PRODUCTION_HEX:
+       case RPA_PRODUCTION_DEC:
+               rpa_bitmap_set_long(records, record, rec, userdata);
+               break;
+       case RPA_PRODUCTION_CLSNUM:
+               rpa_bitmap_set_clsnum(records, record, rec, userdata);
+               break;
+       case RPA_PRODUCTION_NUMRNG:
+               rpa_bitmap_set_numrng(records, record, rec, userdata);
+               break;
+       case RPA_PRODUCTION_AREF:
+       case RPA_PRODUCTION_CREF:
+               rpa_bitmap_set_ref(records, record, rec, userdata);
+               break;
+       case RPA_PRODUCTION_NOTOP:
+               rpa_bitmap_set_notop(records, record, rec, userdata);
+               break;
+       case RPA_PRODUCTION_MINOP:
+               rpa_bitmap_set_minop(records, record, rec, userdata);
+               break;
 
        default:
                break;
        };
 
-       if (record) {
-       }
-
        return 0;
 }
index 20daa37..cc56060 100644 (file)
@@ -34,10 +34,13 @@ extern "C" {
 #define RPA_BITMAP_SIZE (sizeof(rword))
 #define RPA_BITMAP_BITS (RPA_BITMAP_SIZE*8)
 #define RPA_BITMAP_SETBIT(__r__, __b__) do { (__r__)->userdata |= ((rword)1) << (__b__); } while (0)
-#define RPA_BITMAP_GETBIT(__r__, __b__) (__r__)->userdata & (((rword)1) << (__b__)) ? 1 : 0
+#define RPA_BITMAP_GETBIT(__r__, __b__) ((__r__)->userdata & (((rword)1) << (__b__)) ? 1 : 0)
 #define RPA_BITMAP_CLRBIT(__r__, __b__) do { (__r__)->userdata &= ~(((rword)1) << (__b__)); } while (0)
 #define RPA_BITMAP_CLRALL(__r__) do { (__r__)->userdata = (rword)0; } while (0)
+#define RPA_BITMAP_SETALL(__r__) do { (__r__)->userdata = (rword)-1; } while (0)
 #define RPA_BITMAP_ORBITS(__r__, __c__) do { (__r__)->userdata |= (__c__)->userdata; } while (0)
+#define RPA_BITMAP_SETVAL(__r__, __v__) do { (__r__)->userdata = __v__; } while (0)
+#define RPA_BITMAP_GETVAL(__r__) ((__r__)->userdata)
 
 
 typedef struct rpa_bitmapcompiler_s {
index 42257aa..f3844e7 100644 (file)
@@ -183,7 +183,7 @@ static long rpa_dbex_getmatchspecialchr(unsigned long matchtype)
 }
 
 
-static int rpa_record2long(rparecord_t *prec, ruint32 *num)
+int rpa_record2long(rparecord_t *prec, ruint32 *num)
 {
        char *endptr = NULL;
        char buffer[64];
index 94a03c8..08de532 100644 (file)
@@ -68,6 +68,7 @@ struct rpadbex_s {
        unsigned long compiled:1;
 };
 
+int rpa_record2long(rparecord_t *prec, ruint32 *num);
 
 #ifdef __cplusplus
 }
index 988d928..63c3bd3 100644 (file)
@@ -37,7 +37,6 @@ enum {
        RPA_PRODUCTION_ANONYMOUSRULE,
        RPA_PRODUCTION_ALIASNAME,
        RPA_PRODUCTION_RULENAME,
-       RPA_PRODUCTION_REGEXCHAR,
        RPA_PRODUCTION_CHAR,
        RPA_PRODUCTION_ESCAPEDCHAR,
        RPA_PRODUCTION_SPECIALCHAR,
index ac684dd..ae937c1 100644 (file)
@@ -235,17 +235,13 @@ int rpa_stat_matchchr(rpastat_t *stat, rssize_t top, unsigned long wc)
 }
 
 
-int rpa_stat_matchspchr(rpastat_t *stat, rssize_t top, unsigned long wc)
+unsigned long rpa_special_char(unsigned long special)
 {
-       int ret = 0;
-       rpainput_t *in = &stat->instack[top];
-
-       if (in->eof)
-               return 0;
+       unsigned long wc;
 
-       switch (wc) {
+       switch (special) {
                case '.':
-                       return 1;
+                       wc = '.';
                        break;
                case 't':
                        wc = '\t';
@@ -257,9 +253,24 @@ int rpa_stat_matchspchr(rpastat_t *stat, rssize_t top, unsigned long wc)
                        wc = '\n';
                        break;
                default:
+                       wc = special;
                        break;
        };
 
+       return wc;
+}
+
+/*
+ * Test the input entry stat->instack[top] against special character wc.
+ * Returns 0 if that entry has eof set. Otherwise wc is mapped through
+ * rpa_special_char(): a mapped value of '.' returns 1 unconditionally, any
+ * other value returns 1 if it equals in->wc and 0 if it does not.
+ */
+int rpa_stat_matchspchr(rpastat_t *stat, rssize_t top, unsigned long wc)
+{
+       int ret = 0;
+       rpainput_t *in = &stat->instack[top];
+
+       if (in->eof)
+               return 0;
+       wc = rpa_special_char(wc);
+       if (wc == '.')
+               return 1;
        ret = (in->wc == wc) ? 1 : 0;
        return ret;
 }
index 36d7573..53a614f 100644 (file)
@@ -66,7 +66,7 @@ int rpa_stat_matchchr(rpastat_t *stat, rssize_t top, unsigned long wc);
 int rpa_stat_matchspchr(rpastat_t *stat, rssize_t top, unsigned long wc);
 int rpa_stat_matchrng(rpastat_t *stat, rssize_t top, unsigned long wc1, unsigned long wc2);
 long rpa_stat_shift(rpastat_t *stat, rssize_t top);
-
+unsigned long rpa_special_char(unsigned long special);
 
 #ifdef __cplusplus
 }