RPA Toolkit
work on bitmap operations
[rpatk.git] / rgrep / unix / main.c
1 /*
2  *  Regular Pattern Analyzer (RPA)
3  *  Copyright (c) 2009-2010 Martin Stoilov
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  *  Martin Stoilov <martin@rpasearch.com>
19  */
20
21 #include <stdio.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <sys/mman.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <wchar.h>
31 #include <time.h>
32 #include "rlib/rmem.h"
33 #include "rlib/rarray.h"
34 #include "rpa/rpadbex.h"
35 #include "rpagrep.h"
36 #include "rpagrepdep.h"
37
38
/*
 * Print the version banner and the command-line help to stderr.
 *
 * argc, argv: the program arguments as received by main(). argv[0] is
 * shown as the program name in the synopsis; a fallback name is used if
 * it is not available (argc == 0 or a NULL argv[0]).
 *
 * Always returns 0.
 */
int usage(int argc, const char *argv[])
{
	/* Passing NULL to "%s" is undefined, so never hand argv[0] over unchecked. */
	const char *prog = (argc > 0 && argv[0]) ? argv[0] : "rpagrep";

	fprintf(stderr, "RPA Grep with RPA Engine: %s \n", rpa_dbex_version());
	fprintf(stderr, "Copyright (C) 2010 Martin Stoilov\n\n");

	fprintf(stderr, "Usage: \n %s [OPTIONS] <filename>\n", prog);
	fprintf(stderr, " OPTIONS:\n");
	fprintf(stderr, "\t-e patterns              BNF Expression.\n");
	fprintf(stderr, "\t-f patternfile           Read the BNF rules from a file, the last pattern will be executed.\n");
	fprintf(stderr, "\t-s string                Scan the supplied string.\n");
	fprintf(stderr, "\t-i                       Ignore case.\n");
	fprintf(stderr, "\t-m                       Match.\n");
	fprintf(stderr, "\t-p                       Parse.\n");
	fprintf(stderr, "\t-a                       Parse AST.\n");
	fprintf(stderr, "\t-l                       Line mode.\n");
	fprintf(stderr, "\t-16                      Force UTF16 encoding.\n");
	fprintf(stderr, "\t-b                       Force byte encoding.\n");
	fprintf(stderr, "\t-d                       Dump a production in a tree format.\n");
	fprintf(stderr, "\t-t                       Display time elapsed.\n");
	fprintf(stderr, "\t-L, --list-rules         List all patterns.\n");
	fprintf(stderr, "\t-v                       Display version information.\n");
	fprintf(stderr, "\t-h, --help               Display this help.\n");
	fprintf(stderr, "\t    --debug-compile      Display debug compilation information.\n");
	fprintf(stderr, "\t    --dump-info          Display rules info.\n");
	fprintf(stderr, "\t    --dump-code rule     Display compiled code for rule.\n");
	fprintf(stderr, "\t    --dump-alias         Display alias info.\n");
	fprintf(stderr, "\t    --dump-records       Display rules parsing records.\n");
	fprintf(stderr, "\t    --no-optimizations   Disable optimizations.\n");
	fprintf(stderr, "\t    --exec-debug         Execute in debug mode.\n");
	fprintf(stderr, "\t    --no-cache           Disable execution cache.\n");
	fprintf(stderr, "\t    --no-bitmap          Disable expression bitmap use.\n");

	return 0;
}
71
72
73 int main(int argc, const char *argv[])
74 {
75         unsigned long sckb = 0;
76         int ret, scanned = 0, i;
77         rpa_grep_t *pGrep;
78         rarray_t *buffers;
79
80         buffers = r_array_create(sizeof(rpa_buffer_t *));
81         pGrep = rpa_grep_create();
82         if (argc <= 1) {
83                 usage(argc, argv);
84                 goto end;
85         }
86
87         for (i = 1; i < argc; i++) {
88                 if (strcmp(argv[i], "-t") == 0) {
89                         pGrep->showtime = 1;
90                 }
91         }
92
93         for (i = 1; i < argc; i++) {
94                 if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "/?") == 0 || strcmp(argv[i], "-h") == 0) {
95                         usage(argc, argv);
96                         goto end;
97                 }
98         }
99
100         for (i = 1; i < argc; i++) {
101                 if (strcmp(argv[i], "-v") == 0) {
102                         fprintf(stderr, "RPA Grep with RPA Engine: %s\n", rpa_dbex_version());
103                         goto end;
104                 }
105         }
106
107         for (i = 1; i < argc; i++) {
108                 if (strcmp(argv[i], "--no-bitmap") == 0) {
109                         rpa_dbex_cfgset(pGrep->hDbex, RPA_DBEXCFG_BITMAP, 0);
110                 }
111         }
112
113         for (i = 1; i < argc; i++) {
114                 if (strcmp(argv[i], "--no-optimizations") == 0) {
115                         rpa_grep_optimizations(pGrep, 0);
116                 }
117         }
118
119         for (i = 1; i < argc; i++) {
120                 if (strcmp(argv[i], "-f") == 0) {
121                         if (++i < argc) {
122                                 rpa_buffer_t *pattern = rpa_buffer_map_file(argv[i]);
123                                 if (pattern) {
124                                         ret = rpa_grep_load_pattern(pGrep, pattern);
125                                         r_array_add(buffers, &pattern);
126                                 } else {
127                                         ret = -1;
128                                 }
129                                 if (ret < 0)
130                                         goto error;
131                         }
132                 }
133         }
134
135         for (i = 1; i < argc; i++) {
136                 if (strcmp(argv[i], "-e") == 0) {
137                         if (++i < argc) {
138                                 rpa_buffer_t pattern;
139                                 pattern.s = (char*)argv[i];
140                                 pattern.size = strlen(argv[i]);
141                                 ret = rpa_grep_load_string_pattern(pGrep, &pattern);
142                                 if (ret < 0)
143                                         goto error;
144                         }
145                         
146                 }
147         }
148
149         for (i = 1; i < argc; i++) {
150                 if (strcmp(argv[i], "--exec-debug") == 0) {
151                         pGrep->execdebug = 1;
152                 }
153         }
154
155         for (i = 1; i < argc; i++) {
156                 if (strcmp(argv[i], "--no-cache") == 0) {
157                         pGrep->disablecache = 1;
158                 }
159         }
160
161         for (i = 1; i < argc; i++) {
162                 if (strcmp(argv[i], "--dump-code") == 0) {
163                         if (rpa_dbex_compile(pGrep->hDbex) == 0) {
164                                 if (++i < argc) {
165                                         rpa_dbex_dumpcode(pGrep->hDbex, rpa_dbex_lookup_s(pGrep->hDbex, argv[i]));
166                                 }
167                         }
168                         goto end;
169                 }
170         }
171
172
173         for (i = 1; i < argc; i++) {
174                 if (strcmp(argv[i], "--dump-info") == 0) {
175                         rpa_grep_dump_pattern_info(pGrep);
176                         goto end;
177                 }
178         }
179
180         for (i = 1; i < argc; i++) {
181                 if (strcmp(argv[i], "--debug-compile") == 0) {
182                         rpa_grep_debug_compile(pGrep);
183                         goto end;
184                 }
185         }
186
187         for (i = 1; i < argc; i++) {
188                 if (strcmp(argv[i], "--dump-alias") == 0) {
189                         rpa_grep_dump_alias_info(pGrep);
190                         goto end;
191                 }
192         }
193
194         for (i = 1; i < argc; i++) {
195                 if (strcmp(argv[i], "--dump-records") == 0) {
196                         rpa_grep_dump_pattern_records(pGrep);
197                         goto end;
198                 }
199         }
200
201         if (rpa_dbex_compile(pGrep->hDbex) < 0) {
202                 rpa_errinfo_t errinfo;
203                 rpa_dbex_lasterrorinfo(pGrep->hDbex, &errinfo);
204                 if (errinfo.code == RPA_E_UNRESOLVEDSYMBOL) {
205                         fprintf(stdout, "ERROR: Unresolved Symbol: %s\n", errinfo.name);
206                 } else {
207                         fprintf(stdout, "ERROR %ld: Compilation failed.\n", errinfo.code);
208                 }
209                 goto end;
210         }
211
212
213         for (i = 1; i < argc; i++) {
214                 if (strcmp(argv[i], "--dump-records") == 0) {
215                         rpa_grep_dump_pattern_records(pGrep);
216                         goto end;
217                 } else if (strcmp(argv[i], "--dump-info") == 0) {
218                         rpa_grep_dump_pattern_info(pGrep);
219                         goto end;
220                 } else if (strcmp(argv[i], "-L") == 0 || strcmp(argv[i], "--list-rules") == 0) {
221                         rpa_grep_list_patterns(pGrep);
222                         goto end;
223                 } else if (strcmp(argv[i], "-d") == 0) {
224                         if (++i < argc) {
225                                 if (argv[i]) {
226                                         rpa_buffer_t pattern;
227                                         pattern.s = (char*)argv[i];
228                                         pattern.size = strlen(argv[i]);                                 
229                                         rpa_grep_dump_pattern_tree(pGrep, &pattern);
230                                         goto end;
231                                 }
232                         }
233                 } else if (strcmp(argv[i], "-i") == 0) {
234                         pGrep->icase = 1;
235                 } else if (strcmp(argv[i], "-l") == 0) {
236                         pGrep->greptype = RPA_GREPTYPE_SCANLINES;
237                 } else if (strcmp(argv[i], "-m") == 0) {
238                         pGrep->greptype = RPA_GREPTYPE_MATCH;
239                 } else if (strcmp(argv[i], "-p") == 0) {
240                         pGrep->greptype = RPA_GREPTYPE_PARSE;
241                 } else if (strcmp(argv[i], "-a") == 0) {
242                         pGrep->greptype = RPA_GREPTYPE_PARSEAST;
243                 } else if (strcmp(argv[i], "-16") == 0) {
244                         pGrep->forceEncoding = RPA_GREP_FORCE_UTF16;
245                 } else if (strcmp(argv[i], "-b") == 0) {
246                         pGrep->forceEncoding = RPA_GREP_FORCE_BYTE;
247                 }
248                 
249         }
250
251
252         for (i = 1; i < argc; i++) {
253                 if (strcmp(argv[i], "-s") == 0) {
254                         if (++i < argc) {
255                                 rpa_buffer_t buf;
256                                 buf.s = (char*)argv[i];
257                                 buf.size = r_strlen(argv[i]);
258                                 rpa_grep_scan_buffer(pGrep, &buf);
259                                 ++scanned;
260                         }
261                 }
262         }
263
264         /* scan files */
265         for (i = 1; i < argc; i++) {
266                 if (argv[i][0] != '-') {
267                         ++scanned;
268                         rpa_grep_scan_path(pGrep, argv[i]);
269                 } else if (argv[i][1] == 'e' || argv[i][1] == 'f' || argv[i][1] == 'c' || argv[i][1] == 'C'){
270                         ++i;
271                 }
272                 
273         }
274
275         if (!scanned) {
276                 rpa_buffer_t *buf = rpa_buffer_loadfile(stdin);
277                 if (buf) {
278                         rpa_grep_scan_buffer(pGrep, buf);
279                         rpa_buffer_destroy(buf);
280                 }
281         }
282
283 end:
284         for (i = 0; i < r_array_length(buffers); i++) {
285                 rpa_buffer_destroy(r_array_index(buffers, i, rpa_buffer_t*));
286         }
287         r_object_destroy((robject_t*)buffers);
288         rpa_grep_close(pGrep);
289         if (pGrep->showtime) {
290                 sckb = (unsigned long)(pGrep->scsize/1024);
291                 unsigned long milsec;
292                 unsigned long minutes;
293                 float sec;
294                 milsec = pGrep->scanmilisec;
295                 if (milsec == 0)
296                         milsec = 1;
297                 minutes = milsec/60000;
298                 sec = (milsec%60000)/1000.0;
299                 fprintf(stdout, "\ntime: %0ldm%1.3fs, %ld KB (%ld KB/sec), stack: %ld KB, memory: %ld KB (leaked %ld Bytes), cachehit: %ld \n",
300                                 minutes, sec, sckb, 1000*sckb/milsec, pGrep->usedstack / 1000, (long)r_debug_get_maxmem()/1000, (long)r_debug_get_allocmem(),
301                                 pGrep->cachehit);
302         }
303
304         rpa_grep_destroy(pGrep);
305         return 0;
306
307 error:
308         rpa_grep_destroy(pGrep);
309         return 1;
310 }
311
312