RPA Toolkit
work on bitmap operations
[rpatk.git] / rgrep / win32 / main.c
1 /*
2  *  Regular Pattern Analyzer (RPA)
3  *  Copyright (c) 2009-2010 Martin Stoilov
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  *  Martin Stoilov <martin@rpasearch.com>
19  */
20
21
22 #include <wchar.h>
23 #include <windows.h>
24 #include "rlib/rmem.h"
25 #include "rlib/rarray.h"
26 #include "rpa/rpadbex.h"
27 #include "rpagrep.h"
28 #include "rpagrepdep.h"
29
/*
 * Forward declaration; the definition is not part of this file.
 * Builds an rpa_buffer_t from a wide-character string (presumably converting
 * it to the byte encoding the engine expects -- TODO confirm at the definition).
 * Callers in this file treat a NULL return as failure and release the result
 * with rpa_buffer_destroy().
 */
rpa_buffer_t * rpa_buffer_from_wchar(const wchar_t *wstr);
31
32
/*
 * Print the program banner and the command line help to stderr.
 *
 * argc: unused.
 * argv: argument vector; only argv[0] (the program name) is printed.
 *
 * Always returns 0.
 */
int usage(int argc, const wchar_t *argv[])
{
	(void)argc;

	/*
	 * NOTE(review): the Microsoft CRT reads "%s" in a wide format string as a
	 * wide string. If rpa_dbex_version() returns a narrow (char *) string this
	 * conversion needs to be "%hs" -- confirm against rpa/rpadbex.h.
	 */
	fwprintf(stderr, L"RPA Grep with RPA Engine: %s \n", rpa_dbex_version());
	fwprintf(stderr, L"Copyright (C) 2010 Martin Stoilov\n\n");

	/* argv[0] is wide: "%ls" means wchar_t* in both ISO C and the Microsoft CRT. */
	fwprintf(stderr, L"Usage: \n %ls [OPTIONS] <filename>\n", argv[0]);
	fwprintf(stderr, L" OPTIONS:\n");
	fwprintf(stderr, L"\t-e patterns              BNF Expression.\n");
	fwprintf(stderr, L"\t-f patternfile           Read the BNF rules from a file, the last pattern will be executed.\n");
	fwprintf(stderr, L"\t-i                       Ignore case.\n");
	fwprintf(stderr, L"\t-m                       Match.\n");
	fwprintf(stderr, L"\t-p                       Parse.\n");
	fwprintf(stderr, L"\t-l                       Line mode.\n");
	fwprintf(stderr, L"\t-16                      Force UTF16 encoding.\n");
	fwprintf(stderr, L"\t-b                       Force byte encoding.\n");
	fwprintf(stderr, L"\t-d                       Dump a production in a tree format.\n");
	fwprintf(stderr, L"\t-t                       Display time elapsed.\n");
	fwprintf(stderr, L"\t-L, --list-rules         List all patterns.\n");
	fwprintf(stderr, L"\t-v                       Display version information.\n");
	fwprintf(stderr, L"\t-h, --help               Display this help.\n");
	fwprintf(stderr, L"\t    --debug-compile      Display debug compilation information.\n");
	fwprintf(stderr, L"\t    --dump-info          Display rules info.\n");
	fwprintf(stderr, L"\t    --dump-code rule     Display compiled code for rule.\n");
	fwprintf(stderr, L"\t    --dump-alias         Display alias info.\n");
	fwprintf(stderr, L"\t    --dump-records       Display rules parsing records.\n");
	fwprintf(stderr, L"\t    --no-optimizations   Disable optimizations.\n");
	fwprintf(stderr, L"\t    --exec-debug         Execute in debug mode.\n");
	fwprintf(stderr, L"\t    --no-cache           Disable execution cache.\n");
	fwprintf(stderr, L"\t    --no-bitmap          Disable expression bitmap use.\n");

	return 0;
}
65
66
67 int wmain(int argc, const wchar_t* argv[])
68 {
69         unsigned long sckb;
70         int ret, scanned = 0, i;
71         rpa_grep_t *pGrep = NULL;
72         DWORD eticks, bticks = GetTickCount();
73         rarray_t *buffers;
74
75         buffers = r_array_create(sizeof(rpa_buffer_t *));
76         pGrep = rpa_grep_create();
77
78         if (argc <= 1) {
79                 usage(argc, argv);
80                 goto end;
81         }
82
83         for (i = 1; i < argc; i++) {
84                 if (wcscmp(argv[i], L"-t") == 0) {
85                         pGrep->showtime = 1;
86                 }
87         }
88
89         for (i = 1; i < argc; i++) {
90                 if (wcscmp(argv[i], L"--help") == 0 || wcscmp(argv[i], L"-help") == 0 || wcscmp(argv[i], L"/?") == 0 || wcscmp(argv[i], L"-h") == 0) {
91                         usage(argc, argv);
92                         goto end;
93                 }
94         }
95
96         for (i = 1; i < argc; i++) {
97                 if (wcscmp(argv[i], L"--no-bitmap") == 0) {
98                         rpa_dbex_cfgset(pGrep->hDbex, RPA_DBEXCFG_BITMAP, 0);
99                 }
100         }
101
102         for (i = 1; i < argc; i++) {
103                 if (wstrcmp(argv[i], L"--no-optimizations") == 0) {
104                         rpa_grep_optimizations(pGrep, 0);
105                 }
106         }
107
108         for (i = 1; i < argc; i++) {
109                 if (wcscmp(argv[i], L"-c") == 0) {
110                         if (++i < argc) {
111                                 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
112                                 if (!pattern) {
113                                         goto error;
114                                 }
115                                 rpa_grep_setup_callback(pGrep, pattern);
116                                 rpa_buffer_destroy(pattern);
117                         }
118                 }
119         }
120
121
122         for (i = 1; i < argc; i++) {
123                 if (wcscmp(argv[i], L"-f") == 0) {
124                         if (++i < argc) {
125                                 rpa_buffer_t *pattern = rpa_buffer_map_file(argv[i]);
126                                 if (pattern) {
127                                         ret = rpa_grep_load_pattern(pGrep, pattern);
128                                         r_array_add(buffers, &pattern);
129                                 } else {
130                                         ret = -1;
131                                 }
132                                 if (ret < 0)
133                                         goto error;
134                         }
135                 }
136         }
137
138         for (i = 1; i < argc; i++) {
139                 if (wcscmp(argv[i], L"-e") == 0) {
140                         if (++i < argc) {
141                                 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
142                                 if (!pattern) {
143                                         goto error;
144                                 }
145                                 ret = rpa_grep_load_string_pattern(pGrep, pattern);
146                                 rpa_buffer_destroy(pattern);
147                                 if (ret < 0)
148                                         goto error;
149
150                         }
151                 } 
152         }
153
154
155         for (i = 1; i < argc; i++) {
156                 if (wcscmp(argv[i], L"--dump-code") == 0) {
157                         if (rpa_dbex_compile(pGrep->hDbex) == 0) {
158                                 if (++i < argc) {
159                                         rpa_dbex_dumpcode(pGrep->hDbex, rpa_dbex_lookup_s(pGrep->hDbex, argv[i]));
160                                 }
161                         }
162                         goto end;
163                 }
164         }
165
166
167         for (i = 1; i < argc; i++) {
168                 if (wcscmp(argv[i], L"--dump-info") == 0) {
169                         rpa_grep_dump_pattern_info(pGrep);
170                         goto end;
171                 }
172         }
173
174         for (i = 1; i < argc; i++) {
175                 if (wcscmp(argv[i], L"--debug-compile") == 0) {
176                         rpa_grep_debug_compile(pGrep);
177                         goto end;
178                 }
179         }
180
181         for (i = 1; i < argc; i++) {
182                 if (wcscmp(argv[i], L"--dump-alias") == 0) {
183                         rpa_grep_dump_alias_info(pGrep);
184                         goto end;
185                 }
186         }
187
188         for (i = 1; i < argc; i++) {
189                 if (wcscmp(argv[i], L"--dump-records") == 0) {
190                         rpa_grep_dump_pattern_records(pGrep);
191                         goto end;
192                 }
193         }
194
195         for (i = 1; i < argc; i++) {
196                 if (wcscmp(argv[i], L"--exec-debug") == 0) {
197                         pGrep->execdebug = 1;
198                 }
199         }
200
201         for (i = 1; i < argc; i++) {
202                 if (wcscmp(argv[i], L"--no-cache") == 0) {
203                         pGrep->disablecache = 1;
204                 }
205         }
206
207
208         if (rpa_dbex_compile(pGrep->hDbex) < 0) {
209                 rpa_errinfo_t errinfo;
210                 rpa_dbex_lasterrorinfo(pGrep->hDbex, &errinfo);
211                 if (errinfo.code == RPA_E_UNRESOLVEDSYMBOL) {
212                         fprintf(stdout, "ERROR: Unresolved Symbol: %s\n", errinfo.name);
213                 } else {
214                         fprintf(stdout, "ERROR %ld: Compilation failed.\n", errinfo.code);
215                 }
216                 goto end;
217         }
218
219
220         for (i = 1; i < argc; i++) {
221                 if (wcscmp(argv[i], L"-L") == 0) {
222                         rpa_grep_list_patterns(pGrep);
223                         goto end;
224                 } else if (wcscmp(argv[i], L"-d") == 0) {
225                         if (++i < argc) {
226                                 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
227                                 if (!pattern) {
228                                         goto error;
229                                 }
230                                 rpa_grep_dump_pattern_tree(pGrep, pattern);
231                                 rpa_buffer_destroy(pattern);
232                                 goto end;
233                         }
234                 } else if (wcscmp(argv[i], L"-i") == 0) {
235                         pGrep->icase = 1;
236                 } else if (wcscmp(argv[i], L"-l") == 0) {
237                         pGrep->greptype = RPA_GREPTYPE_SCANLINES;
238                 } else if (wcscmp(argv[i], L"-m") == 0) {
239                         pGrep->greptype = RPA_GREPTYPE_MATCH;
240                 } else if (wcscmp(argv[i], L"-p") == 0) {
241                         pGrep->greptype = RPA_GREPTYPE_PARSE;
242                 } else if (wcscmp(argv[i], L"-a") == 0) {
243                         pGrep->greptype = RPA_GREPTYPE_PARSEAST;
244                 } else if (wcscmp(argv[i], L"-16") == 0) {
245                         pGrep->forceEncoding = RPA_GREP_FORCE_UTF16;
246                 } else if (wcscmp(argv[i], L"-b") == 0) {
247                         pGrep->forceEncoding = RPA_GREP_FORCE_BYTE;
248                 }
249                 
250         }
251
252
253         for (i = 1; i < argc; i++) {
254                 if (wcscmp(argv[i], L"-s") == 0) {
255                         if (++i < argc) {
256                                 rpa_buffer_t *buf = rpa_buffer_from_wchar(argv[i]);
257                                 rpa_grep_scan_buffer(pGrep, buf);
258                                 rpa_buffer_destroy(buf);
259                                 ++scanned;
260                         }
261                 }
262         }
263
264         /* scan files */
265         for (i = 1; i < argc; i++) {
266                 if (argv[i][0] != L'-') {
267                         ++scanned;
268                         rpa_grep_scan_path(pGrep, argv[i]);
269                 } else if (argv[i][1] == L'e' || argv[i][1] == L'f' || argv[i][1] == L'c' || argv[i][1] == L'C'){
270                         ++i;
271                 }
272                 
273         }
274
275         if (!scanned) {
276                 rpa_buffer_t *buf = rpa_buffer_loadfile(stdin);
277                 if (buf) {
278                         rpa_grep_scan_buffer(pGrep, buf);
279                         rpa_buffer_destroy(buf);
280                 }
281         }
282
283
284         /* scan files */
285         for (i = 1; i < argc; i++) {
286                 if (argv[i][0] != '-') {
287                         ++scanned;
288                         rpa_grep_scan_path(pGrep, argv[i]);
289                 } else if (argv[i][1] == L'e' || argv[i][1] == L'f' || argv[i][1] == L'c'){
290                         ++i;
291                 }
292                 
293         }
294
295         if (!scanned) {
296                 rpa_buffer_t *buf = rpa_buffer_loadfile(stdin);
297                 if (buf) {
298                         rpa_grep_scan_buffer(pGrep, buf);
299                         rpa_buffer_destroy(buf);
300                 }
301         }
302
303 end:
304         for (i = 0; i < r_array_length(buffers); i++) {
305                 rpa_buffer_destroy(r_array_index(buffers, i, rpa_buffer_t*));
306         }
307         r_object_destroy((robject_t*)buffers);
308         rpa_grep_close(pGrep);
309
310
311         rpa_grep_close(pGrep);
312         sckb = (unsigned long)(pGrep->scsize/1024);
313
314         if (pGrep->showtime) {
315                 unsigned long milsec;
316                 unsigned long minutes;
317                 float sec;
318                 milsec = pGrep->scanmilisec;
319                 if (milsec == 0)
320                         milsec = 1;
321                 minutes = milsec/60000;
322                 sec = (milsec%60000)/1000.0;
323                 fwprintf(stdout, L"\ntime: %0ldm%1.3fs, %ld KB (%ld KB/sec), stack: %ld KB, memory: %ld KB (leaked %ld Bytes), cachehit: %ld \n", 
324                                 minutes, sec, sckb, 1000*sckb/milsec, pGrep->usedstack / 1000, (long)r_debug_get_maxmem()/1000, (long)r_debug_get_allocmem(),
325                                 pGrep->cachehit);
326         }
327
328         rpa_grep_destroy(pGrep);
329         return 0;
330
331 error:
332         rpa_grep_destroy(pGrep);
333         return 1;
334 }