RPA Toolkit
fixed build warnings for windows.
[rpatk.git] / rgrep / win32 / main.c
1 /*
2  *  Regular Pattern Analyzer (RPA)
3  *  Copyright (c) 2009-2010 Martin Stoilov
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  *  Martin Stoilov <martin@rpasearch.com>
19  */
20
21
22 #include <wchar.h>
23 #include <windows.h>
24 #include "rlib/rmem.h"
25 #include "rlib/rarray.h"
26 #include "rpa/rpadbex.h"
27 #include "rpagrep.h"
28 #include "rpagrepdep.h"
29
30 rpa_buffer_t * rpa_buffer_from_wchar(const wchar_t *wstr);
31
32
/*
 * Print the program banner and the command line help to stderr.
 *
 * argc is not used. argv[0] is printed as the program name in the
 * "Usage:" line, so argv must hold at least one element.
 *
 * Always returns 0.
 */
int usage(int argc, const wchar_t *argv[])
{
	(void)argc;

	/*
	 * Use explicit length modifiers instead of a bare %s: in the wide
	 * printf family the meaning of %s differs between the Microsoft CRT
	 * (wide string) and ISO C (narrow string), so a bare %s is wrong for
	 * one of the two arguments below whichever convention is in effect.
	 * NOTE(review): assumes rpa_dbex_version() returns a narrow
	 * (const char *) string - confirm against rpa/rpadbex.h.
	 */
	fwprintf(stderr, L"RPA Grep with RPA Engine: %hs \n", rpa_dbex_version());
	fwprintf(stderr, L"Copyright (C) 2010 Martin Stoilov\n\n");

	/* argv[0] is a wide string, %ls is unambiguous for it. */
	fwprintf(stderr, L"Usage: \n %ls [OPTIONS] <filename>\n", argv[0]);
	fwprintf(stderr, L" OPTIONS:\n");
	fwprintf(stderr, L"\t-e patterns              BNF Expression.\n");
	fwprintf(stderr, L"\t-f patternfile           Read the BNF rules from a file, the last pattern will be executed.\n");
	fwprintf(stderr, L"\t-i                       Ignore case.\n");
	fwprintf(stderr, L"\t-m                       Match.\n");
	fwprintf(stderr, L"\t-p                       Parse.\n");
	fwprintf(stderr, L"\t-l                       Line mode.\n");
	fwprintf(stderr, L"\t-16                      Force UTF16 encoding.\n");
	fwprintf(stderr, L"\t-b                       Force byte encoding.\n");
	fwprintf(stderr, L"\t-d                       Dump a production in a tree format.\n");
	fwprintf(stderr, L"\t-t                       Display time elapsed.\n");
	fwprintf(stderr, L"\t-L, --list-rules         List all patterns.\n");
	fwprintf(stderr, L"\t-v                       Display version information.\n");
	fwprintf(stderr, L"\t-h, --help               Display this help.\n");
	fwprintf(stderr, L"\t    --debug-compile      Display debug compilation information.\n");
	fwprintf(stderr, L"\t    --dump-info          Display rules info.\n");
	fwprintf(stderr, L"\t    --dump-code rule     Display compiled code for rule.\n");
	fwprintf(stderr, L"\t    --dump-alias         Display alias info.\n");
	fwprintf(stderr, L"\t    --dump-records       Display rules parsing records.\n");
	fwprintf(stderr, L"\t    --no-optimizations   Disable optimizations.\n");
	fwprintf(stderr, L"\t    --exec-debug         Execute in debug mode.\n");
	fwprintf(stderr, L"\t    --no-cache           Disable execution cache.\n");
	fwprintf(stderr, L"\t    --no-bitmap          Disable expression bitmap use.\n");

	return 0;
}
65
66
67 int wmain(int argc, const wchar_t* argv[])
68 {
69         unsigned long sckb;
70         int ret, scanned = 0;
71         long i;
72         rpa_grep_t *pGrep = NULL;
73         rarray_t *buffers;
74
75         buffers = r_array_create(sizeof(rpa_buffer_t *));
76         pGrep = rpa_grep_create();
77
78         if (argc <= 1) {
79                 usage(argc, argv);
80                 goto end;
81         }
82
83         for (i = 1; i < argc; i++) {
84                 if (wcscmp(argv[i], L"-t") == 0) {
85                         pGrep->showtime = 1;
86                 }
87         }
88
89         for (i = 1; i < argc; i++) {
90                 if (wcscmp(argv[i], L"--help") == 0 || wcscmp(argv[i], L"-help") == 0 || wcscmp(argv[i], L"/?") == 0 || wcscmp(argv[i], L"-h") == 0) {
91                         usage(argc, argv);
92                         goto end;
93                 }
94         }
95
96         for (i = 1; i < argc; i++) {
97                 if (wcscmp(argv[i], L"--no-bitmap") == 0) {
98                         rpa_dbex_cfgset(pGrep->hDbex, RPA_DBEXCFG_BITMAP, 0);
99                 }
100         }
101
102         for (i = 1; i < argc; i++) {
103                 if (wcscmp(argv[i], L"--no-optimizations") == 0) {
104                         rpa_grep_optimizations(pGrep, 0);
105                 }
106         }
107
108         for (i = 1; i < argc; i++) {
109                 if (wcscmp(argv[i], L"-c") == 0) {
110                         if (++i < argc) {
111                                 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
112                                 if (!pattern) {
113                                         goto error;
114                                 }
115                                 rpa_grep_setup_callback(pGrep, pattern);
116                                 rpa_buffer_destroy(pattern);
117                         }
118                 }
119         }
120
121
122         for (i = 1; i < argc; i++) {
123                 if (wcscmp(argv[i], L"-f") == 0) {
124                         if (++i < argc) {
125                                 rpa_buffer_t *pattern = rpa_buffer_map_file(argv[i]);
126                                 if (pattern) {
127                                         ret = rpa_grep_load_pattern(pGrep, pattern);
128                                         r_array_add(buffers, &pattern);
129                                 } else {
130                                         ret = -1;
131                                 }
132                                 if (ret < 0)
133                                         goto error;
134                         }
135                 }
136         }
137
138         for (i = 1; i < argc; i++) {
139                 if (wcscmp(argv[i], L"-e") == 0) {
140                         if (++i < argc) {
141                                 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
142                                 if (!pattern) {
143                                         goto error;
144                                 }
145                                 ret = rpa_grep_load_string_pattern(pGrep, pattern);
146                                 rpa_buffer_destroy(pattern);
147                                 if (ret < 0)
148                                         goto error;
149
150                         }
151                 } 
152         }
153
154
155         for (i = 1; i < argc; i++) {
156                 if (wcscmp(argv[i], L"--dump-code") == 0) {
157                         if (rpa_dbex_compile(pGrep->hDbex) == 0) {
158                                 if (++i < argc) {
159                                         rpa_buffer_t *code = rpa_buffer_from_wchar(argv[i]);
160                                         rpa_dbex_dumpcode(pGrep->hDbex, rpa_dbex_lookup_s(pGrep->hDbex, code->s));
161                                         rpa_buffer_destroy(code);
162                                 }
163                         }
164                         goto end;
165                 }
166         }
167
168
169         for (i = 1; i < argc; i++) {
170                 if (wcscmp(argv[i], L"--dump-info") == 0) {
171                         rpa_grep_dump_pattern_info(pGrep);
172                         goto end;
173                 }
174         }
175
176         for (i = 1; i < argc; i++) {
177                 if (wcscmp(argv[i], L"--debug-compile") == 0) {
178                         rpa_grep_debug_compile(pGrep);
179                         goto end;
180                 }
181         }
182
183         for (i = 1; i < argc; i++) {
184                 if (wcscmp(argv[i], L"--dump-alias") == 0) {
185                         rpa_grep_dump_alias_info(pGrep);
186                         goto end;
187                 }
188         }
189
190         for (i = 1; i < argc; i++) {
191                 if (wcscmp(argv[i], L"--dump-records") == 0) {
192                         rpa_grep_dump_pattern_records(pGrep);
193                         goto end;
194                 }
195         }
196
197         for (i = 1; i < argc; i++) {
198                 if (wcscmp(argv[i], L"--exec-debug") == 0) {
199                         pGrep->execdebug = 1;
200                 }
201         }
202
203         for (i = 1; i < argc; i++) {
204                 if (wcscmp(argv[i], L"--no-cache") == 0) {
205                         pGrep->disablecache = 1;
206                 }
207         }
208
209
210         if (rpa_dbex_compile(pGrep->hDbex) < 0) {
211                 rpa_errinfo_t errinfo;
212                 rpa_dbex_lasterrorinfo(pGrep->hDbex, &errinfo);
213                 if (errinfo.code == RPA_E_UNRESOLVEDSYMBOL) {
214                         fprintf(stdout, "ERROR: Unresolved Symbol: %s\n", errinfo.name);
215                 } else {
216                         fprintf(stdout, "ERROR %ld: Compilation failed.\n", errinfo.code);
217                 }
218                 goto end;
219         }
220
221
222         for (i = 1; i < argc; i++) {
223                 if (wcscmp(argv[i], L"-L") == 0) {
224                         rpa_grep_list_patterns(pGrep);
225                         goto end;
226                 } else if (wcscmp(argv[i], L"-d") == 0) {
227                         if (++i < argc) {
228                                 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
229                                 if (!pattern) {
230                                         goto error;
231                                 }
232                                 rpa_grep_dump_pattern_tree(pGrep, pattern);
233                                 rpa_buffer_destroy(pattern);
234                                 goto end;
235                         }
236                 } else if (wcscmp(argv[i], L"-i") == 0) {
237                         pGrep->icase = 1;
238                 } else if (wcscmp(argv[i], L"-l") == 0) {
239                         pGrep->greptype = RPA_GREPTYPE_SCANLINES;
240                 } else if (wcscmp(argv[i], L"-m") == 0) {
241                         pGrep->greptype = RPA_GREPTYPE_MATCH;
242                 } else if (wcscmp(argv[i], L"-p") == 0) {
243                         pGrep->greptype = RPA_GREPTYPE_PARSE;
244                 } else if (wcscmp(argv[i], L"-a") == 0) {
245                         pGrep->greptype = RPA_GREPTYPE_PARSEAST;
246                 } else if (wcscmp(argv[i], L"-16") == 0) {
247                         pGrep->forceEncoding = RPA_GREP_FORCE_UTF16;
248                 } else if (wcscmp(argv[i], L"-b") == 0) {
249                         pGrep->forceEncoding = RPA_GREP_FORCE_BYTE;
250                 }
251                 
252         }
253
254
255         for (i = 1; i < argc; i++) {
256                 if (wcscmp(argv[i], L"-s") == 0) {
257                         if (++i < argc) {
258                                 rpa_buffer_t *buf = rpa_buffer_from_wchar(argv[i]);
259                                 rpa_grep_scan_buffer(pGrep, buf);
260                                 rpa_buffer_destroy(buf);
261                                 ++scanned;
262                         }
263                 }
264         }
265
266         /* scan files */
267         for (i = 1; i < argc; i++) {
268                 if (argv[i][0] != L'-') {
269                         ++scanned;
270                         rpa_grep_scan_path(pGrep, argv[i]);
271                 } else if (argv[i][1] == L'e' || argv[i][1] == L'f' || argv[i][1] == L'c' || argv[i][1] == L'C'){
272                         ++i;
273                 }
274                 
275         }
276
277         if (!scanned) {
278                 rpa_buffer_t *buf = rpa_buffer_loadfile(stdin);
279                 if (buf) {
280                         rpa_grep_scan_buffer(pGrep, buf);
281                         rpa_buffer_destroy(buf);
282                 }
283         }
284
285
286         /* scan files */
287         for (i = 1; i < argc; i++) {
288                 if (argv[i][0] != '-') {
289                         ++scanned;
290                         rpa_grep_scan_path(pGrep, argv[i]);
291                 } else if (argv[i][1] == L'e' || argv[i][1] == L'f' || argv[i][1] == L'c'){
292                         ++i;
293                 }
294                 
295         }
296
297         if (!scanned) {
298                 rpa_buffer_t *buf = rpa_buffer_loadfile(stdin);
299                 if (buf) {
300                         rpa_grep_scan_buffer(pGrep, buf);
301                         rpa_buffer_destroy(buf);
302                 }
303         }
304
305 end:
306         for (i = 0; i < (long)r_array_length(buffers); i++) {
307                 rpa_buffer_destroy(r_array_index(buffers, i, rpa_buffer_t*));
308         }
309         r_object_destroy((robject_t*)buffers);
310         rpa_grep_close(pGrep);
311
312
313         rpa_grep_close(pGrep);
314         sckb = (unsigned long)(pGrep->scsize/1024);
315
316         if (pGrep->showtime) {
317                 unsigned long milsec;
318                 unsigned long minutes;
319                 double sec;
320                 milsec = pGrep->scanmilisec;
321                 if (milsec == 0)
322                         milsec = 1;
323                 minutes = milsec/60000;
324                 sec = (milsec%60000)/1000.0;
325                 fwprintf(stdout, L"\ntime: %0ldm%1.3fs, %ld KB (%ld KB/sec), stack: %ld KB, memory: %ld KB (leaked %ld Bytes), cachehit: %ld \n", 
326                                 minutes, sec, sckb, 1000*sckb/milsec, pGrep->usedstack / 1000, (long)r_debug_get_maxmem()/1000, (long)r_debug_get_allocmem(),
327                                 pGrep->cachehit);
328         }
329
330         rpa_grep_destroy(pGrep);
331         return 0;
332
333 error:
334         rpa_grep_destroy(pGrep);
335         return 1;
336 }