2 * Regular Pattern Analyzer (RPA)
3 * Copyright (c) 2009-2010 Martin Stoilov
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 * Martin Stoilov <martin@rpasearch.com>
24 #include "rlib/rmem.h"
25 #include "rlib/rarray.h"
26 #include "rpa/rpadbex.h"
28 #include "rpagrepdep.h"
/*
 * Forward declaration: builds an rpa_buffer_t from a wide-character string.
 * In this file it is called on command line arguments, and every buffer it
 * returns is released with rpa_buffer_destroy() after use.
 * NOTE(review): the definition is not visible in this listing - presumably it
 * lives in the platform support code; confirm the encoding it produces.
 */
30 rpa_buffer_t * rpa_buffer_from_wchar(const wchar_t *wstr);
/*
 * Print the command line help for rpagrep to stderr.
 *
 * argc is unused; argv[0] is printed as the program name in the "Usage:"
 * line. Always returns 0.
 *
 * Everything is written with the wide-character fwprintf, so string
 * conversions carry an explicit length modifier: %hs for the engine version
 * (assumes rpa_dbex_version() returns a narrow char string - TODO confirm
 * against rpa/rpadbex.h) and %ls for the wide argv[0]. A bare %s means
 * "wide" in Microsoft's wprintf family but "narrow" in ISO C, so it cannot
 * be right for both arguments at once.
 */
int usage(int argc, const wchar_t *argv[])
{
	fwprintf(stderr, L"RPA Grep with RPA Engine: %hs \n", rpa_dbex_version());
	fwprintf(stderr, L"Copyright (C) 2010 Martin Stoilov\n\n");

	fwprintf(stderr, L"Usage: \n %ls [OPTIONS] <filename>\n", argv[0]);
	fwprintf(stderr, L" OPTIONS:\n");
	fwprintf(stderr, L"\t-e patterns BNF Expression.\n");
	fwprintf(stderr, L"\t-f patternfile Read the BNF rules from a file, the last pattern will be executed.\n");
	fwprintf(stderr, L"\t-i Ignore case.\n");
	fwprintf(stderr, L"\t-m Match.\n");
	fwprintf(stderr, L"\t-p Parse.\n");
	fwprintf(stderr, L"\t-l Line mode.\n");
	fwprintf(stderr, L"\t-16 Force UTF16 encoding.\n");
	fwprintf(stderr, L"\t-b Force byte encoding.\n");
	fwprintf(stderr, L"\t-d Dump a production in a tree format.\n");
	fwprintf(stderr, L"\t-t Display time elapsed.\n");
	fwprintf(stderr, L"\t-L, --list-rules List all patterns.\n");
	fwprintf(stderr, L"\t-v Display version information.\n");
	fwprintf(stderr, L"\t-h, --help Display this help.\n");
	fwprintf(stderr, L"\t --debug-compile Display debug compilation information.\n");
	fwprintf(stderr, L"\t --dump-info Display rules info.\n");
	fwprintf(stderr, L"\t --dump-code rule Display compiled code for rule.\n");
	fwprintf(stderr, L"\t --dump-alias Display alias info.\n");
	fwprintf(stderr, L"\t --dump-records Display rules parsing records.\n");
	fwprintf(stderr, L"\t --no-optimizations Disable optimizations.\n");
	fwprintf(stderr, L"\t --exec-debug Execute in debug mode.\n");
	/* The option is spelled "--dissable-cache" on purpose: that is the exact
	 * string the argument parser compares against. */
	fwprintf(stderr, L"\t --dissable-cache Dissable execution cache.\n");

	return 0;
}
/*
 * Wide-character entry point of rpagrep (uses the Win32 DWORD/GetTickCount
 * API for timing).
 *
 * The command line is walked repeatedly, one pass per option group: timing,
 * help, callback pattern, pattern files, pattern strings, diagnostic dumps,
 * execution switches, compilation, grep mode selection, and finally the
 * arguments to scan.
 *
 * NOTE(review): this listing is missing lines (braces, several statement
 * bodies, labels, returns). The comments below describe only what is visible
 * and flag everything else as an assumption.
 */
66 int wmain(int argc, const wchar_t* argv[])
69 int ret, scanned = 0, i;
70 rpa_grep_t *pGrep = NULL;
/* bticks is the tick count taken at start-up; eticks is declared for the end
 * of the run (its assignment is not visible here). */
71 DWORD eticks, bticks = GetTickCount();
/* Array of rpa_buffer_t pointers. It receives the buffers mapped for -f and
 * is drained and destroyed in the cleanup section near the end. */
74 buffers = r_array_create(sizeof(rpa_buffer_t *));
75 pGrep = rpa_grep_create();
/* Pass: -t. The action is not visible; presumably it enables
 * pGrep->showtime, which gates the timing report at the end - TODO confirm. */
82 for (i = 1; i < argc; i++) {
83 if (wcscmp(argv[i], L"-t") == 0) {
/* Pass: help request, accepted as --help, -help, /? or -h. The action is not
 * visible; presumably usage() is printed and the program exits. */
88 for (i = 1; i < argc; i++) {
89 if (wcscmp(argv[i], L"--help") == 0 || wcscmp(argv[i], L"-help") == 0 || wcscmp(argv[i], L"/?") == 0 || wcscmp(argv[i], L"-h") == 0) {
/* Pass: -c. An argument is converted to a temporary buffer, handed to
 * rpa_grep_setup_callback() and released right away.
 * NOTE(review): argv[i] is presumably the value following -c (the index
 * advance and its bounds check are not visible). */
94 for (i = 1; i < argc; i++) {
95 if (wcscmp(argv[i], L"-c") == 0) {
97 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
101 rpa_grep_setup_callback(pGrep, pattern);
102 rpa_buffer_destroy(pattern);
/* Pass: -f. The pattern file is mapped and loaded; unlike the -c/-e buffers
 * it is not destroyed here but stored in `buffers` and released during
 * cleanup. How `ret` is checked is not visible. */
108 for (i = 1; i < argc; i++) {
109 if (wcscmp(argv[i], L"-f") == 0) {
111 rpa_buffer_t *pattern = rpa_buffer_map_file(argv[i]);
113 ret = rpa_grep_load_pattern(pGrep, pattern);
114 r_array_add(buffers, &pattern);
/* Pass: -e. The pattern text given on the command line is loaded through a
 * temporary buffer that is destroyed immediately after loading. */
124 for (i = 1; i < argc; i++) {
125 if (wcscmp(argv[i], L"-e") == 0) {
127 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
131 ret = rpa_grep_load_string_pattern(pGrep, pattern);
132 rpa_buffer_destroy(pattern);
/* Pass: --dump-code. The rule database is compiled and, when the compile
 * call returns 0, the code of the rule looked up by name is dumped.
 * NOTE(review): argv[i] is a wide string here; confirm that
 * rpa_dbex_lookup_s() accepts a const wchar_t pointer. */
141 for (i = 1; i < argc; i++) {
142 if (wcscmp(argv[i], L"--dump-code") == 0) {
143 if (rpa_dbex_compile(pGrep->hDbex) == 0) {
145 rpa_dbex_dumpcode(pGrep->hDbex, rpa_dbex_lookup_s(pGrep->hDbex, argv[i]));
/* Pass: --dump-info. */
153 for (i = 1; i < argc; i++) {
154 if (wcscmp(argv[i], L"--dump-info") == 0) {
155 rpa_grep_dump_pattern_info(pGrep);
/* Pass: --debug-compile. */
160 for (i = 1; i < argc; i++) {
161 if (wcscmp(argv[i], L"--debug-compile") == 0) {
162 rpa_grep_debug_compile(pGrep);
/* Pass: --dump-alias. */
168 for (i = 1; i < argc; i++) {
169 if (wcscmp(argv[i], L"--dump-alias") == 0) {
170 rpa_grep_dump_alias_info(pGrep);
/* Pass: --dump-records. */
175 for (i = 1; i < argc; i++) {
176 if (wcscmp(argv[i], L"--dump-records") == 0) {
177 rpa_grep_dump_pattern_records(pGrep);
/* Pass: --exec-debug sets the execdebug flag on the grep object. */
182 for (i = 1; i < argc; i++) {
183 if (wcscmp(argv[i], L"--exec-debug") == 0) {
184 pGrep->execdebug = 1;
/* Pass: --dissable-cache (spelled exactly as the option string) sets the
 * disablecache flag on the grep object. */
188 for (i = 1; i < argc; i++) {
189 if (wcscmp(argv[i], L"--dissable-cache") == 0) {
190 pGrep->disablecache = 1;
/* Compile the loaded rules. On a negative result the last error is fetched
 * and reported on stdout with the narrow fprintf: the unresolved symbol name
 * for RPA_E_UNRESOLVEDSYMBOL, and the numeric code in the other visible
 * message. What happens after reporting is not visible. */
195 if (rpa_dbex_compile(pGrep->hDbex) < 0) {
196 rpa_errinfo_t errinfo;
197 rpa_dbex_lasterrorinfo(pGrep->hDbex, &errinfo);
198 if (errinfo.code == RPA_E_UNRESOLVEDSYMBOL) {
199 fprintf(stdout, "ERROR: Unresolved Symbol: %s\n", errinfo.name);
201 fprintf(stdout, "ERROR %ld: Compilation failed.\n", errinfo.code);
/* Pass: listing, tree dump and grep mode selection.
 *   -L  list the patterns
 *   -d  dump the tree of a pattern given as an argument
 *   -i  action not visible (documented in usage() as "Ignore case")
 *   -l / -m / -p / -a  choose the grep type
 *   -16 / -b           force the input encoding */
207 for (i = 1; i < argc; i++) {
208 if (wcscmp(argv[i], L"-L") == 0) {
209 rpa_grep_list_patterns(pGrep);
211 } else if (wcscmp(argv[i], L"-d") == 0) {
213 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
217 rpa_grep_dump_pattern_tree(pGrep, pattern);
218 rpa_buffer_destroy(pattern);
221 } else if (wcscmp(argv[i], L"-i") == 0) {
223 } else if (wcscmp(argv[i], L"-l") == 0) {
224 pGrep->greptype = RPA_GREPTYPE_SCANLINES;
225 } else if (wcscmp(argv[i], L"-m") == 0) {
226 pGrep->greptype = RPA_GREPTYPE_MATCH;
227 } else if (wcscmp(argv[i], L"-p") == 0) {
228 pGrep->greptype = RPA_GREPTYPE_PARSE;
229 } else if (wcscmp(argv[i], L"-a") == 0) {
230 pGrep->greptype = RPA_GREPTYPE_PARSEAST;
231 } else if (wcscmp(argv[i], L"-16") == 0) {
232 pGrep->forceEncoding = RPA_GREP_FORCE_UTF16;
233 } else if (wcscmp(argv[i], L"-b") == 0) {
234 pGrep->forceEncoding = RPA_GREP_FORCE_BYTE;
/* Pass: -s. A string taken from the command line is scanned directly through
 * a temporary buffer. */
240 for (i = 1; i < argc; i++) {
241 if (wcscmp(argv[i], L"-s") == 0) {
243 rpa_buffer_t *buf = rpa_buffer_from_wchar(argv[i]);
244 rpa_grep_scan_buffer(pGrep, buf);
245 rpa_buffer_destroy(buf);
/* Scan pass: every argument that does not start with '-' is scanned as a
 * path. Options whose second character is e, f, c or C get separate handling
 * that is not visible - presumably their value argument is skipped.
 * The stdin block that follows loads and scans standard input; its guarding
 * condition is not visible. */
252 for (i = 1; i < argc; i++) {
253 if (argv[i][0] != L'-') {
255 rpa_grep_scan_path(pGrep, argv[i]);
256 } else if (argv[i][1] == L'e' || argv[i][1] == L'f' || argv[i][1] == L'c' || argv[i][1] == L'C'){
263 rpa_buffer_t *buf = rpa_buffer_loadfile(stdin);
265 rpa_grep_scan_buffer(pGrep, buf);
266 rpa_buffer_destroy(buf);
/* NOTE(review): second, near-identical scan pass. It compares against a
 * narrow '-' and does not list 'C'. The construct that selects between the
 * two copies is not visible - confirm both are still needed. */
272 for (i = 1; i < argc; i++) {
273 if (argv[i][0] != '-') {
275 rpa_grep_scan_path(pGrep, argv[i]);
276 } else if (argv[i][1] == L'e' || argv[i][1] == L'f' || argv[i][1] == L'c'){
283 rpa_buffer_t *buf = rpa_buffer_loadfile(stdin);
285 rpa_grep_scan_buffer(pGrep, buf);
286 rpa_buffer_destroy(buf);
/* Cleanup: release the pattern buffers kept alive since the -f pass, then
 * the array itself, then close the grep object. */
291 for (i = 0; i < r_array_length(buffers); i++) {
292 rpa_buffer_destroy(r_array_index(buffers, i, rpa_buffer_t*));
294 r_object_destroy((robject_t*)buffers);
295 rpa_grep_close(pGrep);
/* NOTE(review): rpa_grep_close() is called a second time here, and pGrep is
 * still read afterwards - confirm that closing twice is safe. */
298 rpa_grep_close(pGrep);
/* Amount of scanned data, in units of 1024 bytes. */
299 sckb = (unsigned long)(pGrep->scsize/1024);
/* Optional timing report: elapsed time split into minutes and seconds,
 * scanned size, throughput, stack use and allocator statistics.
 * NOTE(review): the throughput term divides by milsec; a zero guard is
 * presumably on a line missing from this listing - confirm.
 * The stack and memory figures are divided by 1000 although labelled KB. */
301 if (pGrep->showtime) {
302 unsigned long milsec;
303 unsigned long minutes;
305 milsec = pGrep->scanmilisec;
308 minutes = milsec/60000;
309 sec = (milsec%60000)/1000.0;
310 fwprintf(stdout, L"\ntime: %0ldm%1.3fs, %ld KB (%ld KB/sec), stack: %ld KB, memory: %ld KB (leaked %ld Bytes), cachehit: %ld \n",
311 minutes, sec, sckb, 1000*sckb/milsec, pGrep->usedstack / 1000, (long)r_debug_get_maxmem()/1000, (long)r_debug_get_allocmem(),
/* NOTE(review): two rpa_grep_destroy() calls follow - presumably the normal
 * and the error exit paths; the labels and return statements separating them
 * are not visible. */
315 rpa_grep_destroy(pGrep);
319 rpa_grep_destroy(pGrep);