/*
 * Regular Pattern Analyzer (RPA)
 * Copyright (c) 2009-2010 Martin Stoilov
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * Martin Stoilov <martin@rpasearch.com>
 */
24 #include "rlib/rmem.h"
25 #include "rlib/rarray.h"
26 #include "rpa/rpadbex.h"
28 #include "rpagrepdep.h"
/*
 * Forward declaration; the definition is not in this part of the file.
 * wmain passes command-line arguments to it and hands each returned buffer
 * to rpa_buffer_destroy() once it has been used.
 * NOTE(review): presumably builds a new rpa_buffer_t from the wide string
 * and may return NULL on failure - confirm against the definition.
 */
30 rpa_buffer_t * rpa_buffer_from_wchar(const wchar_t *wstr);
/*
 * usage - print the program banner and the command-line help to stderr.
 *
 * argc: number of entries in argv (not consulted).
 * argv: wide-character argument vector; argv[0] is printed as the program
 *       name.
 *
 * Returns 0.
 *
 * The string conversions carry explicit size prefixes because a bare %s in
 * the wide printf family is read as a wide string by the legacy Microsoft
 * CRT but as a narrow string by ISO C, so one of the two arguments printed
 * here was always mismatched. With the Microsoft CRT, %hs always means a
 * narrow string and %ls always means a wide string.
 * NOTE(review): assumes rpa_dbex_version() returns a narrow const char * -
 * confirm against rpa/rpadbex.h.
 */
int usage(int argc, const wchar_t *argv[])
{
	/* One entry per help line, written out verbatim after the banner. */
	static const wchar_t *const option_help[] = {
		L" OPTIONS:\n",
		L"\t-e patterns BNF Expression.\n",
		L"\t-f patternfile Read the BNF rules from a file, the last pattern will be executed.\n",
		L"\t-i Ignore case.\n",
		L"\t-m Match.\n",
		L"\t-p Parse.\n",
		L"\t-l Line mode.\n",
		L"\t-16 Force UTF16 encoding.\n",
		L"\t-b Force byte encoding.\n",
		L"\t-d Dump a production in a tree format.\n",
		L"\t-t Display time elapsed.\n",
		L"\t-L, --list-rules List all patterns.\n",
		L"\t-v Display version information.\n",
		L"\t-h, --help Display this help.\n",
		L"\t --debug-compile Display debug compilation information.\n",
		L"\t --dump-info Display rules info.\n",
		L"\t --dump-code rule Display compiled code for rule.\n",
		L"\t --dump-alias Display alias info.\n",
		L"\t --dump-records Display rules parsing records.\n",
		L"\t --no-optimizations Disable optimizations.\n",
		L"\t --exec-debug Execute in debug mode.\n",
		L"\t --no-cache Disable execution cache.\n",
		L"\t --no-bitmap Disable expression bitmap use.\n",
	};
	size_t k;

	(void)argc;

	fwprintf(stderr, L"RPA Grep with RPA Engine: %hs \n", rpa_dbex_version());
	fwprintf(stderr, L"Copyright (C) 2010 Martin Stoilov\n\n");
	fwprintf(stderr, L"Usage: \n %ls [OPTIONS] <filename>\n", argv[0]);

	/* None of the help lines contains a conversion, so fputws emits exactly
	 * what fwprintf would. */
	for (k = 0; k < sizeof(option_help) / sizeof(option_help[0]); k++) {
		fputws(option_help[k], stderr);
	}

	return 0;
}
/*
 * wmain - wide-character entry point of the RPA grep command-line tool.
 *
 * NOTE(review): this listing is a lossy extraction of the function. Every
 * statement still carries its original source line number as a prefix, and
 * a number of original lines (braces, blank lines and whatever else sat in
 * the numbering gaps) are absent. The comments below describe only the
 * statements that are visible; restore the missing lines from the upstream
 * file before building.
 *
 * Visible flow:
 *   1. create the array of pattern buffers and the grep context;
 *   2. walk argv once per option so options take effect in a fixed order
 *      regardless of their position on the command line;
 *   3. compile the loaded rules and report a compilation error;
 *   4. apply listing/dump/mode/encoding options;
 *   5. scan the -s string, the named paths, or stdin;
 *   6. destroy the pattern buffers, close the context, optionally print
 *      timing statistics, and destroy the context.
 *
 * argc/argv: wide-character command line; scanning of argv always starts
 * at index 1.
 */
67 int wmain(int argc, const wchar_t* argv[])
70 int ret, scanned = 0, i;
71 rpa_grep_t *pGrep = NULL;
/* bticks records the tick count at startup; no use of eticks is visible here. */
72 DWORD eticks, bticks = GetTickCount();
/* Array of rpa_buffer_t pointers; the -f pass adds to it and the cleanup loop destroys each entry. */
75 buffers = r_array_create(sizeof(rpa_buffer_t *));
76 pGrep = rpa_grep_create();
/* Pass: -t. The body of this test is not visible; pGrep->showtime is read
 * later, so presumably it is set here - confirm. */
83 for (i = 1; i < argc; i++) {
84 if (wcscmp(argv[i], L"-t") == 0) {
/* Pass: help request in any of its four spellings (body not visible). */
89 for (i = 1; i < argc; i++) {
90 if (wcscmp(argv[i], L"--help") == 0 || wcscmp(argv[i], L"-help") == 0 || wcscmp(argv[i], L"/?") == 0 || wcscmp(argv[i], L"-h") == 0) {
/* Pass: --no-bitmap sets the RPA_DBEXCFG_BITMAP configuration value to 0. */
96 for (i = 1; i < argc; i++) {
97 if (wcscmp(argv[i], L"--no-bitmap") == 0) {
98 rpa_dbex_cfgset(pGrep->hDbex, RPA_DBEXCFG_BITMAP, 0);
/* Pass: --no-optimizations. Compared with wcscmp, the wide-string
 * comparison used by every other option test in this function; wstrcmp is
 * not a standard or CRT function. */
102 for (i = 1; i < argc; i++) {
103 if (wcscmp(argv[i], L"--no-optimizations") == 0) {
104 rpa_grep_optimizations(pGrep, 0);
/* Pass: -c. A buffer built from argv[i] is given to
 * rpa_grep_setup_callback and then destroyed.
 * NOTE(review): presumably i is advanced to the option's value and the
 * buffer is NULL-checked on lines absent from this view - confirm. */
108 for (i = 1; i < argc; i++) {
109 if (wcscmp(argv[i], L"-c") == 0) {
111 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
115 rpa_grep_setup_callback(pGrep, pattern);
116 rpa_buffer_destroy(pattern);
/* Pass: -f. The file named by argv[i] is mapped and loaded as a pattern.
 * The buffer is kept in `buffers` rather than destroyed here; it is
 * destroyed in the cleanup loop near the end of the function. */
122 for (i = 1; i < argc; i++) {
123 if (wcscmp(argv[i], L"-f") == 0) {
125 rpa_buffer_t *pattern = rpa_buffer_map_file(argv[i]);
127 ret = rpa_grep_load_pattern(pGrep, pattern);
128 r_array_add(buffers, &pattern);
/* Pass: -e. The pattern text comes from the command line; its buffer is
 * destroyed right after loading. How ret is checked is not visible. */
138 for (i = 1; i < argc; i++) {
139 if (wcscmp(argv[i], L"-e") == 0) {
141 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
145 ret = rpa_grep_load_string_pattern(pGrep, pattern);
146 rpa_buffer_destroy(pattern);
/* Pass: --dump-code. The rules are compiled first and the code is dumped
 * only when rpa_dbex_compile returns 0.
 * NOTE(review): argv[i] is a wide string - confirm that rpa_dbex_lookup_s
 * accepts one. */
155 for (i = 1; i < argc; i++) {
156 if (wcscmp(argv[i], L"--dump-code") == 0) {
157 if (rpa_dbex_compile(pGrep->hDbex) == 0) {
159 rpa_dbex_dumpcode(pGrep->hDbex, rpa_dbex_lookup_s(pGrep->hDbex, argv[i]));
/* Pass: --dump-info. */
167 for (i = 1; i < argc; i++) {
168 if (wcscmp(argv[i], L"--dump-info") == 0) {
169 rpa_grep_dump_pattern_info(pGrep);
/* Pass: --debug-compile. */
174 for (i = 1; i < argc; i++) {
175 if (wcscmp(argv[i], L"--debug-compile") == 0) {
176 rpa_grep_debug_compile(pGrep);
/* Pass: --dump-alias. */
181 for (i = 1; i < argc; i++) {
182 if (wcscmp(argv[i], L"--dump-alias") == 0) {
183 rpa_grep_dump_alias_info(pGrep);
/* Pass: --dump-records. */
188 for (i = 1; i < argc; i++) {
189 if (wcscmp(argv[i], L"--dump-records") == 0) {
190 rpa_grep_dump_pattern_records(pGrep);
/* Pass: --exec-debug sets the execdebug flag on the context. */
195 for (i = 1; i < argc; i++) {
196 if (wcscmp(argv[i], L"--exec-debug") == 0) {
197 pGrep->execdebug = 1;
/* Pass: --no-cache sets the disablecache flag on the context. */
201 for (i = 1; i < argc; i++) {
202 if (wcscmp(argv[i], L"--no-cache") == 0) {
203 pGrep->disablecache = 1;
/* Compile the rules; a negative result is reported on stdout, with the
 * symbol name when the error code is RPA_E_UNRESOLVEDSYMBOL.
 * NOTE(review): these are narrow fprintf calls on stdout, which also
 * receives wide output from the timing report below - confirm that mixing
 * the two on one stream is intended. */
208 if (rpa_dbex_compile(pGrep->hDbex) < 0) {
209 rpa_errinfo_t errinfo;
210 rpa_dbex_lasterrorinfo(pGrep->hDbex, &errinfo);
211 if (errinfo.code == RPA_E_UNRESOLVEDSYMBOL) {
212 fprintf(stdout, "ERROR: Unresolved Symbol: %s\n", errinfo.name);
214 fprintf(stdout, "ERROR %ld: Compilation failed.\n", errinfo.code);
/* Pass: listing, tree dump, grep mode and forced-encoding options. The
 * body of the -i branch is not visible. */
220 for (i = 1; i < argc; i++) {
221 if (wcscmp(argv[i], L"-L") == 0) {
222 rpa_grep_list_patterns(pGrep);
224 } else if (wcscmp(argv[i], L"-d") == 0) {
226 rpa_buffer_t *pattern = rpa_buffer_from_wchar(argv[i]);
230 rpa_grep_dump_pattern_tree(pGrep, pattern);
231 rpa_buffer_destroy(pattern);
234 } else if (wcscmp(argv[i], L"-i") == 0) {
236 } else if (wcscmp(argv[i], L"-l") == 0) {
237 pGrep->greptype = RPA_GREPTYPE_SCANLINES;
238 } else if (wcscmp(argv[i], L"-m") == 0) {
239 pGrep->greptype = RPA_GREPTYPE_MATCH;
240 } else if (wcscmp(argv[i], L"-p") == 0) {
241 pGrep->greptype = RPA_GREPTYPE_PARSE;
242 } else if (wcscmp(argv[i], L"-a") == 0) {
243 pGrep->greptype = RPA_GREPTYPE_PARSEAST;
244 } else if (wcscmp(argv[i], L"-16") == 0) {
245 pGrep->forceEncoding = RPA_GREP_FORCE_UTF16;
246 } else if (wcscmp(argv[i], L"-b") == 0) {
247 pGrep->forceEncoding = RPA_GREP_FORCE_BYTE;
/* Pass: -s scans a buffer built from the command-line string itself. */
253 for (i = 1; i < argc; i++) {
254 if (wcscmp(argv[i], L"-s") == 0) {
256 rpa_buffer_t *buf = rpa_buffer_from_wchar(argv[i]);
257 rpa_grep_scan_buffer(pGrep, buf);
258 rpa_buffer_destroy(buf);
/* Scan every argument that does not begin with '-' as a path. The else
 * branch singles out options whose second character is e, f, c or C; its
 * body is not visible. The stdin fallback below follows the loop.
 * NOTE(review): the condition selecting the stdin fallback is not visible;
 * `scanned` is declared above and presumably takes part in it - confirm. */
265 for (i = 1; i < argc; i++) {
266 if (argv[i][0] != L'-') {
268 rpa_grep_scan_path(pGrep, argv[i]);
269 } else if (argv[i][1] == L'e' || argv[i][1] == L'f' || argv[i][1] == L'c' || argv[i][1] == L'C'){
276 rpa_buffer_t *buf = rpa_buffer_loadfile(stdin);
278 rpa_grep_scan_buffer(pGrep, buf);
279 rpa_buffer_destroy(buf);
/* Second, near-identical scan loop: it omits the 'C' test and compares
 * against a narrow '-' literal.
 * NOTE(review): what selects this loop over the previous one sits on lines
 * absent from this view - confirm both are still needed. */
285 for (i = 1; i < argc; i++) {
286 if (argv[i][0] != '-') {
288 rpa_grep_scan_path(pGrep, argv[i]);
289 } else if (argv[i][1] == L'e' || argv[i][1] == L'f' || argv[i][1] == L'c'){
296 rpa_buffer_t *buf = rpa_buffer_loadfile(stdin);
298 rpa_grep_scan_buffer(pGrep, buf);
299 rpa_buffer_destroy(buf);
/* Cleanup: destroy every buffer collected by the -f pass, then the array. */
304 for (i = 0; i < r_array_length(buffers); i++) {
305 rpa_buffer_destroy(r_array_index(buffers, i, rpa_buffer_t*));
307 r_object_destroy((robject_t*)buffers);
/* NOTE(review): two rpa_grep_close calls are visible back to back, and
 * pGrep->scsize is read after them. Whether a label or return separates
 * the calls cannot be told from this view - confirm the context is not
 * closed twice on one path. */
308 rpa_grep_close(pGrep);
311 rpa_grep_close(pGrep);
/* Scanned size expressed in KB. */
312 sckb = (unsigned long)(pGrep->scsize/1024);
/* Optional timing report, printed to stdout when showtime is set. */
314 if (pGrep->showtime) {
315 unsigned long milsec;
316 unsigned long minutes;
318 milsec = pGrep->scanmilisec;
/* NOTE(review): milsec is used as a divisor in the report below; presumably
 * a zero guard sits on the absent lines just after the assignment above -
 * confirm. */
321 minutes = milsec/60000;
322 sec = (milsec%60000)/1000.0;
323 fwprintf(stdout, L"\ntime: %0ldm%1.3fs, %ld KB (%ld KB/sec), stack: %ld KB, memory: %ld KB (leaked %ld Bytes), cachehit: %ld \n",
324 minutes, sec, sckb, 1000*sckb/milsec, pGrep->usedstack / 1000, (long)r_debug_get_maxmem()/1000, (long)r_debug_get_allocmem(),
/* NOTE(review): two rpa_grep_destroy calls are visible; presumably they
 * belong to separate exit paths whose labels/returns are absent - confirm. */
328 rpa_grep_destroy(pGrep);
332 rpa_grep_destroy(pGrep);