RPA Toolkit
Removed RMV_ prefix from the opcode display names.
[rpatk.git] / rgrep / unix / main.c
1 /*
2  *  Regular Pattern Analyzer (RPA)
3  *  Copyright (c) 2009-2010 Martin Stoilov
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  *  Martin Stoilov <martin@rpasearch.com>
19  */
20
21 #include <stdio.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <sys/mman.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <wchar.h>
31 #include <time.h>
32 #include "rlib/rmem.h"
33 #include "rlib/rarray.h"
34 #include "rpa/rpadbex.h"
35 #include "rpagrep.h"
36 #include "rpagrepdep.h"
37
38
/*
 * Print the version banner, the copyright line and the command line help
 * to stderr.
 *
 * argc is not used. argv[0] is printed as the program name in the
 * "Usage:" line. Always returns 0.
 */
int usage(int argc, const char *argv[])
{
	/* Fixed help text, emitted verbatim one entry after another. */
	static const char *const help_lines[] = {
		" OPTIONS:\n",
		"\t-e patterns              BNF Expression.\n",
		"\t-f patternfile           Read the BNF rules from a file, the last pattern will be executed.\n",
		"\t-i                       Ignore case.\n",
		"\t-m                       Match.\n",
		"\t-p                       Parse.\n",
		"\t-l                       Line mode.\n",
		"\t-16                      Force UTF16 encoding.\n",
		"\t-b                       Force byte encoding.\n",
		"\t-d                       Dump a production in a tree format.\n",
		"\t-t                       Display time elapsed.\n",
		"\t-L, --list-rules         List all patterns.\n",
		"\t-v                       Display version information.\n",
		"\t-h, --help               Display this help.\n",
		"\t    --debug-compile      Display debug compilation information.\n",
		"\t    --dump-info          Display rules info.\n",
		"\t    --dump-code rule     Display compiled code for rule.\n",
		"\t    --dump-alias         Display alias info.\n",
		"\t    --dump-records       Display rules parsing records.\n",
		"\t    --no-optimizations   Disable optimizations.\n",
		"\t    --exec-debug         Execute in debug mode.\n",
		"\t    --dissable-cache     Dissable execution cache.\n",
	};
	size_t line;

	/* Only the banner and the usage line need formatting. */
	fprintf(stderr, "RPA Grep with RPA Engine: %s \n", rpa_dbex_version());
	fputs("Copyright (C) 2010 Martin Stoilov\n\n", stderr);
	fprintf(stderr, "Usage: \n %s [OPTIONS] <filename>\n", argv[0]);

	for (line = 0; line < sizeof(help_lines) / sizeof(help_lines[0]); line++)
		fputs(help_lines[line], stderr);

	return 0;
}
70
71
72 int main(int argc, const char *argv[])
73 {
74         unsigned long sckb = 0;
75         int ret, scanned = 0, i;
76         rpa_grep_t *pGrep;
77         rarray_t *buffers;
78
79         buffers = r_array_create(sizeof(rpa_buffer_t *));
80         pGrep = rpa_grep_create();
81         if (argc <= 1) {
82                 usage(argc, argv);
83                 goto end;
84         }
85
86         for (i = 1; i < argc; i++) {
87                 if (strcmp(argv[i], "-t") == 0) {
88                         pGrep->showtime = 1;
89                 }
90         }
91
92         for (i = 1; i < argc; i++) {
93                 if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "/?") == 0 || strcmp(argv[i], "-h") == 0) {
94                         usage(argc, argv);
95                         goto end;
96                 }
97         }
98
99         for (i = 1; i < argc; i++) {
100                 if (strcmp(argv[i], "-v") == 0) {
101                         fprintf(stderr, "RPA Grep with RPA Engine: %s\n", rpa_dbex_version());
102                         goto end;
103                 }
104         }
105
106         for (i = 1; i < argc; i++) {
107                 if (strcmp(argv[i], "--no-optimizations") == 0) {
108                         rpa_grep_optimizations(pGrep, 0);
109                 }
110         }
111
112         for (i = 1; i < argc; i++) {
113                 if (strcmp(argv[i], "-f") == 0) {
114                         if (++i < argc) {
115                                 rpa_buffer_t *pattern = rpa_buffer_map_file(argv[i]);
116                                 if (pattern) {
117                                         ret = rpa_grep_load_pattern(pGrep, pattern);
118                                         r_array_add(buffers, &pattern);
119                                 } else {
120                                         ret = -1;
121                                 }
122                                 if (ret < 0)
123                                         goto error;
124                         }
125                 }
126         }
127
128         for (i = 1; i < argc; i++) {
129                 if (strcmp(argv[i], "-e") == 0) {
130                         if (++i < argc) {
131                                 rpa_buffer_t pattern;
132                                 pattern.s = (char*)argv[i];
133                                 pattern.size = strlen(argv[i]);
134                                 ret = rpa_grep_load_string_pattern(pGrep, &pattern);
135                                 if (ret < 0)
136                                         goto error;
137                         }
138                         
139                 }
140         }
141
142         for (i = 1; i < argc; i++) {
143                 if (strcmp(argv[i], "--dump-code") == 0) {
144                         if (rpa_dbex_compile(pGrep->hDbex) == 0) {
145                                 if (++i < argc) {
146                                         rpa_dbex_dumpcode(pGrep->hDbex, rpa_dbex_lookup_s(pGrep->hDbex, argv[i]));
147                                 }
148                         }
149                         goto end;
150                 }
151         }
152
153
154         for (i = 1; i < argc; i++) {
155                 if (strcmp(argv[i], "--dump-info") == 0) {
156                         rpa_grep_dump_pattern_info(pGrep);
157                         goto end;
158                 }
159         }
160
161         for (i = 1; i < argc; i++) {
162                 if (strcmp(argv[i], "--debug-compile") == 0) {
163                         rpa_grep_debug_compile(pGrep);
164                         goto end;
165                 }
166         }
167
168
169         for (i = 1; i < argc; i++) {
170                 if (strcmp(argv[i], "--dump-alias") == 0) {
171                         rpa_grep_dump_alias_info(pGrep);
172                         goto end;
173                 }
174         }
175
176         for (i = 1; i < argc; i++) {
177                 if (strcmp(argv[i], "--dump-records") == 0) {
178                         rpa_grep_dump_pattern_records(pGrep);
179                         goto end;
180                 }
181         }
182
183         for (i = 1; i < argc; i++) {
184                 if (strcmp(argv[i], "--exec-debug") == 0) {
185                         pGrep->execdebug = 1;
186                 }
187         }
188
189         for (i = 1; i < argc; i++) {
190                 if (strcmp(argv[i], "--dissable-cache") == 0) {
191                         pGrep->disablecache = 1;
192                 }
193         }
194
195
196         if (rpa_dbex_compile(pGrep->hDbex) < 0) {
197                 rpa_errinfo_t errinfo;
198                 rpa_dbex_lasterrorinfo(pGrep->hDbex, &errinfo);
199                 if (errinfo.code == RPA_E_UNRESOLVEDSYMBOL) {
200                         fprintf(stdout, "ERROR: Unresolved Symbol: %s\n", errinfo.name);
201                 } else {
202                         fprintf(stdout, "ERROR %ld: Compilation failed.\n", errinfo.code);
203                 }
204                 goto end;
205         }
206
207
208         for (i = 1; i < argc; i++) {
209                 if (strcmp(argv[i], "--dump-records") == 0) {
210                         rpa_grep_dump_pattern_records(pGrep);
211                         goto end;
212                 } else if (strcmp(argv[i], "--dump-info") == 0) {
213                         rpa_grep_dump_pattern_info(pGrep);
214                         goto end;
215                 } else if (strcmp(argv[i], "-L") == 0 || strcmp(argv[i], "--list-rules") == 0) {
216                         rpa_grep_list_patterns(pGrep);
217                         goto end;
218                 } else if (strcmp(argv[i], "-d") == 0) {
219                         if (++i < argc) {
220                                 if (argv[i]) {
221                                         rpa_buffer_t pattern;
222                                         pattern.s = (char*)argv[i];
223                                         pattern.size = strlen(argv[i]);                                 
224                                         rpa_grep_dump_pattern_tree(pGrep, &pattern);
225                                         goto end;
226                                 }
227                         }
228                 } else if (strcmp(argv[i], "-i") == 0) {
229                         pGrep->icase = 1;
230                 } else if (strcmp(argv[i], "-l") == 0) {
231                         pGrep->greptype = RPA_GREPTYPE_SCANLINES;
232                 } else if (strcmp(argv[i], "-m") == 0) {
233                         pGrep->greptype = RPA_GREPTYPE_MATCH;
234                 } else if (strcmp(argv[i], "-p") == 0) {
235                         pGrep->greptype = RPA_GREPTYPE_PARSE;
236                 } else if (strcmp(argv[i], "-a") == 0) {
237                         pGrep->greptype = RPA_GREPTYPE_PARSEAST;
238                 } else if (strcmp(argv[i], "-16") == 0) {
239                         pGrep->forceEncoding = RPA_GREP_FORCE_UTF16;
240                 } else if (strcmp(argv[i], "-b") == 0) {
241                         pGrep->forceEncoding = RPA_GREP_FORCE_BYTE;
242                 }
243                 
244         }
245
246
247         for (i = 1; i < argc; i++) {
248                 if (strcmp(argv[i], "-s") == 0) {
249                         if (++i < argc) {
250                                 rpa_buffer_t buf;
251                                 buf.s = (char*)argv[i];
252                                 buf.size = r_strlen(argv[i]);
253                                 rpa_grep_scan_buffer(pGrep, &buf);
254                                 ++scanned;
255                         }
256                 }
257         }
258
259         /* scan files */
260         for (i = 1; i < argc; i++) {
261                 if (argv[i][0] != '-') {
262                         ++scanned;
263                         rpa_grep_scan_path(pGrep, argv[i]);
264                 } else if (argv[i][1] == 'e' || argv[i][1] == 'f' || argv[i][1] == 'c' || argv[i][1] == 'C'){
265                         ++i;
266                 }
267                 
268         }
269
270         if (!scanned) {
271                 rpa_buffer_t *buf = rpa_buffer_loadfile(stdin);
272                 if (buf) {
273                         rpa_grep_scan_buffer(pGrep, buf);
274                         rpa_buffer_destroy(buf);
275                 }
276         }
277
278 end:
279         for (i = 0; i < r_array_length(buffers); i++) {
280                 rpa_buffer_destroy(r_array_index(buffers, i, rpa_buffer_t*));
281         }
282         r_object_destroy((robject_t*)buffers);
283         rpa_grep_close(pGrep);
284         if (pGrep->showtime) {
285                 sckb = (unsigned long)(pGrep->scsize/1024);
286                 unsigned long milsec;
287                 unsigned long minutes;
288                 float sec;
289                 milsec = pGrep->scanmilisec;
290                 if (milsec == 0)
291                         milsec = 1;
292                 minutes = milsec/60000;
293                 sec = (milsec%60000)/1000.0;
294                 fprintf(stdout, "\ntime: %0ldm%1.3fs, %ld KB (%ld KB/sec), stack: %ld KB, memory: %ld KB (leaked %ld Bytes), cachehit: %ld \n",
295                                 minutes, sec, sckb, 1000*sckb/milsec, pGrep->usedstack / 1000, (long)r_debug_get_maxmem()/1000, (long)r_debug_get_allocmem(),
296                                 pGrep->cachehit);
297         }
298
299         rpa_grep_destroy(pGrep);
300         return 0;
301
302 error:
303         rpa_grep_destroy(pGrep);
304         return 1;
305 }
306
307