RPA Toolkit
Work on documentation.
[rpatk.git] / rexgrep / unix / main.c
1 /*
2  *  Regular Pattern Analyzer Toolkit (RPA/Tk)
3  *  Copyright (c) 2009-2012 Martin Stoilov
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  *  Martin Stoilov <martin@rpasearch.com>
19  */
20
21 #include <stdio.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <sys/mman.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <wchar.h>
31 #include <time.h>
32 #include <errno.h>
33 #include "rlib/rmem.h"
34 #include "rlib/rarray.h"
35 #include "rex/rexdfaconv.h"
36 #include "rex/rexdfa.h"
37 #include "rexgrep.h"
38 #include "rexgrepdep.h"
39
40
/*
 * Print the program banner and the command line help to stderr.
 *
 * argc is not used; argv[0] is printed as the program name in the
 * usage line.  Always returns 0.
 */
int usage(int argc, const char *argv[])
{
        /* The option list is constant text, so it is kept as a single string. */
        static const char options[] =
                " OPTIONS:\n"
                "\t-e patterns              Regular Expression.\n"
                "\t-f patternfile           Read Regular Expressions from a file.\n"
                "\t-b binfile               Use DFA from binfile.\n"
                "\t-c                       Compile DFA and save to binfile. Use -b option to specify the name of the file.\n"
                "\t-o, --only-matching      Show only the part of a line matching PATTERN\n"
                "\t-l                       Line mode.\n"
                "\t-N                       Use NFA.\n"
                "\t-D                       Dump states.\n"
                "\t-S                       Include DFA substates.\n"
                "\t-q                       Quiet mode.\n"
                "\t-t                       Display statistics. Works only when built in DEBUG mode.\n"
                "\t-s string                Search in string.\n"
                "\t-v                       Display version information.\n"
                "\t-h, --help               Display this help.\n";

        (void)argc;

        fprintf(stderr, "REX Grep - using library version: %s \n", rex_db_version());
        fputs("Copyright (C) 2012 Martin Stoilov\n\n", stderr);
        fprintf(stderr, "Usage: \n %s [OPTIONS] <filename>\n", argv[0]);
        fputs(options, stderr);

        return 0;
}
65
66
67 int grep_buffer_realloc(rbuffer_t *buffer, unsigned long size)
68 {
69         char *s;
70
71         s = (char *)r_realloc(buffer->s, size);
72         if (!s)
73                 return -1;
74         buffer->s = s;
75         buffer->size = size;
76         return 0;
77
78 }
79
80
81 rbuffer_t *grep_buffer_loadfile(FILE *pFile)
82 {
83         unsigned long memchunk = 256;
84         long ret = 0, inputsize = 0;
85         rbuffer_t *buf;
86
87         buf = r_buffer_create(2 * memchunk);
88         if (!buf)
89                 return (void*)0;
90
91         do {
92                 if ((buf->size - inputsize) < memchunk) {
93                         if (grep_buffer_realloc(buf, buf->size + memchunk) < 0) {
94                                 fprintf(stderr, "Out of memory!\n");
95                                 exit(1);
96                         }
97                 }
98                 ret = (long)fread(&buf->s[inputsize], 1, memchunk - 1, pFile);
99                 if ((ret <= 0) && ferror(pFile)) {
100                         r_buffer_destroy(buf);
101                         return (void*)0;
102                 }
103                 inputsize += ret;
104                 buf->s[inputsize] = '\0';
105                 buf->size = inputsize;
106         } while (!feof(pFile));
107
108         return buf;
109 }
110
111
/*
 * What main() does with the DFA binary file:
 *   REXGREP_BINOP_NONE  - initial value, no binary file operation requested.
 *   REXGREP_BINOP_READ  - selected by "-b binfile": load the DFA from the file.
 *   REXGREP_BINOP_WRITE - selected by "-c": save the compiled DFA to the file.
 */
#define REXGREP_BINOP_NONE 0
#define REXGREP_BINOP_READ 1
#define REXGREP_BINOP_WRITE 2
115
116
117 int main(int argc, const char *argv[])
118 {
119         int ret, scanned = 0, i;
120         rexgrep_t *pGrep;
121         rarray_t *buffers;
122         const char *binfile = NULL;
123         int binop = REXGREP_BINOP_NONE;
124         FILE *devnull = NULL;
125
126         buffers = r_array_create(sizeof(rbuffer_t *));
127         pGrep = rex_grep_create();
128         pGrep->greptype = REX_GREPTYPE_SCANLINES;
129         pGrep->usedfa = 1;
130         if (argc <= 1) {
131                 usage(argc, argv);
132                 goto end;
133         }
134
135         for (i = 1; i < argc; i++) {
136                 if (strcmp(argv[i], "-t") == 0) {
137                         pGrep->showtime = 1;
138                 }
139         }
140
141         for (i = 1; i < argc; i++) {
142                 if (strcmp(argv[i], "-S") == 0) {
143                         pGrep->withsubstates = 1;
144                 }
145         }
146
147         for (i = 1; i < argc; i++) {
148                 if (strcmp(argv[i], "-H") == 0 || strcmp(argv[i], "--with-filename") == 0) {
149                         pGrep->showfilename = 1;
150                 }
151         }
152
153         for (i = 1; i < argc; i++) {
154                 if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "/?") == 0 || strcmp(argv[i], "-h") == 0) {
155                         usage(argc, argv);
156                         goto end;
157                 }
158         }
159
160         for (i = 1; i < argc; i++) {
161                 if (strcmp(argv[i], "-v") == 0) {
162                         fprintf(stderr, "REX Grep with REX Engine: %s\n", rex_db_version());
163                         goto end;
164                 }
165         }
166
167         for (i = 1; i < argc; i++) {
168                 if (strcmp(argv[i], "-b") == 0) {
169                         if (++i < argc) {
170                                 binfile = argv[i];
171                                 binop = REXGREP_BINOP_READ;
172                         }
173                 }
174         }
175
176         for (i = 1; i < argc; i++) {
177                 if (strcmp(argv[i], "-c") == 0) {
178                         binop = REXGREP_BINOP_WRITE;
179                         if (!binfile)
180                                 binfile = "rex.bin";
181                 }
182         }
183
184         if (binop != REXGREP_BINOP_READ) {
185                 for (i = 1; i < argc; i++) {
186                         if (strcmp(argv[i], "-f") == 0) {
187                                 if (++i < argc) {
188                                         rbuffer_t *pattern = rex_buffer_map_file(argv[i]);
189                                         if (pattern) {
190                                                 ret = rex_grep_load_pattern(pGrep, pattern);
191                                                 r_array_add(buffers, &pattern);
192                                         } else {
193                                                 ret = -1;
194                                         }
195                                         if (ret < 0)
196                                                 goto error;
197                                 }
198                         }
199                 }
200                 for (i = 1; i < argc; i++) {
201                         if (strcmp(argv[i], "-e") == 0) {
202                                 if (++i < argc) {
203                                         rbuffer_t pattern;
204                                         pattern.s = (char*)argv[i];
205                                         pattern.size = strlen(argv[i]);
206                                         ret = rex_grep_load_string_pattern(pGrep, &pattern);
207                                         if (ret < 0)
208                                                 goto error;
209                                 }
210
211                         }
212                 }
213                 for (i = 1; i < argc; i++) {
214                         if (strcmp(argv[i], "-N") == 0) {
215                                 pGrep->usedfa = 0;
216                         }
217                 }
218         }
219
220         for (i = 1; i < argc; i++) {
221                 if (strcmp(argv[i], "-l") == 0) {
222                         pGrep->greptype = REX_GREPTYPE_SCANLINES;
223                 } else if (strcmp(argv[i], "-o") == 0 || strcmp(argv[i], "--only-matching") == 0) {
224                         pGrep->greptype = REX_GREPTYPE_MATCH;
225                 } else if (strcmp(argv[i], "-q") == 0) {
226                         devnull = fopen("/dev/null", "w");
227                         stdout = devnull;
228                 }
229         }
230
231         if (!pGrep->dfa && binop == REXGREP_BINOP_READ) {
232                 FILE *pfile = NULL;
233                 rexdfa_t dfa;
234                 r_memset(&dfa, 0, sizeof(dfa));
235                 pfile = fopen(binfile, "rb");
236                 if (!pfile) {
237                         fprintf(stderr, "Failed to open file: %s, %s\n", binfile, strerror(errno));
238                         goto error;
239                 }
240                 if (fread(&dfa, sizeof(dfa), 1, pfile) != 1)
241                         goto error;
242                 pGrep->dfa = rex_dfa_create(dfa.nstates, dfa.ntrans, dfa.naccsubstates, dfa.nsubstates);
243                 if (fread(pGrep->dfa->states, sizeof(*dfa.states), dfa.nstates, pfile) != dfa.nstates)
244                         goto error;
245                 if (fread(pGrep->dfa->trans, sizeof(*dfa.trans), dfa.ntrans, pfile) != dfa.ntrans)
246                         goto error;
247                 if (fread(pGrep->dfa->accsubstates, sizeof(*dfa.accsubstates), dfa.naccsubstates, pfile) != dfa.naccsubstates)
248                         goto error;
249                 if (fread(pGrep->dfa->substates, sizeof(*dfa.substates), dfa.nsubstates, pfile) != dfa.nsubstates)
250                         goto error;
251                 fclose(pfile);
252         }
253
254         if (!pGrep->dfa && !rex_db_isempty(pGrep->nfa) && pGrep->usedfa) {
255                 rexdb_t *dfadb = rex_db_createdfa(pGrep->nfa, pGrep->startuid);
256                 pGrep->dfa = rex_db_todfa(dfadb, pGrep->withsubstates);
257                 rex_db_destroy(dfadb);
258         }
259
260         for (i = 1; i < argc; i++) {
261                 if (strcmp(argv[i], "-D") == 0) {
262                         int j;
263                         if (pGrep->dfa) {
264                                 for (j = 0; j < pGrep->dfa->nstates; j++) {
265                                         rex_dfa_dumpstate(pGrep->dfa, j);
266                                 }
267                         } else if (pGrep->nfa) {
268                                 rexdb_t *db = pGrep->nfa;
269                                 for (j = 0; j < r_array_length(db->states); j++) {
270                                         rex_db_dumpstate(db, j);
271                                 }
272                         }
273                         goto end;
274                 }
275         }
276
277         if (pGrep->dfa && binop == REXGREP_BINOP_WRITE) {
278                 rexdfa_t dfa = *pGrep->dfa;
279                 FILE *pfile = fopen(binfile, "wb");
280                 dfa.substates = NULL;
281                 dfa.states = NULL;
282                 dfa.trans = NULL;
283                 dfa.accsubstates = NULL;
284                 if (!pfile) {
285                         fprintf(stderr, "Failed to create file: %s, %s\n", binfile, strerror(errno));
286                         goto error;
287                 }
288                 fwrite(&dfa, sizeof(dfa), 1, pfile);
289                 dfa.states = pGrep->dfa->states;
290                 dfa.trans = pGrep->dfa->trans;
291                 dfa.accsubstates = pGrep->dfa->accsubstates;
292                 dfa.substates = pGrep->dfa->substates;
293                 fwrite(dfa.states, sizeof(*dfa.states), dfa.nstates, pfile);
294                 fwrite(dfa.trans, sizeof(*dfa.trans), dfa.ntrans, pfile);
295                 fwrite(dfa.accsubstates, sizeof(*dfa.accsubstates), dfa.naccsubstates, pfile);
296                 fwrite(dfa.substates, sizeof(*dfa.substates), dfa.nsubstates, pfile);
297                 fclose(pfile);
298                 goto end;
299         }
300         if (!pGrep->dfa && pGrep->usedfa)
301                 goto end;
302         if (rex_db_isempty(pGrep->nfa) && !pGrep->usedfa)
303                 goto end;
304         for (i = 1; i < argc; i++) {
305                 if (strcmp(argv[i], "-s") == 0) {
306                         if (++i < argc) {
307                                 rbuffer_t buf;
308                                 buf.s = (char*)argv[i];
309                                 buf.size = r_strlen(argv[i]);
310                                 rex_grep_scan_buffer(pGrep, &buf);
311                                 ++scanned;
312                         }
313                 }
314         }
315
316         /* scan files */
317         for (i = 1; i < argc; i++) {
318                 if (argv[i][0] != '-') {
319                         ++scanned;
320                         rex_grep_scan_path(pGrep, argv[i]);
321                 } else if (argv[i][1] == 'e' || argv[i][1] == 'f' || argv[i][1] == 'b'){
322                         ++i;
323                 }
324                 
325         }
326
327         if (!scanned) {
328                 rbuffer_t *buf = grep_buffer_loadfile(stdin);
329                 if (buf) {
330                         rex_grep_scan_buffer(pGrep, buf);
331                         r_buffer_destroy(buf);
332                 }
333         }
334
335 end:
336         for (i = 0; i < r_array_length(buffers); i++) {
337                 r_buffer_destroy(r_array_index(buffers, i, rbuffer_t*));
338         }
339         r_object_destroy((robject_t*)buffers);
340         ret = pGrep->ret;
341         if (pGrep->showtime && pGrep->dfa) {
342                 rexdfa_t *dfa = pGrep->dfa;
343                 unsigned long sizestates = dfa->nstates * sizeof(rexdfs_t);
344                 unsigned long sizetrans = dfa->ntrans * sizeof(rexdft_t);
345                 unsigned long sizeaccsubs = dfa->naccsubstates * sizeof(rexdfss_t);
346                 unsigned long sizesubs = dfa->nsubstates * sizeof(rexdfss_t);
347                 unsigned long sizetotal = sizestates + sizetrans + sizeaccsubs + sizesubs;
348                 fprintf(stdout, "\n\n");
349                 fprintf(stdout, "\tDFA Memory: %ld KB, States: %ld KB (%.2f), Transitions: %ld KB (%.2f), Accecpting Substates: %ld KB(%.2f), Substates: %ld KB (%.2f)\n",
350                                 sizetotal/1024, sizestates/1024, (100.0*sizestates/sizetotal), sizetrans/1024, (100.0*sizetrans/sizetotal),
351                                 sizeaccsubs/1024, (100.0*sizeaccsubs/sizetotal), sizesubs/1024, (100.0*sizesubs/sizetotal));
352         }
353         rex_grep_destroy(pGrep);
354         if (pGrep->showtime) {
355                 fprintf(stdout, "\tmemory: %ld KB (leaked %ld Bytes)\n", (long)r_debug_get_maxmem()/1024, (long)r_debug_get_allocmem());
356         }
357
358         if (devnull)
359                 fclose(devnull);
360         return ret;
361
362 error:
363         if (devnull)
364                 fclose(devnull);
365         rex_grep_destroy(pGrep);
366         return 2;
367 }