2 * Regular Pattern Analyzer Toolkit (RPA/Tk)
3 * Copyright (c) 2009-2012 Martin Stoilov
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 * Martin Stoilov <martin@rpasearch.com>
22 #include <sys/types.h>
24 #include <sys/types.h>
33 #include "rlib/rmem.h"
34 #include "rlib/rarray.h"
35 #include "rex/rexdfaconv.h"
36 #include "rex/rexdfa.h"
38 #include "rexgrepdep.h"
/*
 * Print the REX Grep banner, the invocation synopsis and the option summary
 * to stderr.
 *
 * The banner includes the library version string returned by
 * rex_db_version(); argv[0] is printed as the program name. argc is not
 * referenced by any statement visible in this listing.
 *
 * NOTE(review): the embedded line numbers skip values, so braces and the
 * return statement of this function are not visible here; the returned value
 * cannot be confirmed from this listing.
 * NOTE(review): main() also accepts -H / --with-filename, which is not
 * mentioned in the summary below.
 */
41 int usage(int argc, const char *argv[])
43 fprintf(stderr, "REX Grep - using library version: %s \n", rex_db_version());
44 fprintf(stderr, "Copyright (C) 2012 Martin Stoilov\n\n");
/* Synopsis followed by one line per supported option. */
46 fprintf(stderr, "Usage: \n %s [OPTIONS] <filename>\n", argv[0]);
47 fprintf(stderr, " OPTIONS:\n");
48 fprintf(stderr, "\t-e patterns Regular Expression.\n");
49 fprintf(stderr, "\t-f patternfile Read Regular Expressions from a file.\n");
50 fprintf(stderr, "\t-b binfile Use DFA from binfile.\n");
51 fprintf(stderr, "\t-c Compile DFA and save to binfile. Use -b option to specify the name of the file.\n");
52 fprintf(stderr, "\t-o, --only-matching Show only the part of a line matching PATTERN\n");
53 fprintf(stderr, "\t-l Line mode.\n");
54 fprintf(stderr, "\t-N Use NFA.\n");
55 fprintf(stderr, "\t-D Dump states.\n");
56 fprintf(stderr, "\t-S Include DFA substates.\n");
57 fprintf(stderr, "\t-q Quiet mode.\n");
58 fprintf(stderr, "\t-t Display statistics. Works only when built in DEBUG mode.\n");
59 fprintf(stderr, "\t-s string Search in string.\n");
60 fprintf(stderr, "\t-v Display version information.\n");
61 fprintf(stderr, "\t-h, --help Display this help.\n");
/*
 * Resize the storage of `buffer` to `size` bytes.
 *
 * The new block is requested with r_realloc() on buffer->s and received in
 * the local `s`. The caller in grep_buffer_loadfile() treats a negative
 * return value as failure.
 *
 * NOTE(review): the lines that check `s`, update the buffer fields and
 * return are not visible in this listing (the embedded numbering jumps from
 * 71 to 81) - confirm that buffer->s is only overwritten after the
 * allocation is known to have succeeded.
 */
67 int grep_buffer_realloc(rbuffer_t *buffer, unsigned long size)
71 s = (char *)r_realloc(buffer->s, size);
/*
 * Read the stream `pFile` until end-of-file into a growable rbuffer_t,
 * keeping the data NUL-terminated.
 *
 * The data is read in pieces of at most memchunk - 1 bytes, and the buffer
 * is grown by memchunk bytes whenever fewer than memchunk bytes remain past
 * the current input size.
 *
 * NOTE(review): several original lines (braces, return statements, the
 * declaration of `buf`) are not visible in this listing, so the exact return
 * values on success and failure cannot be confirmed from here.
 */
81 rbuffer_t *grep_buffer_loadfile(FILE *pFile)
/* Growth step; each fread() asks for one byte less than this. */
83 unsigned long memchunk = 256;
84 long ret = 0, inputsize = 0;
/* Initial buffer is two growth steps large. */
87 buf = r_buffer_create(2 * memchunk);
/* Grow when less than one full step remains past the data read so far. */
92 if ((buf->size - inputsize) < memchunk) {
93 if (grep_buffer_realloc(buf, buf->size + memchunk) < 0) {
94 fprintf(stderr, "Out of memory!\n");
/* Append the next piece directly after the data read so far. */
98 ret = (long)fread(&buf->s[inputsize], 1, memchunk - 1, pFile);
/* A short or empty read only counts as failure when the stream reports an error. */
99 if ((ret <= 0) && ferror(pFile)) {
100 r_buffer_destroy(buf);
/*
 * NOTE(review): the statement that advances inputsize by ret is not visible
 * in this listing (embedded numbering jumps from 100 to 104) - presumably it
 * sits right before the terminator is written; confirm.
 */
104 buf->s[inputsize] = '\0';
/*
 * buf->size now records the data length rather than the allocated size, so
 * the free-space check at the top of the loop sees zero and the buffer is
 * grown again on every further iteration.
 */
105 buf->size = inputsize;
106 } while (!feof(pFile));
/*
 * Values of the `binop` variable in main(), selecting what is done with the
 * binary DFA file:
 *   NONE  - initial value;
 *   READ  - set when -b is given, the DFA is read from the file and pattern
 *           loading is skipped;
 *   WRITE - set when -c is given, the compiled DFA is written to the file.
 */
112 #define REXGREP_BINOP_NONE 0
113 #define REXGREP_BINOP_READ 1
114 #define REXGREP_BINOP_WRITE 2
/*
 * Program entry point of REX Grep.
 *
 * The command line is scanned several times, one pass per option (or option
 * group), then the automaton is loaded from a binary file or built from the
 * loaded patterns, optionally dumped and/or written out, and finally the
 * input is scanned: -s strings, path arguments, or stdin.
 *
 * NOTE(review): the embedded line numbers skip values throughout this
 * function, so braces, short statements, labels and return statements are
 * not visible in this listing. Comments below describe only what the visible
 * lines establish; anything else is marked as an assumption.
 */
117 int main(int argc, const char *argv[])
119 int ret, scanned = 0, i;
122 const char *binfile = NULL;
123 int binop = REXGREP_BINOP_NONE;
124 FILE *devnull = NULL;
/* `buffers` keeps the pattern-file buffers so they can be destroyed at the end. */
126 buffers = r_array_create(sizeof(rbuffer_t *));
127 pGrep = rex_grep_create();
/* Line scanning is the default mode; -l / -o may change it below. */
128 pGrep->greptype = REX_GREPTYPE_SCANLINES;
/* -t: statistics. The body is not visible; presumably it sets pGrep->showtime - confirm. */
135 for (i = 1; i < argc; i++) {
136 if (strcmp(argv[i], "-t") == 0) {
/* -S: include DFA substates. */
141 for (i = 1; i < argc; i++) {
142 if (strcmp(argv[i], "-S") == 0) {
143 pGrep->withsubstates = 1;
/* -H / --with-filename: show the file name. */
147 for (i = 1; i < argc; i++) {
148 if (strcmp(argv[i], "-H") == 0 || strcmp(argv[i], "--with-filename") == 0) {
149 pGrep->showfilename = 1;
/* Help requested in any of its spellings; the body is not visible here. */
153 for (i = 1; i < argc; i++) {
154 if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "/?") == 0 || strcmp(argv[i], "-h") == 0) {
/* -v: print the engine version to stderr. */
160 for (i = 1; i < argc; i++) {
161 if (strcmp(argv[i], "-v") == 0) {
162 fprintf(stderr, "REX Grep with REX Engine: %s\n", rex_db_version());
/* -b: use a binary DFA file. The assignment of `binfile` is not visible here. */
167 for (i = 1; i < argc; i++) {
168 if (strcmp(argv[i], "-b") == 0) {
171 binop = REXGREP_BINOP_READ;
/* -c: this pass runs after the -b pass, so WRITE replaces a READ set above. */
176 for (i = 1; i < argc; i++) {
177 if (strcmp(argv[i], "-c") == 0) {
178 binop = REXGREP_BINOP_WRITE;
/* Patterns are loaded only when the DFA is not being read from the binary file. */
184 if (binop != REXGREP_BINOP_READ) {
185 for (i = 1; i < argc; i++) {
186 if (strcmp(argv[i], "-f") == 0) {
/* presumably `i` was advanced to the file name by a line not visible here - confirm */
188 rbuffer_t *pattern = rex_buffer_map_file(argv[i]);
190 ret = rex_grep_load_pattern(pGrep, pattern);
/* Keep the mapped buffer; it is destroyed in the cleanup loop near the end. */
191 r_array_add(buffers, &pattern);
/* -e: wrap the argument string itself in a buffer descriptor and load it as a pattern. */
200 for (i = 1; i < argc; i++) {
201 if (strcmp(argv[i], "-e") == 0) {
204 pattern.s = (char*)argv[i];
205 pattern.size = strlen(argv[i]);
206 ret = rex_grep_load_string_pattern(pGrep, &pattern);
/* -N: the body is not visible; the help text describes it as "Use NFA". */
213 for (i = 1; i < argc; i++) {
214 if (strcmp(argv[i], "-N") == 0) {
/* Output mode options: -l line scan, -o matched part only, -q quiet. */
220 for (i = 1; i < argc; i++) {
221 if (strcmp(argv[i], "-l") == 0) {
222 pGrep->greptype = REX_GREPTYPE_SCANLINES;
223 } else if (strcmp(argv[i], "-o") == 0 || strcmp(argv[i], "--only-matching") == 0) {
224 pGrep->greptype = REX_GREPTYPE_MATCH;
225 } else if (strcmp(argv[i], "-q") == 0) {
/* NOTE(review): no check of this fopen() result and no use of `devnull` is visible in this listing. */
226 devnull = fopen("/dev/null", "w");
/*
 * Read a previously saved DFA: first the rexdfa_t header, then the states,
 * transitions, accepting substates and substates arrays, each sized by the
 * counts taken from that header.
 */
231 if (!pGrep->dfa && binop == REXGREP_BINOP_READ) {
234 r_memset(&dfa, 0, sizeof(dfa));
235 pfile = fopen(binfile, "rb");
237 fprintf(stderr, "Failed to open file: %s, %s\n", binfile, strerror(errno));
240 if (fread(&dfa, sizeof(dfa), 1, pfile) != 1)
/*
 * NOTE(review): the counts come straight from the file, and the result of
 * rex_dfa_create() is dereferenced on the very next line without a check.
 */
242 pGrep->dfa = rex_dfa_create(dfa.nstates, dfa.ntrans, dfa.naccsubstates, dfa.nsubstates);
243 if (fread(pGrep->dfa->states, sizeof(*dfa.states), dfa.nstates, pfile) != dfa.nstates)
245 if (fread(pGrep->dfa->trans, sizeof(*dfa.trans), dfa.ntrans, pfile) != dfa.ntrans)
247 if (fread(pGrep->dfa->accsubstates, sizeof(*dfa.accsubstates), dfa.naccsubstates, pfile) != dfa.naccsubstates)
249 if (fread(pGrep->dfa->substates, sizeof(*dfa.substates), dfa.nsubstates, pfile) != dfa.nsubstates)
/* No DFA yet, patterns were loaded and DFA use is enabled: convert the NFA. */
254 if (!pGrep->dfa && !rex_db_isempty(pGrep->nfa) && pGrep->usedfa) {
255 rexdb_t *dfadb = rex_db_createdfa(pGrep->nfa, pGrep->startuid);
256 pGrep->dfa = rex_db_todfa(dfadb, pGrep->withsubstates);
/* The intermediate database is not needed once the DFA has been produced from it. */
257 rex_db_destroy(dfadb);
/* -D: dump every state; the first branch dumps DFA states, the else branch NFA states. */
260 for (i = 1; i < argc; i++) {
261 if (strcmp(argv[i], "-D") == 0) {
264 for (j = 0; j < pGrep->dfa->nstates; j++) {
265 rex_dfa_dumpstate(pGrep->dfa, j);
267 } else if (pGrep->nfa) {
268 rexdb_t *db = pGrep->nfa;
269 for (j = 0; j < r_array_length(db->states); j++) {
270 rex_db_dumpstate(db, j);
/*
 * Save the DFA: a copy of the header struct is written first, then the
 * arrays in the same order in which the READ branch above reads them back.
 */
277 if (pGrep->dfa && binop == REXGREP_BINOP_WRITE) {
278 rexdfa_t dfa = *pGrep->dfa;
279 FILE *pfile = fopen(binfile, "wb");
/* Pointer members of the header copy are cleared before it is written. */
280 dfa.substates = NULL;
283 dfa.accsubstates = NULL;
285 fprintf(stderr, "Failed to create file: %s, %s\n", binfile, strerror(errno));
/* NOTE(review): none of the fwrite() results below is checked, so a short write goes unnoticed. */
288 fwrite(&dfa, sizeof(dfa), 1, pfile);
/* Restore the array pointers in the local copy so they can be written out. */
289 dfa.states = pGrep->dfa->states;
290 dfa.trans = pGrep->dfa->trans;
291 dfa.accsubstates = pGrep->dfa->accsubstates;
292 dfa.substates = pGrep->dfa->substates;
293 fwrite(dfa.states, sizeof(*dfa.states), dfa.nstates, pfile);
294 fwrite(dfa.trans, sizeof(*dfa.trans), dfa.ntrans, pfile);
295 fwrite(dfa.accsubstates, sizeof(*dfa.accsubstates), dfa.naccsubstates, pfile);
296 fwrite(dfa.substates, sizeof(*dfa.substates), dfa.nsubstates, pfile);
/* Checks for "nothing usable to search with"; their bodies are not visible in this listing. */
300 if (!pGrep->dfa && pGrep->usedfa)
302 if (rex_db_isempty(pGrep->nfa) && !pGrep->usedfa)
/* -s: scan the argument string itself, wrapped in a buffer descriptor without copying. */
304 for (i = 1; i < argc; i++) {
305 if (strcmp(argv[i], "-s") == 0) {
308 buf.s = (char*)argv[i];
309 buf.size = r_strlen(argv[i]);
310 rex_grep_scan_buffer(pGrep, &buf);
/*
 * Every argument that does not start with '-' is scanned as a path.
 * NOTE(review): the else-branch singles out -e, -f and -b (presumably to skip
 * their value argument - confirm); -s also takes a value but is not listed,
 * so the string given to -s looks like it would be scanned as a path too.
 */
317 for (i = 1; i < argc; i++) {
318 if (argv[i][0] != '-') {
320 rex_grep_scan_path(pGrep, argv[i]);
321 } else if (argv[i][1] == 'e' || argv[i][1] == 'f' || argv[i][1] == 'b'){
/* Read all of stdin into memory and scan it; the guarding condition is not visible here. */
328 rbuffer_t *buf = grep_buffer_loadfile(stdin);
330 rex_grep_scan_buffer(pGrep, buf);
331 r_buffer_destroy(buf);
/* Release the pattern-file buffers collected by the -f pass, then the array itself. */
336 for (i = 0; i < r_array_length(buffers); i++) {
337 r_buffer_destroy(r_array_index(buffers, i, rbuffer_t*));
339 r_object_destroy((robject_t*)buffers);
/* Statistics: DFA memory footprint per table, in KB and as a percentage of the total. */
341 if (pGrep->showtime && pGrep->dfa) {
342 rexdfa_t *dfa = pGrep->dfa;
343 unsigned long sizestates = dfa->nstates * sizeof(rexdfs_t);
344 unsigned long sizetrans = dfa->ntrans * sizeof(rexdft_t);
345 unsigned long sizeaccsubs = dfa->naccsubstates * sizeof(rexdfss_t);
346 unsigned long sizesubs = dfa->nsubstates * sizeof(rexdfss_t);
347 unsigned long sizetotal = sizestates + sizetrans + sizeaccsubs + sizesubs;
348 fprintf(stdout, "\n\n");
/*
 * NOTE(review): the sizes are unsigned long but are printed with %ld, and
 * "Accecpting" in the message is a misspelling of "Accepting".
 */
349 fprintf(stdout, "\tDFA Memory: %ld KB, States: %ld KB (%.2f), Transitions: %ld KB (%.2f), Accecpting Substates: %ld KB(%.2f), Substates: %ld KB (%.2f)\n",
350 sizetotal/1024, sizestates/1024, (100.0*sizestates/sizetotal), sizetrans/1024, (100.0*sizetrans/sizetotal),
351 sizeaccsubs/1024, (100.0*sizeaccsubs/sizetotal), sizesubs/1024, (100.0*sizesubs/sizetotal));
/*
 * NOTE(review): pGrep->showtime is read on the line directly after
 * rex_grep_destroy(pGrep) - a use of the object after it has been destroyed.
 * The flag should be saved before the destroy call.
 */
353 rex_grep_destroy(pGrep);
354 if (pGrep->showtime) {
355 fprintf(stdout, "\tmemory: %ld KB (leaked %ld Bytes)\n", (long)r_debug_get_maxmem()/1024, (long)r_debug_get_allocmem());
/*
 * NOTE(review): second rex_grep_destroy() call; the control flow leading
 * here is not visible in this listing - confirm it cannot be reached after
 * the destroy call above.
 */
365 rex_grep_destroy(pGrep);