RPA Toolkit
Work on rexcc. Better support for rexdfa_t generation (without including NFA substates).
[rpatk.git] / rexcc / unix / main.c
1 /*
2  *  Regular Pattern Analyzer (RPA)
3  *  Copyright (c) 2009-2010 Martin Stoilov
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  *  Martin Stoilov <martin@rpasearch.com>
19  */
20
21 #include <stdio.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <sys/mman.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <wchar.h>
31 #include <time.h>
32 #include <errno.h>
33 #include "rlib/rmem.h"
34 #include "rlib/rarray.h"
35 #include "rex/rexdfaconv.h"
36 #include "rex/rexdfa.h"
37 #include "rexcc.h"
38
39
40 void rex_buffer_unmap_file(rbuffer_t *buf)
41 {
42         if (buf) {
43                 munmap(buf->s, buf->size);
44                 r_free(buf);
45         }
46 }
47
48
49 rbuffer_t * rex_buffer_map_file(const char *filename)
50 {
51         struct stat st;
52         rbuffer_t *str;
53         char *buffer;
54
55         int fd = open(filename, O_RDONLY);
56         if (fd < 0) {
57                 return (void*)0;
58         }
59         if (fstat(fd, &st) < 0) {
60                 close(fd);
61                 return (void*)0;
62         }
63         buffer = (char*)mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
64         if (buffer == (void*)-1) {
65                 close(fd);
66                 return (void*)0;
67         }
68         str = (rbuffer_t *)r_malloc(sizeof(rbuffer_t));
69         if (!str)
70                 goto error;
71         memset(str, 0, sizeof(*str));
72         str->s = buffer;
73         str->size = st.st_size;
74         str->userdata = (void*)((unsigned long)fd);
75         str->alt_destroy = rex_buffer_unmap_file;
76         close(fd);
77         return str;
78
79 error:
80         munmap(buffer, st.st_size);
81         close(fd);
82         return str;
83 }
84
85
/*
 * Print the program banner and the list of command line options to
 * stderr.
 *
 * argc: unused.
 * argv: argv[0] is printed as the program name.
 *
 * Always returns 0.
 */
int usage(int argc, const char *argv[])
{
	static const char *const option_lines[] = {
		"\t-e <expression>          Regular Expression.\n",
		"\t-f <patternfile>         Read Regular Expressions from a file.\n",
		"\t-c <cfile>               Output .c file.\n",
		"\t-h <hfile>               Output .h file.\n",
		"\t-D                       Dump states.\n",
		"\t-S                       Include substates.\n",
		"\t-t                       Display statistics.\n",
		"\t-v                       Display version information.\n",
		"\t--help                   Display this help.\n",
	};
	unsigned int n;

	fprintf(stderr, "REX Code Compiler - using library version: %s \n", rex_db_version());
	fputs("Copyright (C) 2012 Martin Stoilov\n\n", stderr);

	fprintf(stderr, "Usage: \n %s [OPTIONS] <filename>\n", argv[0]);
	fputs(" OPTIONS:\n", stderr);
	for (n = 0; n < sizeof(option_lines) / sizeof(option_lines[0]); n++)
		fputs(option_lines[n], stderr);

	return 0;
}
105
106
107 int rexcc_buffer_realloc(rbuffer_t *buffer, unsigned long size)
108 {
109         char *s;
110
111         s = (char *)r_realloc(buffer->s, size);
112         if (!s)
113                 return -1;
114         buffer->s = s;
115         buffer->size = size;
116         return 0;
117
118 }
119
120
121 rbuffer_t *rexcc_buffer_loadfile(FILE *pFile)
122 {
123         unsigned long memchunk = 256;
124         long ret = 0, inputsize = 0;
125         rbuffer_t *buf;
126
127         buf = r_buffer_create(2 * memchunk);
128         if (!buf)
129                 return (void*)0;
130
131         do {
132                 if ((buf->size - inputsize) < memchunk) {
133                         if (rexcc_buffer_realloc(buf, buf->size + memchunk) < 0) {
134                                 fprintf(stderr, "Out of memory!\n");
135                                 exit(1);
136                         }
137                 }
138                 ret = (long)fread(&buf->s[inputsize], 1, memchunk - 1, pFile);
139                 if ((ret <= 0) && ferror(pFile)) {
140                         r_buffer_destroy(buf);
141                         return (void*)0;
142                 }
143                 inputsize += ret;
144                 buf->s[inputsize] = '\0';
145                 buf->size = inputsize;
146         } while (!feof(pFile));
147
148         return buf;
149 }
150
151
152 int main(int argc, const char *argv[])
153 {
154         int i, ret = 0;
155         rexcc_t *pCC;
156         rarray_t *buffers;
157         int withsubstates = 0;
158         FILE *devnull = NULL;
159         rexdb_t *tempdb = NULL;
160         FILE *cfile = NULL;
161         FILE *hfile = NULL;
162
163         buffers = r_array_create(sizeof(rbuffer_t *));
164         pCC = rex_cc_create();
165         if (argc <= 1) {
166                 usage(argc, argv);
167                 goto end;
168         }
169
170         for (i = 1; i < argc; i++) {
171                 if (strcmp(argv[i], "-t") == 0) {
172                         pCC->showtime = 1;
173                 }
174         }
175
176         for (i = 1; i < argc; i++) {
177                 if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0) {
178                         usage(argc, argv);
179                         goto end;
180                 }
181         }
182
183         for (i = 1; i < argc; i++) {
184                 if (strcmp(argv[i], "-v") == 0) {
185                         fprintf(stderr, "REX Grep with REX Engine: %s\n", rex_db_version());
186                         goto end;
187                 }
188         }
189
190         for (i = 1; i < argc; i++) {
191                 if (strcmp(argv[i], "-c") == 0) {
192                         if (++i < argc) {
193                                 cfile = fopen(argv[i], "wb");
194                                 if (!cfile) {
195                                         fprintf(stderr, "Failed to create file: %s, %s\n", argv[i], strerror(errno));
196                                         goto error;
197                                 }
198
199                         }
200                 }
201         }
202
203         for (i = 1; i < argc; i++) {
204                 if (strcmp(argv[i], "-h") == 0) {
205                         if (++i < argc) {
206                                 hfile = fopen(argv[i], "wb");
207                                 if (!hfile) {
208                                         fprintf(stderr, "Failed to create file: %s, %s\n", argv[i], strerror(errno));
209                                         goto error;
210                                 }
211
212                         }
213                 }
214         }
215
216
217         for (i = 1; i < argc; i++) {
218                 if (strcmp(argv[i], "-f") == 0) {
219                         if (++i < argc) {
220 #if 0
221                                 rbuffer_t *text = rex_buffer_map_file(argv[i]);
222 #endif
223                         }
224                 }
225         }
226
227         for (i = 1; i < argc; i++) {
228                 if (strcmp(argv[i], "-e") == 0) {
229                         if (++i < argc) {
230                                 rbuffer_t pattern;
231                                 rexuserdata_t userdata = 0;
232                                 pattern.s = (char*)argv[i];
233                                 pattern.size = strlen(argv[i]);
234                                 if ((i + 1) < argc && argv[i + 1][0] != '-') {
235                                         ++i;
236                                         userdata = strtoul(argv[i], NULL, 10);
237                                 }
238                                 ret = rex_cc_load_string_pattern(pCC, &pattern, userdata);
239                                 if (ret < 0)
240                                         goto error;
241                         }
242                         
243                 }
244         }
245
246         for (i = 1; i < argc; i++) {
247                 if (strcmp(argv[i], "-S") == 0) {
248                         withsubstates = 1;
249                 }
250         }
251
252         if (pCC->startuid < 0)
253                 goto error;
254         tempdb = rex_db_createdfa(pCC->nfa, pCC->startuid);
255         pCC->dfa = rex_db_todfa(tempdb, withsubstates);
256         rex_db_destroy(tempdb);
257
258         for (i = 1; i < argc; i++) {
259                 if (strcmp(argv[i], "-D") == 0) {
260                         int j;
261                         if (pCC->dfa) {
262                                 for (j = 0; j < pCC->dfa->nstates; j++) {
263                                         rex_dfa_dumpstate(pCC->dfa, j);
264                                 }
265                         } else if (pCC->nfa) {
266                                 rexdb_t *db = pCC->nfa;
267                                 for (j = 0; j < r_array_length(db->states); j++) {
268                                         rex_db_dumpstate(db, j);
269                                 }
270                         }
271                         goto end;
272                 }
273         }
274
275         rex_cc_output(pCC, cfile, hfile);
276
277 end:
278         rex_cc_destroy(pCC);
279         if (pCC->showtime) {
280                 fprintf(stdout, "memory: %ld KB (leaked %ld Bytes)\n", (long)r_debug_get_maxmem()/1024, (long)r_debug_get_allocmem());
281         }
282
283         if (devnull)
284                 fclose(devnull);
285         if (cfile)
286                 fclose(cfile);
287         if (hfile)
288                 fclose(hfile);
289         return ret;
290
291 error:
292         if (devnull)
293                 fclose(devnull);
294         if (cfile)
295                 fclose(cfile);
296         if (hfile)
297                 fclose(hfile);
298         rex_cc_destroy(pCC);
299         return 2;
300 }