RPA Toolkit
work on rexcc
[rpatk.git] / rexcc / unix / main.c
1 /*
2  *  Regular Pattern Analyzer (RPA)
3  *  Copyright (c) 2009-2010 Martin Stoilov
4  *
5  *  This program is free software: you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation, either version 3 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program.  If not, see <http://www.gnu.org/licenses/>.
17  *
18  *  Martin Stoilov <martin@rpasearch.com>
19  */
20
21 #include <stdio.h>
22 #include <sys/types.h>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <sys/mman.h>
28 #include <string.h>
29 #include <stdlib.h>
30 #include <wchar.h>
31 #include <time.h>
32 #include <errno.h>
33 #include "rlib/rmem.h"
34 #include "rlib/rarray.h"
35 #include "rex/rexdfaconv.h"
36 #include "rex/rexdfa.h"
37 #include "rexcc.h"
38
39
40 void rex_buffer_unmap_file(rbuffer_t *buf)
41 {
42         if (buf) {
43                 munmap(buf->s, buf->size);
44                 r_free(buf);
45         }
46 }
47
48
49 rbuffer_t * rex_buffer_map_file(const char *filename)
50 {
51         struct stat st;
52         rbuffer_t *str;
53         char *buffer;
54
55         int fd = open(filename, O_RDONLY);
56         if (fd < 0) {
57                 return (void*)0;
58         }
59         if (fstat(fd, &st) < 0) {
60                 close(fd);
61                 return (void*)0;
62         }
63         buffer = (char*)mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
64         if (buffer == (void*)-1) {
65                 close(fd);
66                 return (void*)0;
67         }
68         str = (rbuffer_t *)r_malloc(sizeof(rbuffer_t));
69         if (!str)
70                 goto error;
71         memset(str, 0, sizeof(*str));
72         str->s = buffer;
73         str->size = st.st_size;
74         str->userdata = (void*)((unsigned long)fd);
75         str->alt_destroy = rex_buffer_unmap_file;
76         close(fd);
77         return str;
78
79 error:
80         munmap(buffer, st.st_size);
81         close(fd);
82         return str;
83 }
84
85
/*
 * Print the program banner and the command line help to stderr.
 *
 * argc is not consulted; argv[0] is printed as the program name.
 * Always returns 0.
 */
int usage(int argc, const char *argv[])
{
        static const char *const option_help[] = {
                "\t-e <expression>          Regular Expression.\n",
                "\t-f <patternfile>         Read Regular Expressions from a file.\n",
                "\t-o <cfile>               Output .c file.\n",
                "\t-D                       Dump states.\n",
                "\t-S                       Include substates.\n",
                "\t-t                       Display statistics.\n",
                "\t-v                       Display version information.\n",
                "\t-h, --help               Display this help.\n",
        };
        const unsigned long count = sizeof(option_help) / sizeof(option_help[0]);
        unsigned long k;

        (void)argc;

        fprintf(stderr, "REX Code Compiler - using library version: %s \n", rex_db_version());
        fputs("Copyright (C) 2012 Martin Stoilov\n\n", stderr);

        fprintf(stderr, "Usage: \n %s [OPTIONS] <filename>\n", argv[0]);
        fputs(" OPTIONS:\n", stderr);
        for (k = 0; k < count; k++)
                fputs(option_help[k], stderr);

        return 0;
}
104
105
106 int rexcc_buffer_realloc(rbuffer_t *buffer, unsigned long size)
107 {
108         char *s;
109
110         s = (char *)r_realloc(buffer->s, size);
111         if (!s)
112                 return -1;
113         buffer->s = s;
114         buffer->size = size;
115         return 0;
116
117 }
118
119
120 rbuffer_t *rexcc_buffer_loadfile(FILE *pFile)
121 {
122         unsigned long memchunk = 256;
123         long ret = 0, inputsize = 0;
124         rbuffer_t *buf;
125
126         buf = r_buffer_create(2 * memchunk);
127         if (!buf)
128                 return (void*)0;
129
130         do {
131                 if ((buf->size - inputsize) < memchunk) {
132                         if (rexcc_buffer_realloc(buf, buf->size + memchunk) < 0) {
133                                 fprintf(stderr, "Out of memory!\n");
134                                 exit(1);
135                         }
136                 }
137                 ret = (long)fread(&buf->s[inputsize], 1, memchunk - 1, pFile);
138                 if ((ret <= 0) && ferror(pFile)) {
139                         r_buffer_destroy(buf);
140                         return (void*)0;
141                 }
142                 inputsize += ret;
143                 buf->s[inputsize] = '\0';
144                 buf->size = inputsize;
145         } while (!feof(pFile));
146
147         return buf;
148 }
149
150
151 int main(int argc, const char *argv[])
152 {
153         int i, ret = 0;
154         rexcc_t *pCC;
155         rarray_t *buffers;
156         int withsubstates = 0;
157         FILE *devnull = NULL;
158         rexdb_t *tempdb = NULL;
159         FILE *cfile = stdout;
160         FILE *hfile = NULL;
161
162         buffers = r_array_create(sizeof(rbuffer_t *));
163         pCC = rex_cc_create();
164         if (argc <= 1) {
165                 usage(argc, argv);
166                 goto end;
167         }
168
169         for (i = 1; i < argc; i++) {
170                 if (strcmp(argv[i], "-t") == 0) {
171                         pCC->showtime = 1;
172                 }
173         }
174
175         for (i = 1; i < argc; i++) {
176                 if (strcmp(argv[i], "--help") == 0 || strcmp(argv[i], "-help") == 0 || strcmp(argv[i], "/?") == 0 || strcmp(argv[i], "-h") == 0) {
177                         usage(argc, argv);
178                         goto end;
179                 }
180         }
181
182         for (i = 1; i < argc; i++) {
183                 if (strcmp(argv[i], "-v") == 0) {
184                         fprintf(stderr, "REX Grep with REX Engine: %s\n", rex_db_version());
185                         goto end;
186                 }
187         }
188
189         for (i = 1; i < argc; i++) {
190                 if (strcmp(argv[i], "-o") == 0) {
191                         if (++i < argc) {
192                                 cfile = fopen(argv[i], "wb");
193                                 if (!cfile) {
194                                         fprintf(stderr, "Failed to create file: %s, %s\n", argv[i], strerror(errno));
195                                         goto error;
196                                 }
197
198                         }
199                 }
200         }
201
202         for (i = 1; i < argc; i++) {
203                 if (strcmp(argv[i], "-e") == 0) {
204                         if (++i < argc) {
205                                 rbuffer_t pattern;
206                                 rexuserdata_t userdata = 0;
207                                 pattern.s = (char*)argv[i];
208                                 pattern.size = strlen(argv[i]);
209                                 if ((i + 1) < argc && argv[i + 1][0] != '-') {
210                                         ++i;
211                                         userdata = strtoul(argv[i], NULL, 10);
212                                 }
213                                 ret = rex_cc_load_string_pattern(pCC, &pattern, userdata);
214                                 if (ret < 0)
215                                         goto error;
216                         }
217                         
218                 }
219         }
220
221         for (i = 1; i < argc; i++) {
222                 if (strcmp(argv[i], "-S") == 0) {
223                         withsubstates = 1;
224                 }
225         }
226
227         if (pCC->startuid >= 0) {
228                 tempdb = rex_db_createdfa(pCC->nfa, pCC->startuid);
229                 pCC->dfa = rex_db_todfa(tempdb, withsubstates);
230                 rex_db_destroy(tempdb);
231         }
232         for (i = 1; i < argc; i++) {
233                 if (strcmp(argv[i], "-D") == 0) {
234                         int j;
235                         if (pCC->dfa) {
236                                 for (j = 0; j < pCC->dfa->nstates; j++) {
237                                         rex_dfa_dumpstate(pCC->dfa, j);
238                                 }
239                         } else if (pCC->nfa) {
240                                 rexdb_t *db = pCC->nfa;
241                                 for (j = 0; j < r_array_length(db->states); j++) {
242                                         rex_db_dumpstate(db, j);
243                                 }
244                         }
245                         goto end;
246                 }
247         }
248
249         for (i = 1; i < argc; i++) {
250                 if (strcmp(argv[i], "-f") == 0) {
251                         if (++i < argc) {
252                                 rbuffer_t *text = rex_buffer_map_file(argv[i]);
253                                 if (text) {
254                                         if (rex_cc_load_buffer(pCC, text) < 0) {
255                                                 /*
256                                                  * Error
257                                                  */
258                                         }
259                                         r_buffer_destroy(text);
260                                         goto end;
261                                 }
262                         }
263                 }
264         }
265
266         rex_cc_output(pCC, cfile);
267
268 end:
269         rex_cc_destroy(pCC);
270         if (pCC->showtime) {
271                 fprintf(stdout, "memory: %ld KB (leaked %ld Bytes)\n", (long)r_debug_get_maxmem()/1024, (long)r_debug_get_allocmem());
272         }
273
274         if (devnull)
275                 fclose(devnull);
276         if (cfile)
277                 fclose(cfile);
278         if (hfile)
279                 fclose(hfile);
280         return ret;
281
282 error:
283         if (devnull)
284                 fclose(devnull);
285         if (cfile)
286                 fclose(cfile);
287         if (hfile)
288                 fclose(hfile);
289         rex_cc_destroy(pCC);
290         return 2;
291 }