if (!hStat)
return -1;
rpa_stat_cachedisable(hStat, pGrep->disablecache);
- rpa_stat_setencoding(hStat, pGrep->encoding);
hStat->debug = pGrep->execdebug;
- ret = rpa_stat_match(hStat, pGrep->hPattern, input, start, end);
+ ret = rpa_stat_match(hStat, pGrep->hPattern, pGrep->encoding, input, start, end);
if (ret > 0) {
rpa_grep_print_filename(pGrep);
rpa_grep_output(pGrep, input, ret, pGrep->encoding);
if (!hStat)
return -1;
rpa_stat_cachedisable(hStat, pGrep->disablecache);
- rpa_stat_setencoding(hStat, pGrep->encoding);
hStat->debug = pGrep->execdebug;
- ret = rpa_stat_parse(hStat, pGrep->hPattern, input, start, end, records);
+ ret = rpa_stat_parse(hStat, pGrep->hPattern, pGrep->encoding, input, start, end, records);
if (ret < 0) {
rpa_errinfo_t err;
rpa_stat_lasterrorinfo(hStat, &err);
hStat = rpa_stat_create(pGrep->hDbex, 0);
if (!hStat)
return -1;
- rpa_stat_setencoding(hStat, pGrep->encoding);
rpa_stat_cachedisable(hStat, pGrep->disablecache);
hStat->debug = pGrep->execdebug;
pGrep->cachehit = hStat->cache->hit;
again:
- ret = rpa_stat_scan(hStat, pGrep->hPattern, input, start, end, &matched);
+ ret = rpa_stat_scan(hStat, pGrep->hPattern, pGrep->encoding, input, start, end, &matched);
pGrep->cachehit += hStat->cache->hit;
if (ret > 0) {
hStat = rpa_stat_create(pGrep->hDbex, 0);
if (!hStat)
return -1;
- rpa_stat_setencoding(hStat, pGrep->encoding);
hStat->debug = pGrep->execdebug;
again:
}
if (!lines)
return 0;
- ret = rpa_stat_scan(hStat, pGrep->hPattern, lstart, lstart, lend, &matched);
+ ret = rpa_stat_scan(hStat, pGrep->hPattern, pGrep->encoding, lstart, lstart, lend, &matched);
if (ret > 0) {
if (!displayed) {
displayed = 1;
{
rlong res = 0;
rpastat_t *stat = rpa_stat_create(parser->dbex, 4096);
- res = rpa_stat_parse(stat, rpa_dbex_last(parser->dbex), script, script, script + size, ast);
+ res = rpa_stat_parse(stat, rpa_dbex_last(parser->dbex), RPA_ENCODING_UTF8, script, script, script + size, ast);
if (res < 0 && error) {
rpa_errinfo_t rpaerror;
rpa_stat_lasterrorinfo(stat, &rpaerror);
rpastat_t *stat = pa->stat;
rpa_stat_cachedisable(stat, 0);
- if (rpa_stat_exec(stat, rvm_codegen_getcode(co->cg, 0), pa->main, prods, prods, prods + size, records) < 0)
+ if (rpa_stat_exec(stat, rvm_codegen_getcode(co->cg, 0), pa->main, RPA_ENCODING_UTF8, prods, prods, prods + size, records) < 0)
return -1;
ret = (rlong)RVM_CPUREG_GETL(stat->cpu, R0);
if (ret < 0)
}
-rint rpa_stat_init(rpastat_t *stat, const rchar *input, const rchar *start, const rchar *end, rarray_t *records)
+rint rpa_stat_init(rpastat_t *stat, ruint encoding, const rchar *input, const rchar *start, const rchar *end, rarray_t *records)
{
rulong size;
}
r_memset(&stat->err, 0, sizeof(stat->err));
size = end - start;
+ stat->encoding = encoding;
stat->start = start;
stat->end = end;
stat->input = input;
}
-rint rpa_stat_setencoding(rpastat_t *stat, ruint encoding)
-{
- if (!stat) {
- return -1;
- }
-
- stat->encoding = encoding;
- return 0;
-}
-
-
-rlong rpa_stat_exec(rpastat_t *stat, rvm_asmins_t *prog, rword off, const rchar *input, const rchar *start, const rchar *end, rarray_t *records)
+rlong rpa_stat_exec(rpastat_t *stat, rvm_asmins_t *prog, rword off, ruint encoding, const rchar *input, const rchar *start, const rchar *end, rarray_t *records)
{
rlong ret;
return -1;
}
rpa_stat_cacheinvalidate(stat);
- rpa_stat_init(stat, input, start, end, records);
+ rpa_stat_init(stat, encoding, input, start, end, records);
+
if (stat->debug) {
ret = rvm_cpu_exec_debug(stat->cpu, prog, off);
} else {
}
-static rlong rpa_stat_exec_rid(rpastat_t *stat, rparule_t rid, const rchar *input, const rchar *start, const rchar *end, rarray_t *records)
+static rlong rpa_stat_exec_rid(rpastat_t *stat, rparule_t rid, ruint encoding, const rchar *input, const rchar *start, const rchar *end, rarray_t *records)
{
rlong topsiz = 0;
rpainput_t *ptp;
- if ((topsiz = rpa_stat_exec(stat, rpa_dbex_executable(stat->dbex), rpa_dbex_executableoffset(stat->dbex, rid), input, start, end, records)) < 0) {
+ if ((topsiz = rpa_stat_exec(stat, rpa_dbex_executable(stat->dbex), rpa_dbex_executableoffset(stat->dbex, rid), encoding, input, start, end, records)) < 0) {
return -1;
}
if (topsiz <= 0)
}
-rlong rpa_stat_scan(rpastat_t *stat, rparule_t rid, const rchar *input, const rchar *start, const rchar *end, const rchar **where)
+rlong rpa_stat_scan(rpastat_t *stat, rparule_t rid, ruint encoding, const rchar *input, const rchar *start, const rchar *end, const rchar **where)
{
rlong ret;
while (input < end) {
- ret = rpa_stat_exec_rid(stat, rid, input, start, end, NULL);
+ ret = rpa_stat_exec_rid(stat, rid, encoding, input, start, end, NULL);
if (ret < 0)
return -1;
if (ret > 0) {
}
-rlong rpa_stat_match(rpastat_t *stat, rparule_t rid, const rchar *input, const rchar *start, const rchar *end)
+rlong rpa_stat_match(rpastat_t *stat, rparule_t rid, ruint encoding, const rchar *input, const rchar *start, const rchar *end)
{
- return rpa_stat_exec_rid(stat, rid, input, start, end, NULL);
+ return rpa_stat_exec_rid(stat, rid, encoding, input, start, end, NULL);
}
-rlong rpa_stat_parse(rpastat_t *stat, rparule_t rid, const rchar *input, const rchar *start, const rchar *end, rarray_t *records)
+rlong rpa_stat_parse(rpastat_t *stat, rparule_t rid, ruint encoding, const rchar *input, const rchar *start, const rchar *end, rarray_t *records)
{
- return rpa_stat_exec_rid(stat, rid, input, start, end, records);
+ return rpa_stat_exec_rid(stat, rid, encoding, input, start, end, records);
}
#define RPA_ENCODING_UTF8 0
#define RPA_ENCODING_BYTE 1
#define RPA_ENCODING_UTF16LE 2
-#define RPA_ENCODING_MASK ((1 << 8) - 1)
#define RPA_ENCODING_ICASE (1 << 8)
#define RPA_ENCODING_ICASE_BYTE (RPA_ENCODING_BYTE | RPA_ENCODING_ICASE)
#define RPA_ENCODING_ICASE_UTF8 (RPA_ENCODING_UTF8 | RPA_ENCODING_ICASE)
#define RPA_ENCODING_ICASE_UTF16LE (RPA_ENCODING_UTF16LE | RPA_ENCODING_ICASE)
-
+#define RPA_ENCODING_MASK ((1 << 8) - 1)
#define RPA_DEFAULT_STACKSIZE (256 * 1024)
#ifdef __cplusplus
* Scan the stream using the specified rule id.
* \param stat Pointer to object of type \ref rpastat_t
* \param rid Rule ID of the BNF root.
- * \param input The starting point of the operation. <b>Important:</b> input >= start && input < end
+ * \param encoding Input stream encoding. This parameter specifies how to interpret the data in the input stream. If you want the parser to
+ * ignore the case of the parsed data use encodings with _ICASE_. Supported encodings are:
+ * - RPA_ENCODING_UTF8
+ * - RPA_ENCODING_UTF16LE
+ * - RPA_ENCODING_BYTE
+ * - RPA_ENCODING_ICASE_UTF8
+ * - RPA_ENCODING_ICASE_UTF16LE
+ * - RPA_ENCODING_ICASE_BYTE
+ * \param input The starting point of the operation. The input pointer must be: input >= start and input < end
* \param start The start of the buffer.
- * \param end The end of the buffer, end will never be dereferenced.
+ * \param end The end of the buffer, it should be: end = start + buffersize.
* \param where If this function returns a number greater than 0 (a match was found) this parameter will be
* initialized with a pointer the place in the buffer where the match was found.
* \return If successful return the size of the matched string in bytes, if no match was found
* return 0, return negative in case of error.
*/
-rlong rpa_stat_scan(rpastat_t *stat, rparule_t rid, const rchar *input, const rchar *start, const rchar *end, const rchar **where);
-rlong rpa_stat_parse(rpastat_t *stat, rparule_t rid, const rchar *input, const rchar *start, const rchar *end, rarray_t *records);
-rlong rpa_stat_match(rpastat_t *stat, rparule_t rid, const rchar *input, const rchar *start, const rchar *end);
-rlong rpa_stat_exec(rpastat_t *stat, rvm_asmins_t *prog, rword off, const rchar *input, const rchar *start, const rchar *end, rarray_t *records);
+rlong rpa_stat_scan(rpastat_t *stat, rparule_t rid, ruint encoding, const rchar *input, const rchar *start, const rchar *end, const rchar **where);
+
+/**
+ * \brief Parse an input stream
+ *
+ * Parse the stream using the specified rule id.
+ * \param stat Pointer to object of type \ref rpastat_t
+ * \param rid Rule ID of the BNF root.
+ * \param encoding Input stream encoding. This parameter specifies how to interpret the data in the input stream. If you want the parser to
+ * ignore the case of the parsed data use encodings with _ICASE_. Supported encodings are:
+ * - RPA_ENCODING_UTF8
+ * - RPA_ENCODING_UTF16LE
+ * - RPA_ENCODING_BYTE
+ * - RPA_ENCODING_ICASE_UTF8
+ * - RPA_ENCODING_ICASE_UTF16LE
+ * - RPA_ENCODING_ICASE_BYTE
+ * \param input The starting point of the operation. The input pointer must be: input >= start and input < end
+ * \param start The start of the buffer.
+ * \param end The end of the buffer, it should be: end = start + buffersize.
+ * \param records If the function is successful this parameter will be used to
+ * store the AST records the parser generates. The records stored in the array
+ * are of type \ref rparecord_t
+ * \return If successful return the size of the matched string in bytes, if the input stream cannot be matched against the BNF
+ * return 0, return negative in case of error.
+ */
+rlong rpa_stat_parse(rpastat_t *stat, rparule_t rid, ruint encoding, const rchar *input, const rchar *start, const rchar *end, rarray_t *records);
+
+/**
+ * \brief Match an input stream
+ *
+ * Match the stream using the specified rule id. This function is similar to \ref rpa_stat_parse, but it
+ * doesn't generate parsing records. It just returns the size of the matched input stream.
+ * \param stat Pointer to object of type \ref rpastat_t
+ * \param rid Rule ID of the BNF root.
+ * \param encoding Input stream encoding. This parameter specifies how to interpret the data in the input stream. If you want the parser to
+ * ignore the case of the parsed data use encodings with _ICASE_. Supported encodings are:
+ * - RPA_ENCODING_UTF8
+ * - RPA_ENCODING_UTF16LE
+ * - RPA_ENCODING_BYTE
+ * - RPA_ENCODING_ICASE_UTF8
+ * - RPA_ENCODING_ICASE_UTF16LE
+ * - RPA_ENCODING_ICASE_BYTE
+ * \param input The starting point of the operation. The input pointer must be: input >= start and input < end
+ * \param start The start of the buffer.
+ * \param end The end of the buffer, it should be: end = start + buffersize.
+ * \return If successful return the size of the matched string in bytes, if the input stream cannot be matched against the BNF
+ * return 0, return negative in case of error.
+ */
+rlong rpa_stat_match(rpastat_t *stat, rparule_t rid, ruint encoding, const rchar *input, const rchar *start, const rchar *end);
+
+
+/**
+ * \brief Execute the parser byte code to parse/match an input stream.
+ *
+ * This is a low-level function used by \ref rpa_stat_parse, \ref rpa_stat_match and \ref rpa_stat_scan. You shouldn't
+ * need to use it directly.
+ * \param stat Pointer to object of type \ref rpastat_t
+ * \param prog Byte code
+ * \param off Execution start point
+ * \param encoding Input stream encoding. This parameter specifies how to interpret the data in the input stream. If you want the parser to
+ * ignore the case of the parsed data use encodings with _ICASE_. Supported encodings are:
+ * - RPA_ENCODING_UTF8
+ * - RPA_ENCODING_UTF16LE
+ * - RPA_ENCODING_BYTE
+ * - RPA_ENCODING_ICASE_UTF8
+ * - RPA_ENCODING_ICASE_UTF16LE
+ * - RPA_ENCODING_ICASE_BYTE
+ * \param input The starting point of the operation. The input pointer must be: input >= start and input < end
+ * \param start The start of the buffer.
+ * \param end The end of the buffer, it should be: end = start + buffersize.
+ * \param records If the function is successful this parameter will be used to
+ * store the AST records the parser generates. The records stored in the array
+ * are of type \ref rparecord_t
+ * \return If successful return the size of the matched string in bytes, if the input stream cannot be matched against the BNF
+ * return 0, return negative in case of error.
+ */
+rlong rpa_stat_exec(rpastat_t *stat, rvm_asmins_t *prog, rword off, ruint encoding, const rchar *input, const rchar *start, const rchar *end, rarray_t *records);
+
+
+/**
+ * \brief Abort the current operation.
+ *
+ * Use this function to abort \ref rpa_stat_parse, \ref rpa_stat_match, \ref rpa_stat_scan or \ref rpa_stat_exec.
+ * \param stat Pointer to object of type \ref rpastat_t
+ * \return If successful return 0, otherwise return negative.
+ */
rint rpa_stat_abort(rpastat_t *stat);
-rint rpa_stat_setencoding(rpastat_t *stat, ruint encoding);
/**
* \brief Return the error code of the last occurred error.
*
- * \param dbex Pointer to \ref rpadbex_t object.
+ * \param stat Pointer to \ref rpastat_t object.
* \return The error code of the last occurred error. If this function fails the
* return value is negative.
*/
/**
* \brief Get error information for the last occurred error.
*
- * \param dbex Pointer to \ref rpadbex_t object.
+ * \param stat Pointer to \ref rpastat_t object.
* \param errinfo Pointer to \ref rpa_errinfo_t buffer that will be
* filled with the error information. This parameter cannot be NULL.
* \return Return 0 if the function is successful or negative otherwise. If this function fails the
rvmcpu_t *cpu;
};
-rint rpa_stat_init(rpastat_t *stat, const rchar *input, const rchar *start, const rchar *end, rarray_t *records);
+rint rpa_stat_init(rpastat_t *stat, ruint encoding, const rchar *input, const rchar *start, const rchar *end, rarray_t *records);
void rpa_stat_cachedisable(rpastat_t *stat, ruint disable);
void rpa_stat_cacheinvalidate(rpastat_t *stat);
rint rpa_stat_matchchr(rpastat_t *stat, rssize_t top, rulong wc);
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+12, NULL);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+12, NULL);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+12, NULL);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+12, NULL);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+12, NULL);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+12, NULL);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+12, NULL);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+12, NULL);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+12, NULL);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+12, NULL);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+12, NULL);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+12, NULL);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+30, records);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+30, records);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+50);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+50);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_LOO, DA, XX, 5));
}
for (i = 0; i < 1000; i++) {
- rpa_stat_init(stat, teststr, teststr, teststr+170);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+170);
rpa_stat_cachedisable(stat, 0);
rvm_cpu_exec(stat->cpu, rvm_codegen_getcode(co->cg, 0), mainoff);
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+12, records);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+12, records);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+3, NULL);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+3, NULL);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+3, NULL);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+3, NULL);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+12, NULL);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+12, NULL);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));
stat = rpa_stat_create(NULL, 4096);
rvm_cpu_addswitable(stat->cpu, "common_table", common_calltable);
- rpa_stat_init(stat, teststr, teststr, teststr+12, NULL);
+ rpa_stat_init(stat, RPA_ENCODING_UTF8, teststr, teststr, teststr+12, NULL);
mainoff = rvm_codegen_addins(co->cg, rvm_asml(RVM_NOP, XX, XX, XX, -1));
rvm_codegen_addins(co->cg, rvm_asml(RVM_MOV, R_TOP, DA, XX, -1));