2012-10-10 15:58:03 +02:00
|
|
|
#include <string.h>
|
2013-02-02 19:31:18 -05:00
|
|
|
#include "../internal.h"
|
2012-05-26 16:00:43 +02:00
|
|
|
#include "parser_internal.h"
|
|
|
|
|
|
|
|
|
|
// Parse callback for charset parsers.
//
// Reads 8 bits from state->input_stream and tests that value against the
// HCharset carried in env.
//
// Returns a result wrapping a freshly allocated TT_UINT token whose value is
// the byte that was read, or NULL when charset_isset() rejects the byte.
static HParseResult* parse_charset(void *env, HParseState *state) {
  HCharset allowed = (HCharset)env;
  uint8_t byte = h_read_bits(&state->input_stream, 8, false);

  // Guard clause: anything outside the set is a parse failure.
  if (!charset_isset(allowed, byte))
    return NULL;

  HParsedToken *token = a_new(HParsedToken, 1);
  token->token_type = TT_UINT;
  token->uint = byte;
  return make_result(state->arena, token);
}
|
|
|
|
|
|
2013-02-02 19:31:18 -05:00
|
|
|
// Desugar callback for charset parsers.
//
// Allocates one HCFChoice, tags it HCF_CHARSET, points it at the HCharset
// passed in env and leaves its action unset (NULL).
static HCFChoice* desugar_charset(HAllocator *mm__, void *env) {
  HCFChoice *choice = h_new(HCFChoice, 1);

  choice->action  = NULL;
  choice->charset = (HCharset)env;
  choice->type    = HCF_CHARSET;

  return choice;
}
|
|
|
|
|
|
2013-04-22 18:06:17 -07:00
|
|
|
// FUTURE: this is horribly inefficient
|
|
|
|
|
static bool cs_ctrvm(HRVMProg *prog, void *env) {
|
|
|
|
|
HCharset cs = (HCharset)env;
|
|
|
|
|
uint16_t start = h_rvm_get_ip(prog);
|
|
|
|
|
for (size_t i=0; i<256; ++i) {
|
|
|
|
|
if (charset_isset(cs, i)) {
|
|
|
|
|
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
|
|
|
|
|
h_rvm_insert_insn(prog, RVM_MATCH, i & i << 8);
|
|
|
|
|
h_rvm_insert_insn(prog, RVM_GOTO, 0);
|
|
|
|
|
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0);
|
|
|
|
|
for (size_t i=start; i<jump; ++i) {
|
|
|
|
|
if (RVM_GOTO == prog->insns[i].op)
|
|
|
|
|
h_rvm_patch_arg(prog, i, jump);
|
|
|
|
|
}
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
2012-05-26 16:00:43 +02:00
|
|
|
static const HParserVtable charset_vt = {
|
|
|
|
|
.parse = parse_charset,
|
2012-12-18 18:10:40 -05:00
|
|
|
.isValidRegular = h_true,
|
|
|
|
|
.isValidCF = h_true,
|
2013-02-02 19:31:18 -05:00
|
|
|
.desugar = desugar_charset,
|
2013-04-22 18:06:17 -07:00
|
|
|
.compile_to_rvm = cs_ctrvm,
|
2012-05-26 16:00:43 +02:00
|
|
|
};
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
HParser* h_ch_range(const uint8_t lower, const uint8_t upper) {
|
2012-10-10 15:58:03 +02:00
|
|
|
return h_ch_range__m(&system_allocator, lower, upper);
|
|
|
|
|
}
|
2013-04-26 20:36:54 -07:00
|
|
|
// Build a charset parser for the inclusive byte range [lower, upper].
//
// Every value 0..255 is written explicitly: set when lower <= value <= upper,
// cleared otherwise (so lower > upper yields a charset with nothing set).
// The resulting charset is handed to h_new_parser() together with charset_vt.
HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) {
  HCharset cs = new_charset(mm__);

  for (int c = 0; c < 256; c++) {
    int in_range = (c >= lower) && (c <= upper);
    charset_set(cs, c, in_range);
  }

  return h_new_parser(mm__, &charset_vt, cs);
}
|
|
|
|
|
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
// Shared implementation behind h_in*/h_not_in*.
//
//   options, count  the listed byte values
//   val             value stored for each listed byte; every other byte
//                   value is first initialised to 1 - val
//
// Callers in this file pass val = 1 (h_in) or val = 0 (h_not_in).
// Returns the parser produced by h_new_parser() with charset_vt.
static HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) {
  HCharset cs = new_charset(mm__);
  const int fill = 1 - val;

  // Start from the complement state for all 256 byte values...
  for (size_t byte = 0; byte < 256; byte++)
    charset_set(cs, byte, fill);

  // ...then overwrite the entries named in options.
  for (size_t k = 0; k < count; k++) {
    uint8_t member = options[k];
    charset_set(cs, member, val);
  }

  return h_new_parser(mm__, &charset_vt, cs);
}
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
HParser* h_in(const uint8_t *options, size_t count) {
|
2012-10-10 15:58:03 +02:00
|
|
|
return h_in_or_not__m(&system_allocator, options, count, 1);
|
|
|
|
|
}
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
// Charset parser in which exactly the `count` bytes listed in `options` are
// set; allocates through the caller-supplied mm__.
HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
  const int listed_value = 1;  // listed bytes are set, all others cleared
  return h_in_or_not__m(mm__, options, count, listed_value);
}
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
HParser* h_not_in(const uint8_t *options, size_t count) {
|
2012-10-10 15:58:03 +02:00
|
|
|
return h_in_or_not__m(&system_allocator, options, count, 0);
|
|
|
|
|
}
|
|
|
|
|
|
2013-04-26 20:36:54 -07:00
|
|
|
// Charset parser in which every byte value is set except the `count` bytes
// listed in `options`; allocates through the caller-supplied mm__.
HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) {
  const int listed_value = 0;  // listed bytes are cleared, all others set
  return h_in_or_not__m(mm__, options, count, listed_value);
}
|
|
|
|
|
|