Regex all works now! (Merge branch 'master' of https://github.com/thequux/hammer)

This commit is contained in:
Meredith L. Patterson 2013-05-24 06:09:21 -07:00
commit cb6e7f4229
27 changed files with 245 additions and 150 deletions

View file

@ -15,21 +15,21 @@ const HParser* document = NULL;
void init_parser(void) void init_parser(void)
{ {
// CORE // CORE
const HParser *digit = h_ch_range(0x30, 0x39); HParser *digit = h_ch_range(0x30, 0x39);
const HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL); HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL);
// AUX. // AUX.
const HParser *plus = h_ch('+'); HParser *plus = h_ch('+');
const HParser *slash = h_ch('/'); HParser *slash = h_ch('/');
const HParser *equals = h_ch('='); HParser *equals = h_ch('=');
const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL);
const HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16); HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16);
const HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4); HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4);
const HParser *base64_3 = h_repeat_n(bsfdig, 4); HParser *base64_3 = h_repeat_n(bsfdig, 4);
const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL);
const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL);
const HParser *base64 = h_sequence(h_many(base64_3), HParser *base64 = h_sequence(h_many(base64_3),
h_optional(h_choice(base64_2, h_optional(h_choice(base64_2,
base64_1, NULL)), base64_1, NULL)),
NULL); NULL);

View file

@ -22,7 +22,7 @@
// They must be named act_<rulename>. // They must be named act_<rulename>.
/// ///
const HParsedToken *act_bsfdig(const HParseResult *p) HParsedToken *act_bsfdig(const HParseResult *p)
{ {
HParsedToken *res = H_MAKE_UINT(0); HParsedToken *res = H_MAKE_UINT(0);
@ -53,7 +53,7 @@ H_ACT_APPLY(act_index0, h_act_index, 0);
#define act_document act_index0 #define act_document act_index0
// General-form action to turn a block of base64 digits into bytes. // General-form action to turn a block of base64 digits into bytes.
const HParsedToken *act_base64_n(int n, const HParseResult *p) HParsedToken *act_base64_n(int n, const HParseResult *p)
{ {
HParsedToken *res = H_MAKE_SEQN(n); HParsedToken *res = H_MAKE_SEQN(n);
@ -82,7 +82,7 @@ H_ACT_APPLY(act_base64_3, act_base64_n, 3);
H_ACT_APPLY(act_base64_2, act_base64_n, 2); H_ACT_APPLY(act_base64_2, act_base64_n, 2);
H_ACT_APPLY(act_base64_1, act_base64_n, 1); H_ACT_APPLY(act_base64_1, act_base64_n, 1);
const HParsedToken *act_base64(const HParseResult *p) HParsedToken *act_base64(const HParseResult *p)
{ {
assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->token_type == TT_SEQUENCE);
assert(p->ast->seq->used == 2); assert(p->ast->seq->used == 2);
@ -96,7 +96,7 @@ const HParsedToken *act_base64(const HParseResult *p)
h_seq_append(res, seq->elements[i]); h_seq_append(res, seq->elements[i]);
// append one trailing base64_2 or _1 block // append one trailing base64_2 or _1 block
const HParsedToken *tok = h_seq_index(p->ast, 1); HParsedToken *tok = h_seq_index(p->ast, 1);
if(tok->token_type == TT_SEQUENCE) if(tok->token_type == TT_SEQUENCE)
h_seq_append(res, tok); h_seq_append(res, tok);
@ -108,7 +108,7 @@ const HParsedToken *act_base64(const HParseResult *p)
// Set up the parser with the grammar to be recognized. // Set up the parser with the grammar to be recognized.
/// ///
const HParser *init_parser(void) HParser *init_parser(void)
{ {
// CORE // CORE
H_RULE (digit, h_ch_range(0x30, 0x39)); H_RULE (digit, h_ch_range(0x30, 0x39));

View file

@ -48,7 +48,7 @@ uint8_t bsfdig_value(const HParsedToken *p)
// helper: append a byte value to a sequence // helper: append a byte value to a sequence
#define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b)) #define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b))
const HParsedToken *act_base64(const HParseResult *p) HParsedToken *act_base64(const HParseResult *p)
{ {
assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->token_type == TT_SEQUENCE);
assert(p->ast->seq->used == 2); assert(p->ast->seq->used == 2);

View file

@ -86,7 +86,7 @@ void set_rdata(struct dns_rr *rr, HCountedArray *rdata) {
} }
} }
const HParsedToken* act_header(const HParseResult *p) { HParsedToken* act_header(const HParseResult *p) {
HParsedToken **fields = h_seq_elements(p->ast); HParsedToken **fields = h_seq_elements(p->ast);
dns_header_t header_ = { dns_header_t header_ = {
.id = H_CAST_UINT(fields[0]), .id = H_CAST_UINT(fields[0]),
@ -109,7 +109,7 @@ const HParsedToken* act_header(const HParseResult *p) {
return H_MAKE(dns_header_t, header); return H_MAKE(dns_header_t, header);
} }
const HParsedToken* act_label(const HParseResult *p) { HParsedToken* act_label(const HParseResult *p) {
dns_label_t *r = H_ALLOC(dns_label_t); dns_label_t *r = H_ALLOC(dns_label_t);
r->len = h_seq_len(p->ast); r->len = h_seq_len(p->ast);
@ -121,7 +121,7 @@ const HParsedToken* act_label(const HParseResult *p) {
return H_MAKE(dns_label_t, r); return H_MAKE(dns_label_t, r);
} }
const HParsedToken* act_rr(const HParseResult *p) { HParsedToken* act_rr(const HParseResult *p) {
dns_rr_t *rr = H_ALLOC(dns_rr_t); dns_rr_t *rr = H_ALLOC(dns_rr_t);
rr->name = *H_FIELD(dns_domain_t, 0); rr->name = *H_FIELD(dns_domain_t, 0);
@ -136,7 +136,7 @@ const HParsedToken* act_rr(const HParseResult *p) {
return H_MAKE(dns_rr_t, rr); return H_MAKE(dns_rr_t, rr);
} }
const HParsedToken* act_question(const HParseResult *p) { HParsedToken* act_question(const HParseResult *p) {
dns_question_t *q = H_ALLOC(dns_question_t); dns_question_t *q = H_ALLOC(dns_question_t);
HParsedToken **fields = h_seq_elements(p->ast); HParsedToken **fields = h_seq_elements(p->ast);
@ -153,7 +153,7 @@ const HParsedToken* act_question(const HParseResult *p) {
return H_MAKE(dns_question_t, q); return H_MAKE(dns_question_t, q);
} }
const HParsedToken* act_message(const HParseResult *p) { HParsedToken* act_message(const HParseResult *p) {
h_pprint(stdout, p->ast, 0, 2); h_pprint(stdout, p->ast, 0, 2);
dns_message_t *msg = H_ALLOC(dns_message_t); dns_message_t *msg = H_ALLOC(dns_message_t);

View file

@ -18,8 +18,8 @@ bool validate_label(HParseResult *p) {
#define act_label h_act_flatten #define act_label h_act_flatten
const HParsedToken* act_domain(const HParseResult *p) { HParsedToken* act_domain(const HParseResult *p) {
const HParsedToken *ret = NULL; HParsedToken *ret = NULL;
char *arr = NULL; char *arr = NULL;
switch(p->ast->token_type) { switch(p->ast->token_type) {
@ -56,8 +56,8 @@ const HParsedToken* act_domain(const HParseResult *p) {
return ret; return ret;
} }
const HParser* init_domain() { HParser* init_domain() {
static const HParser *ret = NULL; static HParser *ret = NULL;
if (ret) if (ret)
return ret; return ret;
@ -76,8 +76,8 @@ const HParser* init_domain() {
return ret; return ret;
} }
const HParser* init_character_string() { HParser* init_character_string() {
static const HParser *cstr = NULL; static HParser *cstr = NULL;
if (cstr) if (cstr)
return cstr; return cstr;

View file

@ -4,9 +4,9 @@
#include "../src/hammer.h" #include "../src/hammer.h"
#include "../src/glue.h" #include "../src/glue.h"
const HParser* init_domain(); HParser* init_domain();
const HParser* init_character_string(); HParser* init_character_string();
const HParsedToken* act_index0(const HParseResult *p); HParsedToken* act_index0(const HParseResult *p);
#endif #endif

View file

@ -17,7 +17,7 @@ bool validate_null(HParseResult *p) {
return (65536 > p->ast->seq->used); return (65536 > p->ast->seq->used);
} }
const HParsedToken *act_null(const HParseResult *p) { HParsedToken *act_null(const HParseResult *p) {
dns_rr_null_t *null = H_ALLOC(dns_rr_null_t); dns_rr_null_t *null = H_ALLOC(dns_rr_null_t);
size_t len = h_seq_len(p->ast); size_t len = h_seq_len(p->ast);
@ -28,7 +28,7 @@ const HParsedToken *act_null(const HParseResult *p) {
return H_MAKE(dns_rr_null_t, null); return H_MAKE(dns_rr_null_t, null);
} }
const HParsedToken *act_txt(const HParseResult *p) { HParsedToken *act_txt(const HParseResult *p) {
dns_rr_txt_t *txt = H_ALLOC(dns_rr_txt_t); dns_rr_txt_t *txt = H_ALLOC(dns_rr_txt_t);
const HCountedArray *arr = H_CAST_SEQ(p->ast); const HCountedArray *arr = H_CAST_SEQ(p->ast);
@ -47,7 +47,7 @@ const HParsedToken *act_txt(const HParseResult *p) {
return H_MAKE(dns_rr_txt_t, txt); return H_MAKE(dns_rr_txt_t, txt);
} }
const HParsedToken* act_cstr(const HParseResult *p) { HParsedToken* act_cstr(const HParseResult *p) {
dns_cstr_t *cs = H_ALLOC(dns_cstr_t); dns_cstr_t *cs = H_ALLOC(dns_cstr_t);
const HCountedArray *arr = H_CAST_SEQ(p->ast); const HCountedArray *arr = H_CAST_SEQ(p->ast);
@ -60,7 +60,7 @@ const HParsedToken* act_cstr(const HParseResult *p) {
return H_MAKE(dns_cstr_t, cs); return H_MAKE(dns_cstr_t, cs);
} }
const HParsedToken* act_soa(const HParseResult *p) { HParsedToken* act_soa(const HParseResult *p) {
dns_rr_soa_t *soa = H_ALLOC(dns_rr_soa_t); dns_rr_soa_t *soa = H_ALLOC(dns_rr_soa_t);
soa->mname = *H_FIELD(dns_domain_t, 0); soa->mname = *H_FIELD(dns_domain_t, 0);
@ -74,7 +74,7 @@ const HParsedToken* act_soa(const HParseResult *p) {
return H_MAKE(dns_rr_soa_t, soa); return H_MAKE(dns_rr_soa_t, soa);
} }
const HParsedToken* act_wks(const HParseResult *p) { HParsedToken* act_wks(const HParseResult *p) {
dns_rr_wks_t *wks = H_ALLOC(dns_rr_wks_t); dns_rr_wks_t *wks = H_ALLOC(dns_rr_wks_t);
wks->address = H_FIELD_UINT(0); wks->address = H_FIELD_UINT(0);
@ -87,7 +87,7 @@ const HParsedToken* act_wks(const HParseResult *p) {
return H_MAKE(dns_rr_wks_t, wks); return H_MAKE(dns_rr_wks_t, wks);
} }
const HParsedToken* act_hinfo(const HParseResult *p) { HParsedToken* act_hinfo(const HParseResult *p) {
dns_rr_hinfo_t *hinfo = H_ALLOC(dns_rr_hinfo_t); dns_rr_hinfo_t *hinfo = H_ALLOC(dns_rr_hinfo_t);
hinfo->cpu = *H_FIELD(dns_cstr_t, 0); hinfo->cpu = *H_FIELD(dns_cstr_t, 0);
@ -96,7 +96,7 @@ const HParsedToken* act_hinfo(const HParseResult *p) {
return H_MAKE(dns_rr_hinfo_t, hinfo); return H_MAKE(dns_rr_hinfo_t, hinfo);
} }
const HParsedToken* act_minfo(const HParseResult *p) { HParsedToken* act_minfo(const HParseResult *p) {
dns_rr_minfo_t *minfo = H_ALLOC(dns_rr_minfo_t); dns_rr_minfo_t *minfo = H_ALLOC(dns_rr_minfo_t);
minfo->rmailbx = *H_FIELD(dns_domain_t, 0); minfo->rmailbx = *H_FIELD(dns_domain_t, 0);
@ -105,7 +105,7 @@ const HParsedToken* act_minfo(const HParseResult *p) {
return H_MAKE(dns_rr_minfo_t, minfo); return H_MAKE(dns_rr_minfo_t, minfo);
} }
const HParsedToken* act_mx(const HParseResult *p) { HParsedToken* act_mx(const HParseResult *p) {
dns_rr_mx_t *mx = H_ALLOC(dns_rr_mx_t); dns_rr_mx_t *mx = H_ALLOC(dns_rr_mx_t);
mx->preference = H_FIELD_UINT(0); mx->preference = H_FIELD_UINT(0);
@ -120,8 +120,8 @@ const HParsedToken* act_mx(const HParseResult *p) {
/// ///
#define RDATA_TYPE_MAX 16 #define RDATA_TYPE_MAX 16
const HParser* init_rdata(uint16_t type) { HParser* init_rdata(uint16_t type) {
static const HParser *parsers[RDATA_TYPE_MAX+1]; static HParser *parsers[RDATA_TYPE_MAX+1];
static int inited = 0; static int inited = 0;
if (type >= sizeof(parsers)) if (type >= sizeof(parsers))

View file

@ -3,6 +3,6 @@
#include "../src/hammer.h" #include "../src/hammer.h"
const HParser* init_rdata(uint16_t type); HParser* init_rdata(uint16_t type);
#endif #endif

View file

@ -53,7 +53,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length), HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length),
**heads_n = a_new(HRVMTrace*, prog->length); **heads_n = a_new(HRVMTrace*, prog->length);
HRVMTrace *ret_trace; HRVMTrace *ret_trace = NULL;
uint8_t *insn_seen = a_new(uint8_t, prog->length); // 0 -> not seen, 1->processed, 2->queued uint8_t *insn_seen = a_new(uint8_t, prog->length); // 0 -> not seen, 1->processed, 2->queued
HRVMThread *ip_queue = a_new(HRVMThread, prog->length); HRVMThread *ip_queue = a_new(HRVMThread, prog->length);
@ -62,6 +62,7 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
#define THREAD ip_queue[ipq_top-1] #define THREAD ip_queue[ipq_top-1]
#define PUSH_SVM(op_, arg_) do { \ #define PUSH_SVM(op_, arg_) do { \
HRVMTrace *nt = a_new(HRVMTrace, 1); \ HRVMTrace *nt = a_new(HRVMTrace, 1); \
@ -102,15 +103,18 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
uint8_t hi, lo; uint8_t hi, lo;
uint16_t arg; uint16_t arg;
while(ipq_top > 0) { while(ipq_top > 0) {
if (insn_seen[THREAD.ip] == 1) if (insn_seen[THREAD.ip] == 1) {
ipq_top--; // Kill thread.
continue; continue;
}
insn_seen[THREAD.ip] = 1; insn_seen[THREAD.ip] = 1;
arg = prog->insns[THREAD.ip].arg; arg = prog->insns[THREAD.ip].arg;
switch(prog->insns[THREAD.ip].op) { switch(prog->insns[THREAD.ip].op) {
case RVM_ACCEPT: case RVM_ACCEPT:
PUSH_SVM(SVM_ACCEPT, 0); PUSH_SVM(SVM_ACCEPT, 0);
ret_trace = THREAD.trace; ret_trace = THREAD.trace;
goto run_trace; ipq_top--;
goto next_insn;
case RVM_MATCH: case RVM_MATCH:
hi = (arg >> 8) & 0xff; hi = (arg >> 8) & 0xff;
lo = arg & 0xff; lo = arg & 0xff;
@ -163,10 +167,12 @@ void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const uint8_t* input, size_
} }
// No accept was reached. // No accept was reached.
match_fail: match_fail:
h_delete_arena(arena); if (ret_trace == NULL) {
return NULL; // No match found; definite failure.
h_delete_arena(arena);
return NULL;
}
run_trace:
// Invert the direction of the trace linked list. // Invert the direction of the trace linked list.
ret_trace = invert_trace(ret_trace); ret_trace = invert_trace(ret_trace);
@ -213,8 +219,9 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
case SVM_ACTION: case SVM_ACTION:
// Action should modify stack appropriately // Action should modify stack appropriately
if (!orig_prog->actions[cur->arg].action(arena, &ctx, orig_prog->actions[cur->arg].env)) { if (!orig_prog->actions[cur->arg].action(arena, &ctx, orig_prog->actions[cur->arg].env)) {
// action failed... abort somehow // action failed... abort somehow
// TODO: Actually abort goto fail;
} }
break; break;
case SVM_CAPTURE: case SVM_CAPTURE:
@ -243,7 +250,7 @@ HParseResult *run_trace(HAllocator *mm__, HRVMProg *orig_prog, HRVMTrace *trace,
return res; return res;
} }
} }
fail:
h_delete_arena(arena); h_delete_arena(arena);
return NULL; return NULL;
} }
@ -294,7 +301,7 @@ void h_rvm_patch_arg(HRVMProg *prog, uint16_t ip, uint16_t new_val) {
size_t h_svm_count_to_mark(HSVMContext *ctx) { size_t h_svm_count_to_mark(HSVMContext *ctx) {
size_t ctm; size_t ctm;
for (ctm = 0; ctm < ctx->stack_count-1; ctm++) { for (ctm = 0; ctm < ctx->stack_count; ctm++) {
if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK) if (ctx->stack[ctx->stack_count - 1 - ctm]->token_type == TT_MARK)
return ctm; return ctm;
} }
@ -315,13 +322,13 @@ bool h_svm_action_make_sequence(HArena *arena, HSVMContext *ctx, void* env) {
for (size_t i = 0; i < n_items; i++) { for (size_t i = 0; i < n_items; i++) {
ret_carray->elements[i] = ctx->stack[ctx->stack_count - n_items + i]; ret_carray->elements[i] = ctx->stack[ctx->stack_count - n_items + i];
} }
ret_carray->used = n_items;
ctx->stack_count -= n_items; ctx->stack_count -= n_items;
return true; return true;
} }
bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) { bool h_svm_action_clear_to_mark(HArena *arena, HSVMContext *ctx, void* env) {
while (ctx->stack_count > 0) { while (ctx->stack_count > 0) { if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK)
if (ctx->stack[--ctx->stack_count]->token_type == TT_MARK)
return true; return true;
} }
return false; // no mark found. return false; // no mark found.

View file

@ -67,12 +67,12 @@ void dump_rvm_prog(HRVMProg *prog) {
if (high < low) if (high < low)
printf("NONE\n"); printf("NONE\n");
else { else {
if (low >= 0x32 && low <= 0x7e) if (low >= 0x20 && low <= 0x7e)
printf("%02hhx ('%c')", low, low); printf("%02hhx ('%c')", low, low);
else else
printf("%02hhx", low); printf("%02hhx", low);
if (high >= 0x32 && high <= 0x7e) if (high >= 0x20 && high <= 0x7e)
printf(" - %02hhx ('%c')\n", high, high); printf(" - %02hhx ('%c')\n", high, high);
else else
printf(" - %02hhx\n", high); printf(" - %02hhx\n", high);

View file

@ -9,7 +9,8 @@
HCountedArray *h_carray_new_sized(HArena * arena, size_t size) { HCountedArray *h_carray_new_sized(HArena * arena, size_t size) {
HCountedArray *ret = h_arena_malloc(arena, sizeof(HCountedArray)); HCountedArray *ret = h_arena_malloc(arena, sizeof(HCountedArray));
assert(size > 0); if (size == 0)
size = 1;
ret->used = 0; ret->used = 0;
ret->capacity = size; ret->capacity = size;
ret->arena = arena; ret->arena = arena;

View file

@ -5,7 +5,7 @@
#include "parsers/parser_internal.h" #include "parsers/parser_internal.h"
// Helper to build HAction's that pick one index out of a sequence. // Helper to build HAction's that pick one index out of a sequence.
const HParsedToken *h_act_index(int i, const HParseResult *p) HParsedToken *h_act_index(int i, const HParseResult *p)
{ {
if(!p) return NULL; if(!p) return NULL;
@ -23,7 +23,7 @@ const HParsedToken *h_act_index(int i, const HParseResult *p)
return tok->seq->elements[i]; return tok->seq->elements[i];
} }
const HParsedToken *h_act_first(const HParseResult *p) { HParsedToken *h_act_first(const HParseResult *p) {
assert(p->ast); assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->token_type == TT_SEQUENCE);
assert(p->ast->seq->used > 0); assert(p->ast->seq->used > 0);
@ -31,7 +31,7 @@ const HParsedToken *h_act_first(const HParseResult *p) {
return p->ast->seq->elements[0]; return p->ast->seq->elements[0];
} }
const HParsedToken *h_act_second(const HParseResult *p) { HParsedToken *h_act_second(const HParseResult *p) {
assert(p->ast); assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->token_type == TT_SEQUENCE);
assert(p->ast->seq->used > 0); assert(p->ast->seq->used > 0);
@ -39,7 +39,7 @@ const HParsedToken *h_act_second(const HParseResult *p) {
return p->ast->seq->elements[1]; return p->ast->seq->elements[1];
} }
const HParsedToken *h_act_last(const HParseResult *p) { HParsedToken *h_act_last(const HParseResult *p) {
assert(p->ast); assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->token_type == TT_SEQUENCE);
assert(p->ast->seq->used > 0); assert(p->ast->seq->used > 0);
@ -59,7 +59,7 @@ static void act_flatten_(HCountedArray *seq, const HParsedToken *tok) {
} }
} }
const HParsedToken *h_act_flatten(const HParseResult *p) { HParsedToken *h_act_flatten(const HParseResult *p) {
HCountedArray *seq = h_carray_new(p->arena); HCountedArray *seq = h_carray_new(p->arena);
act_flatten_(seq, p->ast); act_flatten_(seq, p->ast);
@ -72,7 +72,7 @@ const HParsedToken *h_act_flatten(const HParseResult *p) {
return res; return res;
} }
const HParsedToken *h_act_ignore(const HParseResult *p) { HParsedToken *h_act_ignore(const HParseResult *p) {
return NULL; return NULL;
} }

View file

@ -55,13 +55,13 @@
// //
#define H_RULE(rule, def) const HParser *rule = def #define H_RULE(rule, def) HParser *rule = def
#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) #define H_ARULE(rule, def) HParser *rule = h_action(def, act_ ## rule)
#define H_VRULE(rule, def) const HParser *rule = \ #define H_VRULE(rule, def) HParser *rule = \
h_attr_bool(def, validate_ ## rule) h_attr_bool(def, validate_ ## rule)
#define H_VARULE(rule, def) const HParser *rule = \ #define H_VARULE(rule, def) HParser *rule = \
h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule) h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule)
#define H_AVRULE(rule, def) const HParser *rule = \ #define H_AVRULE(rule, def) HParser *rule = \
h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule)
@ -88,17 +88,17 @@
// action such as h_act_index. // action such as h_act_index.
// //
const HParsedToken *h_act_index(int i, const HParseResult *p); HParsedToken *h_act_index(int i, const HParseResult *p);
const HParsedToken *h_act_first(const HParseResult *p); HParsedToken *h_act_first(const HParseResult *p);
const HParsedToken *h_act_second(const HParseResult *p); HParsedToken *h_act_second(const HParseResult *p);
const HParsedToken *h_act_last(const HParseResult *p); HParsedToken *h_act_last(const HParseResult *p);
const HParsedToken *h_act_flatten(const HParseResult *p); HParsedToken *h_act_flatten(const HParseResult *p);
const HParsedToken *h_act_ignore(const HParseResult *p); HParsedToken *h_act_ignore(const HParseResult *p);
// Define 'myaction' as a specialization of 'paction' by supplying the leading // Define 'myaction' as a specialization of 'paction' by supplying the leading
// parameters. // parameters.
#define H_ACT_APPLY(myaction, paction, ...) \ #define H_ACT_APPLY(myaction, paction, ...) \
const HParsedToken *myaction(const HParseResult *p) { \ HParsedToken *myaction(const HParseResult *p) { \
return paction(__VA_ARGS__, p); \ return paction(__VA_ARGS__, p); \
} }

View file

@ -111,7 +111,7 @@ typedef struct HBitWriter_ HBitWriter;
* say, structs) and stuff values for them into the void* in the * say, structs) and stuff values for them into the void* in the
* tagged union in HParsedToken. * tagged union in HParsedToken.
*/ */
typedef const HParsedToken* (*HAction)(const HParseResult *p); typedef HParsedToken* (*HAction)(const HParseResult *p);
/** /**
* Type of a boolean attribute-checking function, used in the * Type of a boolean attribute-checking function, used in the
@ -370,7 +370,7 @@ HAMMER_FN_DECL_NOARG(HParser*, h_nothing_p);
* *
* Result token type: TT_SEQUENCE * Result token type: TT_SEQUENCE
*/ */
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, const HParser* p); HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, HParser* p);
/** /**
* Given an array of parsers, p_array, apply each parser in order. The * Given an array of parsers, p_array, apply each parser in order. The
@ -379,7 +379,7 @@ HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_sequence, con
* *
* Result token type: The type of the first successful parser's result. * Result token type: The type of the first successful parser's result.
*/ */
HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, const HParser* p); HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), HParser*, h_choice, HParser* p);
/** /**
* Given two parsers, p1 and p2, this parser succeeds in the following * Given two parsers, p1 and p2, this parser succeeds in the following
@ -605,11 +605,11 @@ void h_bit_writer_free(HBitWriter* w);
// General-purpose actions for use with h_action // General-purpose actions for use with h_action
// XXX to be consolidated with glue.h when merged upstream // XXX to be consolidated with glue.h when merged upstream
const HParsedToken *h_act_first(const HParseResult *p); HParsedToken *h_act_first(const HParseResult *p);
const HParsedToken *h_act_second(const HParseResult *p); HParsedToken *h_act_second(const HParseResult *p);
const HParsedToken *h_act_last(const HParseResult *p); HParsedToken *h_act_last(const HParseResult *p);
const HParsedToken *h_act_flatten(const HParseResult *p); HParsedToken *h_act_flatten(const HParseResult *p);
const HParsedToken *h_act_ignore(const HParseResult *p); HParsedToken *h_act_ignore(const HParseResult *p);
// {{{ Benchmark functions // {{{ Benchmark functions
HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, HParser* parser, HParserTestcase* testcases); HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, HParser* parser, HParserTestcase* testcases);

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h" #include "parser_internal.h"
typedef struct { typedef struct {
@ -45,9 +46,35 @@ static bool action_isValidCF(void *env) {
return a->p->vtable->isValidCF(a->p->env); return a->p->vtable->isValidCF(a->p->env);
} }
/**
 * SVM action that runs a user-supplied HAction on the value at the top of
 * the stack.
 *
 * Expected stack shapes on entry (enforced by the asserts below):
 *   [..., MARK, value]  - the value is handed to the action as res.ast
 *   [..., MARK]         - nothing was produced; the action sees ast == NULL
 * The mark is presumably the one pushed by the RVM_PUSH that action_ctrvm
 * emits before the wrapped parser's code - confirm against the VM.
 *
 * On return the mark (and value, if any) has been replaced by the token the
 * action produced, or removed entirely when the action returned NULL.
 *
 * @param arena  arena passed through to the action via res.arena
 * @param ctx    SVM context whose stack is rewritten in place
 * @param arg    the HAction to invoke
 * @return always true; a NULL result from the action is not an error here
 */
static bool h_svm_action_action(HArena *arena, HSVMContext *ctx, void* arg) {
  // Zero-initialise so that any member we do not set explicitly below is
  // well-defined when the user callback inspects the result.
  HParseResult res = {0};
  HAction action = arg;
  assert(ctx->stack_count >= 1);
  if (ctx->stack[ctx->stack_count-1]->token_type != TT_MARK) {
    assert(ctx->stack_count >= 2 && ctx->stack[ctx->stack_count-2]->token_type == TT_MARK);
    // Slide the value down over the mark, then drop the vacated top slot.
    res.ast = ctx->stack[ctx->stack_count-2] = ctx->stack[ctx->stack_count-1];
    ctx->stack_count--;
  } else {
    // Only the mark is present; it stays on top and is overwritten or
    // popped below depending on what the action returns.
    res.ast = NULL;
  }
  res.arena = arena;
  HParsedToken *tok = action(&res);
  if (tok != NULL)
    ctx->stack[ctx->stack_count-1] = tok;  // replace value/mark with result
  else
    ctx->stack_count--;                    // no result: drop the slot
  return true; // action can't fail
}
static bool action_ctrvm(HRVMProg *prog, void* env) { static bool action_ctrvm(HRVMProg *prog, void* env) {
HParseAction *a = (HParseAction*)env; HParseAction *a = (HParseAction*)env;
return a->p->vtable->compile_to_rvm(prog, a->p->env); h_rvm_insert_insn(prog, RVM_PUSH, 0);
if (!h_compile_regex(prog, a->p))
return false;
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_action, a->action));
return true;
} }
static const HParserVtable action_vt = { static const HParserVtable action_vt = {

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h" #include "parser_internal.h"
typedef struct { typedef struct {
@ -48,9 +49,30 @@ static HCFChoice* desugar_ab(HAllocator *mm__, void *env) {
return ret; return ret;
} }
/**
 * SVM action that evaluates a user-supplied HPredicate against the value at
 * the top of the stack.
 *
 * Expected stack shapes on entry (enforced by the asserts below):
 *   [..., MARK, value]  - the value overwrites the mark and is what the
 *                         predicate sees as res.ast; it remains on the stack
 *   [..., MARK]         - the mark is popped and the predicate sees
 *                         ast == NULL
 * The mark is presumably the one pushed by the RVM_PUSH that ab_ctrvm emits
 * before the wrapped parser's code - confirm against the VM.
 *
 * @param arena  arena passed through to the predicate via res.arena
 * @param ctx    SVM context whose stack is rewritten in place
 * @param arg    the HPredicate to invoke
 * @return whatever the predicate returns
 */
static bool h_svm_action_attr_bool(HArena *arena, HSVMContext *ctx, void* arg) {
  // Zero-initialise so that any member we do not set explicitly below is
  // well-defined when the user predicate inspects the result.
  HParseResult res = {0};
  HPredicate pred = arg;
  assert(ctx->stack_count >= 1);
  if (ctx->stack[ctx->stack_count-1]->token_type != TT_MARK) {
    assert(ctx->stack_count >= 2 && ctx->stack[ctx->stack_count-2]->token_type == TT_MARK);
    ctx->stack_count--;
    // stack[stack_count] is the old top (the value); move it over the mark.
    res.ast = ctx->stack[ctx->stack_count-1] = ctx->stack[ctx->stack_count];
  } else {
    // Only the mark is present: discard it, there is no value to check.
    ctx->stack_count--;
    res.ast = NULL;
  }
  res.arena = arena;
  return pred(&res);
}
static bool ab_ctrvm(HRVMProg *prog, void *env) { static bool ab_ctrvm(HRVMProg *prog, void *env) {
HAttrBool *ab = (HAttrBool*)env; HAttrBool *ab = (HAttrBool*)env;
return h_compile_regex(prog, ab->p); h_rvm_insert_insn(prog, RVM_PUSH, 0);
if (!h_compile_regex(prog, ab->p))
return false;
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_attr_bool, ab->pred));
return true;
} }
static const HParserVtable attr_bool_vt = { static const HParserVtable attr_bool_vt = {

View file

@ -45,10 +45,10 @@ static HParsedToken *reshape_bits(const HParseResult *p, bool signedp) {
return ret; return ret;
} }
static const HParsedToken *reshape_bits_unsigned(const HParseResult *p) { static HParsedToken *reshape_bits_unsigned(const HParseResult *p) {
return reshape_bits(p, false); return reshape_bits(p, false);
} }
static const HParsedToken *reshape_bits_signed(const HParseResult *p) { static HParsedToken *reshape_bits_signed(const HParseResult *p) {
return reshape_bits(p, true); return reshape_bits(p, true);
} }

View file

@ -56,7 +56,7 @@ static bool cs_ctrvm(HRVMProg *prog, void *env) {
if (collecting) { if (collecting) {
collecting = false; collecting = false;
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0); uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
h_rvm_insert_insn(prog, RVM_MATCH, range_start | i << 8); h_rvm_insert_insn(prog, RVM_MATCH, range_start | (i-1) << 8);
h_rvm_insert_insn(prog, RVM_GOTO, 0); h_rvm_insert_insn(prog, RVM_GOTO, 0);
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog)); h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
} }

View file

@ -3,7 +3,7 @@
typedef struct { typedef struct {
size_t len; size_t len;
const HParser **p_array; HParser **p_array;
} HSequence; } HSequence;
@ -58,16 +58,16 @@ static HCFChoice* desugar_choice(HAllocator *mm__, void *env) {
static bool choice_ctrvm(HRVMProg *prog, void* env) { static bool choice_ctrvm(HRVMProg *prog, void* env) {
HSequence *s = (HSequence*)env; HSequence *s = (HSequence*)env;
uint16_t gotos[s->len]; uint16_t gotos[s->len];
uint16_t start = h_rvm_get_ip(prog);
for (size_t i=0; i<s->len; ++i) { for (size_t i=0; i<s->len; ++i) {
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0); uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
if (!h_compile_regex(prog, s->p_array[i]->env)) if (!h_compile_regex(prog, s->p_array[i]))
return false; return false;
gotos[i] = h_rvm_insert_insn(prog, RVM_GOTO, 0); gotos[i] = h_rvm_insert_insn(prog, RVM_GOTO, 65535);
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog)); h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
} }
uint16_t jump = h_rvm_insert_insn(prog, RVM_STEP, 0); h_rvm_insert_insn(prog, RVM_MATCH, 0x00FF); // fail.
for (size_t i=start; i<s->len; ++i) { uint16_t jump = h_rvm_get_ip(prog);
for (size_t i=0; i<s->len; ++i) {
h_rvm_patch_arg(prog, gotos[i], jump); h_rvm_patch_arg(prog, gotos[i], jump);
} }
return true; return true;
@ -81,7 +81,7 @@ static const HParserVtable choice_vt = {
.compile_to_rvm = choice_ctrvm, .compile_to_rvm = choice_ctrvm,
}; };
HParser* h_choice(const HParser* p, ...) { HParser* h_choice(HParser* p, ...) {
va_list ap; va_list ap;
va_start(ap, p); va_start(ap, p);
HParser* ret = h_choice__mv(&system_allocator, p, ap); HParser* ret = h_choice__mv(&system_allocator, p, ap);
@ -89,7 +89,7 @@ HParser* h_choice(const HParser* p, ...) {
return ret; return ret;
} }
HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) { HParser* h_choice__m(HAllocator* mm__, HParser* p, ...) {
va_list ap; va_list ap;
va_start(ap, p); va_start(ap, p);
HParser* ret = h_choice__mv(mm__, p, ap); HParser* ret = h_choice__mv(mm__, p, ap);
@ -97,28 +97,28 @@ HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) {
return ret; return ret;
} }
HParser* h_choice__v(const HParser* p, va_list ap) { HParser* h_choice__v(HParser* p, va_list ap) {
return h_choice__mv(&system_allocator, p, ap); return h_choice__mv(&system_allocator, p, ap);
} }
HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) { HParser* h_choice__mv(HAllocator* mm__, HParser* p, va_list ap_) {
va_list ap; va_list ap;
size_t len = 0; size_t len = 0;
HSequence *s = h_new(HSequence, 1); HSequence *s = h_new(HSequence, 1);
const HParser *arg; HParser *arg;
va_copy(ap, ap_); va_copy(ap, ap_);
do { do {
len++; len++;
arg = va_arg(ap, const HParser *); arg = va_arg(ap, HParser *);
} while (arg); } while (arg);
va_end(ap); va_end(ap);
s->p_array = h_new(const HParser *, len); s->p_array = h_new(HParser *, len);
va_copy(ap, ap_); va_copy(ap, ap_);
s->p_array[0] = p; s->p_array[0] = p;
for (size_t i = 1; i < len; i++) { for (size_t i = 1; i < len; i++) {
s->p_array[i] = va_arg(ap, const HParser *); s->p_array[i] = va_arg(ap, HParser *);
} while (arg); } while (arg);
va_end(ap); va_end(ap);
@ -139,7 +139,7 @@ HParser* h_choice__ma(HAllocator* mm__, void *args[]) {
} while(arg); } while(arg);
HSequence *s = h_new(HSequence, 1); HSequence *s = h_new(HSequence, 1);
s->p_array = h_new(const HParser *, len); s->p_array = h_new(HParser *, len);
for (size_t i = 0; i < len; i++) { for (size_t i = 0; i < len; i++) {
s->p_array[i] = ((HParser **)args)[i]; s->p_array[i] = ((HParser **)args)[i];

View file

@ -47,7 +47,7 @@ static bool h_svm_action_pop(HArena *arena, HSVMContext *ctx, void* arg) {
static bool ignore_ctrvm(HRVMProg *prog, void *env) { static bool ignore_ctrvm(HRVMProg *prog, void *env) {
HParser *p = (HParser*)env; HParser *p = (HParser*)env;
h_compile_regex(prog, p->env); h_compile_regex(prog, p);
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_pop, NULL)); h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_pop, NULL));
return true; return true;
} }

View file

@ -83,7 +83,7 @@ static bool h_svm_action_ignoreseq(HArena *arena, HSVMContext *ctx, void* env) {
// stack. // stack.
assert(seq->len >= 1); assert(seq->len >= 1);
for (int i = seq->len - 1; i>=0; i--) { for (int i = seq->len - 1; i>=0; i--) {
if (i == (int)seq->which && ctx->stack[ctx->stack_count]->token_type != TT_MARK) if (i == (int)seq->which && ctx->stack[ctx->stack_count-1]->token_type != TT_MARK)
save = ctx->stack[ctx->stack_count-1]; save = ctx->stack[ctx->stack_count-1];
// skip over everything up to and including the mark. // skip over everything up to and including the mark.
while (ctx->stack[--ctx->stack_count]->token_type != TT_MARK) while (ctx->stack[--ctx->stack_count]->token_type != TT_MARK)

View file

@ -1,3 +1,4 @@
#include <assert.h>
#include "parser_internal.h" #include "parser_internal.h"
// TODO: split this up. // TODO: split this up.
@ -14,7 +15,7 @@ static HParseResult *parse_many(void* env, HParseState *state) {
HInputStream bak; HInputStream bak;
while (env_->min_p || env_->count > count) { while (env_->min_p || env_->count > count) {
bak = state->input_stream; bak = state->input_stream;
if (count > 0) { if (count > 0 && env_->sep != NULL) {
HParseResult *sep = h_do_parse(env_->sep, state); HParseResult *sep = h_do_parse(env_->sep, state);
if (!sep) if (!sep)
goto err0; goto err0;
@ -47,13 +48,15 @@ static HParseResult *parse_many(void* env, HParseState *state) {
static bool many_isValidRegular(void *env) { static bool many_isValidRegular(void *env) {
HRepeat *repeat = (HRepeat*)env; HRepeat *repeat = (HRepeat*)env;
return (repeat->p->vtable->isValidRegular(repeat->p->env) && return (repeat->p->vtable->isValidRegular(repeat->p->env) &&
repeat->sep->vtable->isValidRegular(repeat->sep->env)); (repeat->sep == NULL ||
repeat->sep->vtable->isValidRegular(repeat->sep->env)));
} }
static bool many_isValidCF(void *env) { static bool many_isValidCF(void *env) {
HRepeat *repeat = (HRepeat*)env; HRepeat *repeat = (HRepeat*)env;
return (repeat->p->vtable->isValidCF(repeat->p->env) && return (repeat->p->vtable->isValidCF(repeat->p->env) &&
repeat->sep->vtable->isValidCF(repeat->sep->env)); (repeat->sep == NULL ||
repeat->sep->vtable->isValidCF(repeat->sep->env)));
} }
static HCFChoice* desugar_many(HAllocator *mm__, void *env) { static HCFChoice* desugar_many(HAllocator *mm__, void *env) {
@ -70,7 +73,9 @@ static HCFChoice* desugar_many(HAllocator *mm__, void *env) {
-> \epsilon -> \epsilon
*/ */
HCFChoice *sep = h_desugar(mm__, repeat->sep); HParser *epsilon = h_epsilon_p__m(mm__);
HCFChoice *sep = h_desugar(mm__, (repeat->sep != NULL) ? repeat->sep : epsilon);
HCFChoice *a = h_desugar(mm__, repeat->p); HCFChoice *a = h_desugar(mm__, repeat->p);
HCFChoice *ma = h_new(HCFChoice, 1); HCFChoice *ma = h_new(HCFChoice, 1);
HCFChoice *mar = h_new(HCFChoice, 1); HCFChoice *mar = h_new(HCFChoice, 1);
@ -119,24 +124,56 @@ static HCFChoice* desugar_many(HAllocator *mm__, void *env) {
// Regex-VM compile hook for repetitions (h_many, h_many1, h_repeat_n, ...).
//
// env is the HRepeat describing the repetition: the element parser (p), an
// optional separator (sep, NULL when there is none), and the count/min_p
// pair. When min_p is set, count is a lower bound with no upper bound;
// otherwise exactly count elements are emitted.
//
// Returns false as soon as compiling the element or the separator fails,
// true once the whole repetition has been emitted.
//
// TODO: implement min & max properly. Right now, it's always
// max==inf, min={0,1}
static bool many_ctrvm(HRVMProg *prog, void *env) {
  HRepeat *repeat = (HRepeat*)env;
  uint16_t clear_to_mark = h_rvm_create_action(prog, h_svm_action_clear_to_mark, NULL);

  // Structure emitted when min_p is set:
  //        FORK end   // only if count == 0
  //        GOTO mid
  //   nxt: <SEP>
  //   mid: <ELEM>
  //        FORK nxt
  //   end:
  if (repeat->min_p) {
    h_rvm_insert_insn(prog, RVM_PUSH, 0);
    assert(repeat->count < 2); // TODO: The other cases should be supported later.

    // The leading "FORK end" exists only when zero repetitions are allowed.
    // end_fork must not be read otherwise, so it gets a defined placeholder
    // value and the patch below is guarded by the same condition.
    const bool allow_empty = (repeat->count == 0);
    uint16_t end_fork = 0xFFFF;
    if (allow_empty) {
      end_fork = h_rvm_insert_insn(prog, RVM_FORK, 0xFFFF);
    }
    uint16_t goto_mid = h_rvm_insert_insn(prog, RVM_GOTO, 0xFFFF);
    uint16_t nxt = h_rvm_get_ip(prog);
    if (repeat->sep != NULL) {
      // The separator is compiled after its own PUSH and followed by the
      // clear_to_mark action.
      h_rvm_insert_insn(prog, RVM_PUSH, 0);
      if (!h_compile_regex(prog, repeat->sep)) {
        return false;
      }
      h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark);
    }
    // "mid" is the current ip: point the GOTO emitted above at it.
    h_rvm_patch_arg(prog, goto_mid, h_rvm_get_ip(prog));
    if (!h_compile_regex(prog, repeat->p)) {
      return false;
    }
    h_rvm_insert_insn(prog, RVM_FORK, nxt);
    if (allow_empty) {
      // "end" is the current ip: point the leading FORK at it.
      h_rvm_patch_arg(prog, end_fork, h_rvm_get_ip(prog));
    }
    h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
    return true;
  } else {
    // Exact count: unroll <ELEM> (<SEP> <ELEM>)* count times.
    h_rvm_insert_insn(prog, RVM_PUSH, 0);
    for (size_t i = 0; i < repeat->count; i++) {
      if (repeat->sep != NULL && i != 0) {
        h_rvm_insert_insn(prog, RVM_PUSH, 0);
        if (!h_compile_regex(prog, repeat->sep)) {
          return false;
        }
        h_rvm_insert_insn(prog, RVM_ACTION, clear_to_mark);
      }
      if (!h_compile_regex(prog, repeat->p)) {
        return false;
      }
    }
    h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_make_sequence, NULL));
    return true;
  }
}
static const HParserVtable many_vt = { static const HParserVtable many_vt = {
@ -153,7 +190,7 @@ HParser* h_many(const HParser* p) {
HParser* h_many__m(HAllocator* mm__, const HParser* p) { HParser* h_many__m(HAllocator* mm__, const HParser* p) {
HRepeat *env = h_new(HRepeat, 1); HRepeat *env = h_new(HRepeat, 1);
env->p = p; env->p = p;
env->sep = h_epsilon_p__m(mm__); env->sep = NULL;
env->count = 0; env->count = 0;
env->min_p = true; env->min_p = true;
return h_new_parser(mm__, &many_vt, env); return h_new_parser(mm__, &many_vt, env);
@ -165,7 +202,7 @@ HParser* h_many1(const HParser* p) {
HParser* h_many1__m(HAllocator* mm__, const HParser* p) { HParser* h_many1__m(HAllocator* mm__, const HParser* p) {
HRepeat *env = h_new(HRepeat, 1); HRepeat *env = h_new(HRepeat, 1);
env->p = p; env->p = p;
env->sep = h_epsilon_p__m(mm__); env->sep = NULL;
env->count = 1; env->count = 1;
env->min_p = true; env->min_p = true;
return h_new_parser(mm__, &many_vt, env); return h_new_parser(mm__, &many_vt, env);
@ -177,7 +214,7 @@ HParser* h_repeat_n(const HParser* p, const size_t n) {
HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) { HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) {
HRepeat *env = h_new(HRepeat, 1); HRepeat *env = h_new(HRepeat, 1);
env->p = p; env->p = p;
env->sep = h_epsilon_p__m(mm__); env->sep = NULL;
env->count = n; env->count = n;
env->min_p = false; env->min_p = false;
return h_new_parser(mm__, &many_vt, env); return h_new_parser(mm__, &many_vt, env);
@ -222,7 +259,7 @@ static HParseResult* parse_length_value(void *env, HParseState *state) {
// TODO: allocate this using public functions // TODO: allocate this using public functions
HRepeat repeat = { HRepeat repeat = {
.p = lv->value, .p = lv->value,
.sep = h_epsilon_p(), .sep = NULL,
.count = len->ast->uint, .count = len->ast->uint,
.min_p = false .min_p = false
}; };

View file

@ -22,7 +22,7 @@ static bool opt_isValidCF(void *env) {
return p->vtable->isValidCF(p->env); return p->vtable->isValidCF(p->env);
} }
static const HParsedToken* reshape_optional(const HParseResult *p) { static HParsedToken* reshape_optional(const HParseResult *p) {
assert(p->ast); assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->token_type == TT_SEQUENCE);
assert(p->ast->seq->used > 0); assert(p->ast->seq->used > 0);
@ -83,7 +83,7 @@ static bool opt_ctrvm(HRVMProg *prog, void* env) {
h_rvm_insert_insn(prog, RVM_PUSH, 0); h_rvm_insert_insn(prog, RVM_PUSH, 0);
uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0); uint16_t insn = h_rvm_insert_insn(prog, RVM_FORK, 0);
HParser *p = (HParser*) env; HParser *p = (HParser*) env;
if (!h_compile_regex(prog, p->env)) if (!h_compile_regex(prog, p))
return false; return false;
h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog)); h_rvm_patch_arg(prog, insn, h_rvm_get_ip(prog));
h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_optional, NULL)); h_rvm_insert_insn(prog, RVM_ACTION, h_rvm_create_action(prog, h_svm_action_optional, NULL));

View file

@ -4,7 +4,7 @@
typedef struct { typedef struct {
size_t len; size_t len;
const HParser **p_array; HParser **p_array;
} HSequence; } HSequence;
static HParseResult* parse_sequence(void *env, HParseState *state) { static HParseResult* parse_sequence(void *env, HParseState *state) {
@ -43,7 +43,7 @@ static bool sequence_isValidCF(void *env) {
return true; return true;
} }
static const HParsedToken *reshape_sequence(const HParseResult *p) { static HParsedToken *reshape_sequence(const HParseResult *p) {
assert(p->ast); assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->token_type == TT_SEQUENCE);
@ -101,7 +101,7 @@ static const HParserVtable sequence_vt = {
.compile_to_rvm = sequence_ctrvm, .compile_to_rvm = sequence_ctrvm,
}; };
HParser* h_sequence(const HParser* p, ...) { HParser* h_sequence(HParser* p, ...) {
va_list ap; va_list ap;
va_start(ap, p); va_start(ap, p);
HParser* ret = h_sequence__mv(&system_allocator, p, ap); HParser* ret = h_sequence__mv(&system_allocator, p, ap);
@ -109,7 +109,7 @@ HParser* h_sequence(const HParser* p, ...) {
return ret; return ret;
} }
HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) { HParser* h_sequence__m(HAllocator* mm__, HParser* p, ...) {
va_list ap; va_list ap;
va_start(ap, p); va_start(ap, p);
HParser* ret = h_sequence__mv(mm__, p, ap); HParser* ret = h_sequence__mv(mm__, p, ap);
@ -117,27 +117,27 @@ HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) {
return ret; return ret;
} }
// va_list entry point for building a sequence parser: forwards p and ap
// unchanged to h_sequence__mv(), passing the address of system_allocator as
// the allocator, and returns whatever that call returns.
HParser* h_sequence__v(HParser* p, va_list ap) {
  HParser *sequence = h_sequence__mv(&system_allocator, p, ap);
  return sequence;
}
HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) { HParser* h_sequence__mv(HAllocator* mm__, HParser *p, va_list ap_) {
va_list ap; va_list ap;
size_t len = 0; size_t len = 0;
const HParser *arg; const HParser *arg;
va_copy(ap, ap_); va_copy(ap, ap_);
do { do {
len++; len++;
arg = va_arg(ap, const HParser *); arg = va_arg(ap, HParser *);
} while (arg); } while (arg);
va_end(ap); va_end(ap);
HSequence *s = h_new(HSequence, 1); HSequence *s = h_new(HSequence, 1);
s->p_array = h_new(const HParser *, len); s->p_array = h_new(HParser *, len);
va_copy(ap, ap_); va_copy(ap, ap_);
s->p_array[0] = p; s->p_array[0] = p;
for (size_t i = 1; i < len; i++) { for (size_t i = 1; i < len; i++) {
s->p_array[i] = va_arg(ap, const HParser *); s->p_array[i] = va_arg(ap, HParser *);
} while (arg); } while (arg);
va_end(ap); va_end(ap);
@ -158,7 +158,7 @@ HParser* h_sequence__ma(HAllocator* mm__, void *args[]) {
} while(arg); } while(arg);
HSequence *s = h_new(HSequence, 1); HSequence *s = h_new(HSequence, 1);
s->p_array = h_new(const HParser *, len); s->p_array = h_new(HParser *, len);
for (size_t i = 0; i < len; i++) { for (size_t i = 0; i < len; i++) {
s->p_array[i] = ((HParser **)args)[i]; s->p_array[i] = ((HParser **)args)[i];

View file

@ -20,7 +20,7 @@ static HParseResult* parse_token(void *env, HParseState *state) {
} }
static const HParsedToken *reshape_token(const HParseResult *p) { static HParsedToken *reshape_token(const HParseResult *p) {
// fetch sequence of uints from p // fetch sequence of uints from p
assert(p->ast); assert(p->ast);
assert(p->ast->token_type == TT_SEQUENCE); assert(p->ast->token_type == TT_SEQUENCE);

View file

@ -15,9 +15,9 @@ static void test_end(void) {
} }
static void test_example_1(void) { static void test_example_1(void) {
const HParser *c = h_many(h_ch('x')); HParser *c = h_many(h_ch('x'));
const HParser *q = h_sequence(c, h_ch('y'), NULL); HParser *q = h_sequence(c, h_ch('y'), NULL);
const HParser *p = h_choice(q, h_end_p(), NULL); HParser *p = h_choice(q, h_end_p(), NULL);
HCFGrammar *g = h_cfgrammar(&system_allocator, p); HCFGrammar *g = h_cfgrammar(&system_allocator, p);
g_check_nonterminal(g, c); g_check_nonterminal(g, c);

View file

@ -162,7 +162,7 @@ static void test_middle(gconstpointer backend) {
#include <ctype.h> #include <ctype.h>
const HParsedToken* upcase(const HParseResult *p) { HParsedToken* upcase(const HParseResult *p) {
switch(p->ast->token_type) { switch(p->ast->token_type) {
case TT_SEQUENCE: case TT_SEQUENCE:
{ {
@ -180,17 +180,17 @@ const HParsedToken* upcase(const HParseResult *p) {
} }
} }
ret->seq = seq; ret->seq = seq;
return (const HParsedToken*)ret; return ret;
} }
case TT_UINT: case TT_UINT:
{ {
HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); HParsedToken *ret = a_new_(p->arena, HParsedToken, 1);
ret->token_type = TT_UINT; ret->token_type = TT_UINT;
ret->uint = toupper(p->ast->uint); ret->uint = toupper(p->ast->uint);
return (const HParsedToken*)ret; return ret;
} }
default: default:
return p->ast; return (HParsedToken*)p->ast;
} }
} }
@ -526,6 +526,7 @@ void register_parser_tests(void) {
g_test_add_data_func("/core/parser/regex/choice", GINT_TO_POINTER(PB_REGULAR), test_choice); g_test_add_data_func("/core/parser/regex/choice", GINT_TO_POINTER(PB_REGULAR), test_choice);
g_test_add_data_func("/core/parser/regex/many", GINT_TO_POINTER(PB_REGULAR), test_many); g_test_add_data_func("/core/parser/regex/many", GINT_TO_POINTER(PB_REGULAR), test_many);
g_test_add_data_func("/core/parser/regex/many1", GINT_TO_POINTER(PB_REGULAR), test_many1); g_test_add_data_func("/core/parser/regex/many1", GINT_TO_POINTER(PB_REGULAR), test_many1);
g_test_add_data_func("/core/parser/regex/repeat_n", GINT_TO_POINTER(PB_REGULAR), test_repeat_n);
g_test_add_data_func("/core/parser/regex/optional", GINT_TO_POINTER(PB_REGULAR), test_optional); g_test_add_data_func("/core/parser/regex/optional", GINT_TO_POINTER(PB_REGULAR), test_optional);
g_test_add_data_func("/core/parser/regex/sepBy", GINT_TO_POINTER(PB_REGULAR), test_sepBy); g_test_add_data_func("/core/parser/regex/sepBy", GINT_TO_POINTER(PB_REGULAR), test_sepBy);
g_test_add_data_func("/core/parser/regex/sepBy1", GINT_TO_POINTER(PB_REGULAR), test_sepBy1); g_test_add_data_func("/core/parser/regex/sepBy1", GINT_TO_POINTER(PB_REGULAR), test_sepBy1);