Added regex backend, does not compile
This commit is contained in:
parent
ca8751bcfe
commit
b8314f6662
6 changed files with 206 additions and 0 deletions
|
|
@ -19,6 +19,7 @@
|
||||||
#define HAMMER_ALLOCATOR__H__
|
#define HAMMER_ALLOCATOR__H__
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
// TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer.
|
||||||
typedef struct HAllocator_ {
|
typedef struct HAllocator_ {
|
||||||
void* (*alloc)(struct HAllocator_* allocator, size_t size);
|
void* (*alloc)(struct HAllocator_* allocator, size_t size);
|
||||||
void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size);
|
void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size);
|
||||||
|
|
|
||||||
138
src/backends/regex.c
Normal file
138
src/backends/regex.c
Normal file
|
|
@ -0,0 +1,138 @@
|
||||||
|
#include "../internal.h"
|
||||||
|
#include "../parsers/parser_internal.h"
|
||||||
|
|
||||||
|
// Re-point a_new at the function-local `arena` variable.
// The expansion deliberately has no trailing semicolon, so that a_new(...)
// can be used as an ordinary expression (e.g. as one initializer in a
// multi-declarator declaration); the caller supplies the terminating ';'.
#undef a_new
#define a_new(typ, count) a_new_(arena, typ, count)
|
||||||
|
// Stack VM
|
||||||
|
// Opcodes of the stack VM. The regex VM does not act on these itself; it
// records them (see PUSH_SVM) as HRVMTrace nodes while matching.
typedef enum HSVMOp_ {
|
||||||
|
SVM_PUSH, // Push a mark. There is no VM insn to push an object.
|
||||||
|
SVM_NOP, // Used to start the chain, and possibly elsewhere. Does nothing.
|
||||||
|
SVM_ACTION, // Same meaning as RVM_ACTION
|
||||||
|
SVM_CAPTURE, // Same meaning as RVM_CAPTURE
|
||||||
|
SVM_ACCEPT, // NOTE(review): not emitted anywhere in the visible code; presumably ends a trace -- confirm
|
||||||
|
} HSVMOp;
|
||||||
|
|
||||||
|
// One recorded stack-VM operation. Each regex VM thread owns a singly linked
// list of these; PUSH_SVM prepends a new node to the running thread's list.
typedef struct HRVMTrace_ {
|
||||||
|
struct HRVMTrace_ *next; // When parsing, these are
|
||||||
|
// reverse-threaded. There is a postproc
|
||||||
|
// step that inverts all the pointers.
|
||||||
|
uint16_t arg; // Operand of the recorded operation (16 bits wide; larger values are truncated on store)
|
||||||
|
uint8_t opcode; // Holds an HSVMOp value (SVM_*)
|
||||||
|
} HRVMTrace;
|
||||||
|
|
||||||
|
// Execution state of one regex VM thread while it sits in the run queue.
typedef struct HRVMThread_ {
|
||||||
|
HRVMTrace *trace; // Most recently recorded trace node of this thread (head of its list)
|
||||||
|
uint16_t ip; // Index into the program's instruction table of the next insn to run
|
||||||
|
} HRVMThread;
|
||||||
|
|
||||||
|
// Run the regex VM program `prog` over the `len` bytes at `input`.
//
// All live threads advance in lock-step, one input byte per outer iteration,
// plus one final iteration at end of input in which the current byte reads
// as 0. heads_p[i] holds the trace of the thread that resumes at instruction
// i for the current byte; heads_n collects the threads that executed RVM_STEP
// and will resume on the next byte. While running, each thread records the
// stack-VM operations it emits as a reverse-linked HRVMTrace list.
//
// Scratch memory comes from an arena created on `mm__`; it is released
// before returning.
//
// Returns NULL when no thread is left alive or the input is exhausted
// without reaching RVM_ACCEPT. RVM_ACCEPT itself is not implemented yet and
// calls abort().
void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const char* input, size_t len) {
  HArena *arena = h_new_arena(mm__, 0);

  // An empty program has no instruction 0 to start a thread at.
  if (prog->length == 0) {
    h_delete_arena(arena);
    return NULL;
  }

  // Declared one per statement so the declarations do not depend on whether
  // the a_new macro expands with a trailing semicolon.
  HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length);
  HRVMTrace **heads_n = a_new(HRVMTrace*, prog->length);
  uint8_t *insn_seen = a_new(uint8_t, prog->length); // 0 -> not seen, 1 -> processed, 2 -> queued
  HRVMThread *ip_queue = a_new(HRVMThread, prog->length);
  size_t ipq_top;

  // THREAD is the thread on top of the run stack.
#define THREAD ip_queue[ipq_top-1]
  // Prepend one stack-VM operation to the running thread's trace.
  // NOTE(review): HRVMTrace.arg is 16 bits, so a size_t argument such as the
  // input offset is truncated for inputs longer than 65535 bytes.
#define PUSH_SVM(op_, arg_) do {            \
    HRVMTrace *nt = a_new(HRVMTrace, 1);    \
    nt->arg = (arg_);                       \
    nt->opcode = (op_);                     \
    nt->next = THREAD.trace;                \
    THREAD.trace = nt;                      \
  } while(0)

  // Seed a single thread at instruction 0 with an empty (NOP) trace. The
  // explicit clearing avoids relying on the arena handing out zeroed memory.
  memset(heads_n, 0, prog->length * sizeof(*heads_n));
  heads_n[0] = a_new(HRVMTrace, 1);
  heads_n[0]->next = NULL;
  heads_n[0]->arg = 0;
  heads_n[0]->opcode = SVM_NOP;

  int live_threads = 1;
  // Stop as soon as the previous byte left no thread alive.
  for (size_t off = 0; off <= len && live_threads; off++) {
    const uint8_t ch = (off == len) ? 0 : (uint8_t)input[off];

    // Threads saved while processing the previous byte become this byte's
    // start set; the other array is recycled to collect the next set.
    HRVMTrace **heads_t = heads_n;
    heads_n = heads_p;
    heads_p = heads_t;
    memset(heads_n, 0, prog->length * sizeof(*heads_n));
    memset(insn_seen, 0, prog->length); // no insns seen yet
    live_threads = 0;

    for (size_t ip_s = 0; ip_s < prog->length; ip_s++) {
      // TODO: Write this as a threaded VM
      if (!heads_p[ip_s])
        continue;
      ipq_top = 1;
      THREAD.ip = (uint16_t)ip_s;
      THREAD.trace = heads_p[ip_s]; // resume with the trace saved at the last STEP

      while (ipq_top > 0) {
        // A thread that ran off the program, or reached an instruction that
        // was already processed for this byte, is dropped. It must be popped
        // here, otherwise the loop would spin on it forever.
        if (THREAD.ip >= prog->length || insn_seen[THREAD.ip] == 1) {
          ipq_top--;
          continue;
        }
        insn_seen[THREAD.ip] = 1;
        const uint16_t arg = prog->insns[THREAD.ip].arg;

        switch (prog->insns[THREAD.ip].op) {
        case RVM_ACCEPT:
          // TODO: save current SVM pos, and jump to end
          abort();
        case RVM_MATCH: {
          // Doesn't actually validate the "must be followed by MATCH
          // or STEP. It should. Preproc perhaps?
          const uint8_t hi = (arg >> 8) & 0xff;
          const uint8_t lo = arg & 0xff;
          THREAD.ip++;
          if (ch < lo || ch > hi)   // outside the inclusive range [lo, hi]
            ipq_top--;              // terminate thread
          break;
        }
        case RVM_GOTO:
          THREAD.ip = arg;
          break;
        case RVM_FORK: {
          THREAD.ip++;
          if (arg < prog->length && !insn_seen[arg]) {
            // Mark the fall-through instruction as queued, but never
            // downgrade one that was already processed.
            if (THREAD.ip < prog->length && insn_seen[THREAD.ip] == 0)
              insn_seen[THREAD.ip] = 2;
            HRVMTrace *tr = THREAD.trace;
            ipq_top++;              // the forked thread runs first
            THREAD.ip = arg;
            THREAD.trace = tr;
          }
          break;
        }
        case RVM_PUSH:
          PUSH_SVM(SVM_PUSH, off);
          THREAD.ip++;
          break;
        case RVM_ACTION:
          PUSH_SVM(SVM_ACTION, arg);
          THREAD.ip++;
          break;
        case RVM_CAPTURE:
          PUSH_SVM(SVM_CAPTURE, 0);
          THREAD.ip++;
          break;
        case RVM_EOF:
          THREAD.ip++;
          if (off != len)
            ipq_top--;              // terminate thread
          break;
        case RVM_STEP:
          // Save the thread so that it resumes at the instruction AFTER the
          // STEP on the next byte; saving it at the STEP itself would make it
          // step again on every byte without ever progressing.
          THREAD.ip++;
          if (THREAD.ip < prog->length) {
            heads_n[THREAD.ip] = THREAD.trace;
            live_threads++;
          }
          ipq_top--;
          break;
        default:
          ipq_top--;                // unknown opcode: terminate thread
          break;
        }
      }
    }
  }

  // No thread reached RVM_ACCEPT.
  h_delete_arena(arena);
  return NULL;
}
|
||||||
38
src/backends/regex.h
Normal file
38
src/backends/regex.h
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
// Internal defs
|
||||||
|
#ifndef HAMMER_BACKEND_REGEX__H
|
||||||
|
#define HAMMER_BACKEND_REGEX__H
|
||||||
|
|
||||||
|
// each insn is an 8-bit opcode and a 16-bit parameter
|
||||||
|
// [a] are actions; they add an instruction to the stackvm that is being output.
|
||||||
|
// [m] are match ops; they can either succeed or fail, depending on the current character
|
||||||
|
// [c] are control ops. They affect the pc non-linearly.
|
||||||
|
// Opcodes of the regex VM. The [a]/[m]/[c] tags classify each opcode as an
// action, a match op or a control op.
typedef enum HRVMOp_ {
|
||||||
|
RVM_ACCEPT, // [a]
|
||||||
|
RVM_GOTO, // [c] parameter is an offset into the instruction table
|
||||||
|
RVM_FORK, // [c] parameter is an offset into the instruction table
|
||||||
|
RVM_PUSH, // [a] No arguments, just pushes a mark onto the stack
|
||||||
|
RVM_ACTION, // [a] argument is an action ID
|
||||||
|
RVM_CAPTURE, // [a] Capture the last string, and push it on the stack. No arg.
|
||||||
|
RVM_EOF, // [m] Succeeds only if at EOF.
|
||||||
|
RVM_MATCH, // [m] The high byte of the parameter is an upper bound
|
||||||
|
// and the low byte is a lower bound, both
|
||||||
|
// inclusive. An inverted match should be handled
|
||||||
|
// as two ranges.
|
||||||
|
RVM_STEP, // [a] Step to the next byte of input
|
||||||
|
RVM_OPCOUNT // Not an opcode: as the last enumerator, its value is the number of opcodes above
|
||||||
|
} HRVMOp;
|
||||||
|
|
||||||
|
// One regex VM instruction: an 8-bit opcode plus a 16-bit parameter.
typedef struct HRVMInsn_{
|
||||||
|
uint8_t op; // An HRVMOp value (RVM_*)
|
||||||
|
uint16_t arg; // Parameter; its meaning depends on op (see HRVMOp)
|
||||||
|
} HRVMInsn;
|
||||||
|
|
||||||
|
|
||||||
|
typedef struct HRVMProg_ {
|
||||||
|
size_t length;
|
||||||
|
size_t action_count;
|
||||||
|
HAction *actions;
|
||||||
|
HRVMInsn *insns;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -34,6 +34,10 @@ typedef struct HParseState_ HParseState;
|
||||||
typedef enum HParserBackend_ {
|
typedef enum HParserBackend_ {
|
||||||
PB_MIN = 0,
|
PB_MIN = 0,
|
||||||
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
|
PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
|
||||||
|
PB_LALR, // Not Implemented
|
||||||
|
PB_LLk, // Not Implemented
|
||||||
|
PB_GLR, // Not Implemented
|
||||||
|
PB_REGULAR, // Not Implemented
|
||||||
PB_MAX
|
PB_MAX
|
||||||
} HParserBackend;
|
} HParserBackend;
|
||||||
|
|
||||||
|
|
@ -114,6 +118,7 @@ typedef struct HParserVtable_ {
|
||||||
HParseResult* (*parse)(void *env, HParseState *state);
|
HParseResult* (*parse)(void *env, HParseState *state);
|
||||||
bool (*isValidRegular)(void *env);
|
bool (*isValidRegular)(void *env);
|
||||||
bool (*isValidCF)(void *env);
|
bool (*isValidCF)(void *env);
|
||||||
|
|
||||||
} HParserVtable;
|
} HParserVtable;
|
||||||
|
|
||||||
typedef struct HParser_ {
|
typedef struct HParser_ {
|
||||||
|
|
|
||||||
|
|
@ -193,6 +193,7 @@ struct HBitWriter_ {
|
||||||
|
|
||||||
// }}}
|
// }}}
|
||||||
|
|
||||||
|
|
||||||
// Backends {{{
|
// Backends {{{
|
||||||
extern HParserBackendVTable h__packrat_backend_vtable;
|
extern HParserBackendVTable h__packrat_backend_vtable;
|
||||||
// }}}
|
// }}}
|
||||||
|
|
|
||||||
|
|
@ -52,6 +52,28 @@
|
||||||
} \
|
} \
|
||||||
} while(0)
|
} while(0)
|
||||||
|
|
||||||
|
// Fail the current test, with a message, unless `lang` reports itself as a
// regular language through its isValidRegular callback.
// `lang` is parenthesized so that any pointer expression (e.g. `&obj`) can be
// passed; note that it is still evaluated twice.
#define g_check_regular(lang) do {                    \
    if (!(lang)->isValidRegular((lang)->env)) {       \
      g_test_message("Language is not regular");      \
      g_test_fail();                                  \
    }                                                 \
  } while(0)
|
||||||
|
|
||||||
|
// Fail the current test, with a message, unless `lang` reports itself as a
// context-free language through its isValidCF callback.
// `lang` is parenthesized so that any pointer expression (e.g. `&obj`) can be
// passed; note that it is still evaluated twice.
#define g_check_contextfree(lang) do {                  \
    if (!(lang)->isValidCF((lang)->env)) {              \
      g_test_message("Language is not context-free");   \
      g_test_fail();                                    \
    }                                                   \
  } while(0)
|
||||||
|
|
||||||
|
// Fail the current test, with a message, when h_compile(lang, backend, params)
// evaluates to zero. The message contains the spelling of `backend` as written
// at the call site and formats `params` with %s, so `params` must be a C string.
// NOTE(review): this treats a zero return from h_compile as failure -- confirm
// against h_compile's actual return convention.
#define g_check_compilable(lang, backend, params) do { \
|
||||||
|
if (!h_compile(lang, backend, params)) { \
|
||||||
|
g_test_message("Language is not %s(%s)", #backend, params); \
|
||||||
|
g_test_fail(); \
|
||||||
|
} \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
|
||||||
// TODO: replace uses of this with g_check_parse_failed
|
// TODO: replace uses of this with g_check_parse_failed
|
||||||
#define g_check_failed(res) do { \
|
#define g_check_failed(res) do { \
|
||||||
const HParseResult *result = (res); \
|
const HParseResult *result = (res); \
|
||||||
|
|
@ -99,4 +121,5 @@
|
||||||
#define g_check_cmpdouble(n1, op, n2) g_check_inttype("%g", double, n1, op, n2)
|
#define g_check_cmpdouble(n1, op, n2) g_check_inttype("%g", double, n1, op, n2)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#endif // #ifndef HAMMER_TEST_SUITE__H
|
#endif // #ifndef HAMMER_TEST_SUITE__H
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue