Added regex backend, does not compile

This commit is contained in:
Dan Hirsch 2013-01-13 17:01:10 +01:00
parent ca8751bcfe
commit b8314f6662
6 changed files with 206 additions and 0 deletions

View file

@ -19,6 +19,7 @@
#define HAMMER_ALLOCATOR__H__ #define HAMMER_ALLOCATOR__H__
#include <sys/types.h> #include <sys/types.h>
// TODO(thequux): Turn this into an "HAllocatorVtable", and add a wrapper that also takes an environment pointer.
typedef struct HAllocator_ { typedef struct HAllocator_ {
void* (*alloc)(struct HAllocator_* allocator, size_t size); void* (*alloc)(struct HAllocator_* allocator, size_t size);
void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size); void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size);

138
src/backends/regex.c Normal file
View file

@ -0,0 +1,138 @@
#include "../internal.h"
#include "../parsers/parser_internal.h"
#undef a_new
// Shorthand for a_new_() that always passes the local variable named `arena`.
// The expansion is a plain expression with no trailing semicolon, so it can
// be used inside initializer lists and larger expressions, e.g.
//   T *a = a_new(T, n), *b = a_new(T, n);
#define a_new(typ, count) a_new_(arena, typ, count)
// Stack VM opcodes.
typedef enum HSVMOp_ {
  SVM_PUSH    = 0, // Push a mark. There is no VM insn to push an object.
  SVM_NOP     = 1, // Does nothing. Used to start the chain, and possibly elsewhere.
  SVM_ACTION  = 2, // Same meaning as RVM_ACTION
  SVM_CAPTURE = 3, // Same meaning as RVM_CAPTURE
  SVM_ACCEPT  = 4,
} HSVMOp;
// One entry of a trace: an opcode with its argument, linked to another entry
// through `next`.
typedef struct HRVMTrace_ HRVMTrace;
struct HRVMTrace_ {
  HRVMTrace *next; // While parsing, the list is reverse-threaded; a
                   // post-processing step inverts all the pointers.
  uint16_t arg;    // argument of the recorded instruction
  uint8_t opcode;  // opcode of the recorded instruction
};
// A regex VM thread: the trace it has accumulated and its instruction pointer.
typedef struct HRVMThread_ HRVMThread;
struct HRVMThread_ {
  HRVMTrace *trace; // most recently pushed trace entry for this thread
  uint16_t ip;      // index into the program's instruction table
};
// Run the regex VM program `prog` over the bytes input[0..len).
//
// All threads advance in lock-step: each iteration of the outer loop handles
// one input byte, plus one extra iteration at off == len (where the current
// byte reads as 0) so that RVM_EOF can succeed. heads_p[ip] is the trace of
// the thread that was parked in front of instruction `ip` by the previous
// byte; heads_n collects the threads parked by RVM_STEP for the next byte.
// While a thread runs, every action instruction prepends an HRVMTrace entry
// to that thread's trace.
//
// Returns NULL when the program is empty, when no thread survives a byte, or
// when the input (including the EOF round) is exhausted. RVM_ACCEPT is not
// implemented yet and calls abort().
//
// NOTE(review): instruction pointers are not range-checked; the program is
// assumed to be well formed (GOTO/FORK targets in range, no thread running
// off the end of the table) -- confirm with whatever generates HRVMProg.
// NOTE(review): nothing in this function releases `arena`; confirm who owns it.
// NOTE(review): HRVMTrace.arg is 16 bits wide, so the offset recorded by
// RVM_PUSH is truncated for inputs longer than 65535 bytes.
void* h_rvm_run__m(HAllocator *mm__, HRVMProg *prog, const char* input, size_t len) {
  // An empty program can never accept, and heads_n[0] below needs one slot.
  if (prog->length == 0)
    return NULL;

  HArena *arena = h_new_arena(mm__, 0);
  HRVMTrace **heads_p = a_new(HRVMTrace*, prog->length);
  HRVMTrace **heads_n = a_new(HRVMTrace*, prog->length);
  uint8_t *insn_seen = a_new(uint8_t, prog->length); // 0 -> not seen, 1 -> processed, 2 -> queued
  HRVMThread *ip_queue = a_new(HRVMThread, prog->length);
  size_t ipq_top;

#define THREAD ip_queue[ipq_top-1]
#define PUSH_SVM(op_, arg_) do {                \
    HRVMTrace *nt = a_new(HRVMTrace, 1);        \
    nt->arg = (arg_);                           \
    nt->opcode = (op_);                         \
    nt->next = THREAD.trace;                    \
    THREAD.trace = nt;                          \
  } while(0)

  // Initial thread, parked in front of instruction 0.
  heads_n[0] = a_new(HRVMTrace, 1); // relies on a_new zeroing the entry (TODO confirm)
  heads_n[0]->opcode = SVM_NOP;

  int live_threads = 1;
  for (size_t off = 0; off <= len; off++) {
    if (!live_threads)
      return NULL; // every thread died on the previous byte
    live_threads = 0;

    const uint8_t ch = (off == len) ? 0 : (uint8_t)input[off];

    /* swap generations: last round's parked threads become this round's input */ {
      HRVMTrace **heads_t = heads_n;
      heads_n = heads_p;
      heads_p = heads_t;
    }
    memset(heads_n, 0, prog->length * sizeof(*heads_n));
    memset(insn_seen, 0, prog->length); // no insns seen yet

    for (size_t ip_s = 0; ip_s < prog->length; ip_s++) {
      // TODO: Write this as a threaded VM
      if (!heads_p[ip_s])
        continue;

      // Seed the work stack with the parked thread, including its trace.
      ipq_top = 1;
      THREAD.ip = ip_s;
      THREAD.trace = heads_p[ip_s];

      while (ipq_top > 0) {
        if (insn_seen[THREAD.ip] == 1) {
          // Another thread already ran this instruction for this byte;
          // drop this one, otherwise the loop would never make progress.
          ipq_top--;
          continue;
        }
        insn_seen[THREAD.ip] = 1;

        const uint16_t arg = prog->insns[THREAD.ip].arg;
        uint8_t hi, lo;
        switch (prog->insns[THREAD.ip].op) {
        case RVM_ACCEPT:
          // TODO: save current SVM pos, and jump to end
          abort();
        case RVM_MATCH:
          // Doesn't actually validate the "must be followed by MATCH
          // or STEP" rule. It should. Preproc perhaps?
          hi = (arg >> 8) & 0xff;
          lo = arg & 0xff;
          THREAD.ip++;
          if (ch < lo || ch > hi) // outside the inclusive range [lo, hi]
            ipq_top--; // terminate thread
          break;
        case RVM_GOTO:
          THREAD.ip = arg;
          break;
        case RVM_FORK:
          THREAD.ip++;
          if (!insn_seen[arg]) {
            // Keep the fall-through thread queued below and run the
            // branch target first, sharing the same trace.
            insn_seen[THREAD.ip] = 2;
            HRVMTrace* tr = THREAD.trace;
            ipq_top++;
            THREAD.ip = arg;
            THREAD.trace = tr;
          }
          break;
        case RVM_PUSH:
          PUSH_SVM(SVM_PUSH, off);
          THREAD.ip++;
          break;
        case RVM_ACTION:
          PUSH_SVM(SVM_ACTION, arg);
          THREAD.ip++;
          break;
        case RVM_CAPTURE:
          PUSH_SVM(SVM_CAPTURE, 0);
          THREAD.ip++;
          break;
        case RVM_EOF:
          THREAD.ip++;
          if (off != len)
            ipq_top--; // Terminate thread
          break;
        case RVM_STEP:
          // Park the thread in front of the instruction *after* the STEP,
          // so the next byte resumes there instead of re-running the STEP.
          live_threads++;
          THREAD.ip++;
          heads_n[THREAD.ip] = THREAD.trace;
          ipq_top--;
          break;
        default:
          // Unknown opcode: nothing sensible to do with this thread.
          ipq_top--;
          break;
        }
      }
    }
  }

  // Input exhausted (including the EOF round) without reaching RVM_ACCEPT.
  return NULL;
}
#undef PUSH_SVM
#undef THREAD

38
src/backends/regex.h Normal file
View file

@ -0,0 +1,38 @@
// Internal defs
#ifndef HAMMER_BACKEND_REGEX__H
#define HAMMER_BACKEND_REGEX__H
// Regex VM instruction set. Each insn is an 8-bit opcode and a 16-bit
// parameter. The tag on every opcode names its class:
//   [a] action:  adds an instruction to the stackvm that is being output.
//   [m] match:   can either succeed or fail, depending on the current character.
//   [c] control: affects the pc non-linearly.
typedef enum HRVMOp_ {
  RVM_ACCEPT  = 0, // [a]
  RVM_GOTO    = 1, // [c] parameter is an offset into the instruction table
  RVM_FORK    = 2, // [c] parameter is an offset into the instruction table
  RVM_PUSH    = 3, // [a] No arguments, just pushes a mark onto the stack
  RVM_ACTION  = 4, // [a] argument is an action ID
  RVM_CAPTURE = 5, // [a] Capture the last string, and push it on the stack. No arg.
  RVM_EOF     = 6, // [m] Succeeds only if at EOF.
  RVM_MATCH   = 7, // [m] The high byte of the parameter is an upper bound
                   //     and the low byte is a lower bound, both
                   //     inclusive. An inverted match should be handled
                   //     as two ranges.
  RVM_STEP    = 8, // [a] Step to the next byte of input
  RVM_OPCOUNT = 9  // one past the last opcode
} HRVMOp;
// A single regex VM instruction: an 8-bit opcode and its 16-bit parameter.
typedef struct HRVMInsn_ HRVMInsn;
struct HRVMInsn_ {
  uint8_t op;   // opcode
  uint16_t arg; // parameter; its meaning depends on the opcode
};
// A compiled regex VM program.
// The typedef now actually declares the name HRVMProg; without a declarator
// only `struct HRVMProg_` existed and code using `HRVMProg` did not compile.
typedef struct HRVMProg_ {
  size_t length;       // number of entries in `insns`
  size_t action_count; // presumably the number of entries in `actions` -- TODO confirm
  HAction *actions;
  HRVMInsn *insns;     // instruction table
} HRVMProg;
#endif

View file

@ -34,6 +34,10 @@ typedef struct HParseState_ HParseState;
typedef enum HParserBackend_ { typedef enum HParserBackend_ {
PB_MIN = 0, PB_MIN = 0,
PB_PACKRAT = PB_MIN, // PB_MIN is always the default. PB_PACKRAT = PB_MIN, // PB_MIN is always the default.
PB_LALR, // Not Implemented
PB_LLk, // Not Implemented
PB_GLR, // Not Implemented
PB_REGULAR, // Not Implemented
PB_MAX PB_MAX
} HParserBackend; } HParserBackend;
@ -114,6 +118,7 @@ typedef struct HParserVtable_ {
HParseResult* (*parse)(void *env, HParseState *state); HParseResult* (*parse)(void *env, HParseState *state);
bool (*isValidRegular)(void *env); bool (*isValidRegular)(void *env);
bool (*isValidCF)(void *env); bool (*isValidCF)(void *env);
} HParserVtable; } HParserVtable;
typedef struct HParser_ { typedef struct HParser_ {

View file

@ -193,6 +193,7 @@ struct HBitWriter_ {
// }}} // }}}
// Backends {{{ // Backends {{{
extern HParserBackendVTable h__packrat_backend_vtable; extern HParserBackendVTable h__packrat_backend_vtable;
// }}} // }}}

View file

@ -52,6 +52,28 @@
} \ } \
} while(0) } while(0)
// Fail the current test, with a message, unless lang->isValidRegular(lang->env)
// returns true. The argument is parenthesized so that any pointer expression
// (e.g. `&parser`) can be passed. Note that `lang` is evaluated twice, so it
// should not have side effects.
#define g_check_regular(lang) do {                      \
    if (!(lang)->isValidRegular((lang)->env)) {         \
      g_test_message("Language is not regular");        \
      g_test_fail();                                    \
    }                                                   \
  } while(0)
// Fail the current test, with a message, unless lang->isValidCF(lang->env)
// returns true. The argument is parenthesized so that any pointer expression
// (e.g. `&parser`) can be passed. Note that `lang` is evaluated twice, so it
// should not have side effects.
#define g_check_contextfree(lang) do {                  \
    if (!(lang)->isValidCF((lang)->env)) {              \
      g_test_message("Language is not context-free");   \
      g_test_fail();                                    \
    }                                                   \
  } while(0)
// Fail the current test, with a message naming the backend and params, when
// h_compile(lang, backend, params) yields a false value.
// NOTE(review): `params` is printed with %s -- confirm callers pass a C string.
#define g_check_compilable(lang, backend, params) do {                  \
    if (h_compile(lang, backend, params))                               \
      break; /* compiled fine, nothing to report */                     \
    g_test_message("Language is not %s(%s)", #backend, params);         \
    g_test_fail();                                                      \
  } while(0)
// TODO: replace uses of this with g_check_parse_failed // TODO: replace uses of this with g_check_parse_failed
#define g_check_failed(res) do { \ #define g_check_failed(res) do { \
const HParseResult *result = (res); \ const HParseResult *result = (res); \
@ -99,4 +121,5 @@
#define g_check_cmpdouble(n1, op, n2) g_check_inttype("%g", double, n1, op, n2) #define g_check_cmpdouble(n1, op, n2) g_check_inttype("%g", double, n1, op, n2)
#endif // #ifndef HAMMER_TEST_SUITE__H #endif // #ifndef HAMMER_TEST_SUITE__H