Regex VM finished but untested.
This commit is contained in:
parent
a8ab63e096
commit
c19d7bb66e
8 changed files with 283 additions and 32 deletions
112
src/backends/regexvm_asm.pl
Normal file
112
src/backends/regexvm_asm.pl
Normal file
|
|
@ -0,0 +1,112 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
use strict;
|
||||
# The input file consists of a sequence of blocks, which can be parsed
|
||||
# as SVM test cases, RVM test cases, or C functions. Each block starts
|
||||
# with a header line, then a sequence of options, and finally text in
|
||||
# a format defined by the block type.
|
||||
#
|
||||
# Header lines start with "+TYPE", optionally followed by a name. This
|
||||
# name is semantically meaningful for SVM and RVM blocks; it
|
||||
# determines the name of the test case.
|
||||
|
||||
# A C block's name is not used, and it takes no options. The body
|
||||
# (which continues until the first line that looks like a header), is
|
||||
# just passed straight through into the C source.
|
||||
|
||||
# SVM blocks' names are the GLib test case name. The underlying
|
||||
# function's name is derived by substituting invalid characters with
|
||||
# '_'. Note that this can result in collisions (eg, /foo_bar/baz
|
||||
# collides with /foo/bar_baz). If this happens, it's your own damn
|
||||
# fault; rename the blocks. SVM blocks take three different options:
|
||||
# @input, @output, and @pre. The @input pragma's argument is a
|
||||
# C-quoted string that gets passed into the VM as the input string,
|
||||
# and @output is a C-quoted string that is compared against
|
||||
# h_write_result_unamb. @pre lines are prepended verbatim to the
|
||||
# function body (with the @pre stripped, of course); they can be used
|
||||
# to initialize environment values.
|
||||
#
|
||||
# SVM instructions consist of either two or four fields:
|
||||
#
|
||||
# input_pos opcode [arg env]
|
||||
#
|
||||
# input_pos and opcode correspond to the fields in HRVMTrace. arg and
|
||||
# env are used to populate an HSVMAction; arg is the function, and env
|
||||
# is the object whose address should be used as the env.
|
||||
|
||||
# RVM blocks are very similar to SVM blocks; the name and options are
|
||||
# handled exactly the same way. The assembly text is handled slightly
|
||||
# differently; the format is:
|
||||
#
|
||||
# [label:] opcode [arg ...]
|
||||
#
|
||||
# For FORK and GOTO, the arg should be a label that is defined
|
||||
# elsewhere.
|
||||
#
|
||||
# For ACTION, the arguments are handled the same way as with SVM.
|
||||
#
|
||||
# MATCH takes two arguments, each of which can be any C integer
|
||||
# constant (not including character constants), which form the lower
|
||||
# and upper bounds of the matched character, respectively.
|
||||
#
|
||||
# No other RVM instructions take an argument.
|
||||
|
||||
# At the beginning of any line, comments preceeded by '#' are allowed;
|
||||
# they are replaced by C++ comments and inserted in the nearest valid
|
||||
# location in the output.
|
||||
|
||||
my $mode == "TOP";
|
||||
|
||||
# common regexes:
|
||||
my $re_ident = qr/[A-Za-z_][A-Za-z0-9_]*/;
|
||||
my $re_cstr = qr/"(?:[^\\"]|\\["'abefnrtv0\\]|\\x[0-9a-fA-F]{2}|\\[0-7]{3})*"/;
|
||||
|
||||
|
||||
my %svm = (
|
||||
name => sub {
|
||||
my ($env, $name) = @_;
|
||||
$env->{name} = $name;
|
||||
},
|
||||
pragma => sub {
|
||||
my ($env, $name, $val) = @_;
|
||||
if ($name eq "input") {
|
||||
chomp($env->{input} = $val);
|
||||
} elsif ($name eq "output") {
|
||||
chomp($env->{output} = $val);
|
||||
} elsif ($name eq "pre") {
|
||||
# Do I have the ref precedence right here?
|
||||
push(@$env->{pre}, $val);
|
||||
} else {
|
||||
warn "Invalid SVM pragma";
|
||||
}
|
||||
},
|
||||
body => sub {
|
||||
my ($env, $line) = @_;
|
||||
my ($ipos, $op, $arg, $argenv);
|
||||
if ($line =~ /^\s*(\d+)\s+(PUSH|NOP|ACTION|CAPTURE|ACCEPT)(?:\s+($re_ident)\s+($re_ident))?/) {
|
||||
if ($2 eq "PUSH") {
|
||||
# TODO: implement all the opcodes
|
||||
}
|
||||
}
|
||||
}
|
||||
);
|
||||
|
||||
|
||||
while (<>) {
|
||||
if (/^+(C|RVM|SVM)/) {
|
||||
$mode = $1;
|
||||
}
|
||||
|
||||
if ($mode eq "TOP") {
|
||||
if (/^#(.*)/) {
|
||||
print "// $1";
|
||||
next;
|
||||
}
|
||||
} elsif ($mode eq "SVM") {
|
||||
} elsif ($mode eq "RVM") {
|
||||
} elsif ($mode eq "C") {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue