Merge pull request #131 from michiexile/master

Algebraic Generating Functions from parser
This commit is contained in:
Meredith L. Patterson 2015-04-11 01:16:25 +02:00
commit 33af1ec65c
4 changed files with 536 additions and 1 deletions

View file

@ -7,4 +7,5 @@ dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c'])
# Example programs, each registered under the "examples" alias below.
base64 = example.Program('base64', 'base64.c')
base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c')
base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c')
# NOTE(review): this diff view shows both the pre-change alias (without ties)
# and the post-change alias (with ties); presumably only the latter remains in
# the real build file -- confirm.
env.Alias("examples", [dns, base64, base64_sem1, base64_sem2])
# The ties example is built from its driver (ties.c) plus grammar.c.
ties = example.Program('ties', ['ties.c', 'grammar.c'])
env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties])

179
examples/grammar.c Normal file
View file

@ -0,0 +1,179 @@
// Generates a system of equations for generating functions from a grammar.
//
// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org>
//
// If a desugared parser has user_data set, the generating function systems will try
// to interpret it as a string:
//
// If this string for an h_ch starts with the character '0', then that character
// will have weight 0 in the generating function.
//
// The string (without the leading '0', if there is one) is used as the
// preferred name of that parser in the generating function.
//
#include <inttypes.h>
#include "../src/backends/contextfree.h"
#include "../src/backends/lr.h"
#include "grammar.h"
#include <stdio.h>
// Returns the name to print for the nonterminal nt of grammar g.
//
// If nt->user_data is set and its first byte is a printable, non-space ASCII
// character, user_data is taken to be a string and is used as the name; a
// leading '0' (the "weight 0" marker) is skipped. Otherwise a name is derived
// from the number stored for nt in g->nts, written in base 26 with the digits
// 'A'..'Z'.
//
// A derived name lives in a static buffer that is overwritten by the next
// call, so the caller has to print or copy it before calling again.
const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) {
  // View user_data as a string once; every access below goes through this.
  const char *label = nt->user_data;

  // if user_data exists and is printable:
  if (label != NULL && *label > ' ' && *label < 127) {
    // Skip the marker by indexing the char pointer. Adding 1 to user_data
    // itself is pointer arithmetic on a non-char pointer, which only works
    // as a compiler extension.
    return (*label == '0') ? label + 1 : label;
  }

  static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits

  // find nt's number in g
  size_t n = (uintptr_t)h_hashtable_get(g->nts, nt);

  // NB the start symbol (number 0) is always "A".
  // Digits are written right to left, ending at buf[14]; buf[15] stays 0 and
  // terminates the string. At least one digit is always written.
  int i;
  for (i = 14; i >= 0 && (n > 0 || i == 14); i--) {
    buf[i] = 'A' + n % 26;
    n = n / 26; // shift one digit
  }

  return buf + i + 1;
}
// Prints the nonterminal part of the monomial for one production and tallies
// the terminal part for the caller.
//
//   file    stream the monomial is written to
//   count   multiplied by the number of members of each charset in seq
//   length  incremented once per character terminal in seq, except those
//           whose user_data string starts with '0' (weight 0)
//   g       grammar seq belongs to; used to name the nonterminals
//   seq     the production to process
//
// The output always starts with "1" and is followed by "*NAME" for every
// nonterminal encountered. The caller appends the scalar (count) and the
// power of t (length).
//
// NOTE(review): a charset only multiplies count; it does not add to length,
// although it presumably consumes one input character like HCF_CHAR does.
// Confirm whether charsets are meant to contribute a factor of t.
void readsequence(FILE *file, uint32_t *count, uint32_t *length,
                  const HCFGrammar *g, const HCFSequence *seq) {
  // The empty sequence has generating function 1; everything else is
  // expressed as factors multiplied onto it.
  fprintf(file, "1");

  for (HCFChoice **item = seq->items; *item != NULL; item++) {
    switch ((*item)->type) {
    case HCF_CHAR: {
      // Inspect this item's own tag. Deciding from the first item of the
      // sequence would dereference a NULL user_data on an untagged later
      // item, and would miss a "0" tag when the first item is untagged.
      const char *tag = (*item)->user_data;
      if (tag == NULL || *tag != '0') {
        (*length)++;
      }
      break;
    }
    case HCF_END:
      break;
    case HCF_CHARSET: {
      HCharset cs = (*item)->charset;
      // Counted from zero for every charset so that each one contributes
      // exactly its own size to the product.
      uint32_t members = 0;
      for (unsigned int c = 0; c < 256; c++) {
        if (charset_isset(cs, c)) {
          members++;
        }
      }
      *count *= members;
      break;
    }
    default: // HCF_CHOICE, non-terminal symbol
      fprintf(file, "*%s", nonterminal_name(g, *item));
      break;
    }
  }
}
// Prints the generating-function equations of grammar g to file as SageMath
// input: first a polynomial ring over QQ in t and one variable per
// nonterminal, then an ideal with one generator "NAME - (sum of monomials)"
// per nonterminal. Prints nothing if g has no nonterminals.
//
// Outline:
//   For each nt in g->nts
//     For each production in nt->seq
//       For all elements in the sequence
//         Accumulate counts
//         Accumulate string lengths
//       Emit count*t^length
//
// Nonterminals whose name is the empty string are left out of both the ring
// and the ideal.
void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) {
  if (g->nts->used < 1) {
    return;
  }

  size_t i;
  HHashTableEntry *hte;

  // emit the SageMath ring init string
  // iterate over g->nts, output symbols
  fprintf(file, "ring.<t");
  for (i = 0; i < g->nts->capacity; i++) {
    for (hte = &g->nts->contents[i]; hte; hte = hte->next) {
      if (hte->key == NULL) {
        continue;
      }
      const HCFChoice *nt = hte->key;
      const char *ntn = nonterminal_name(g, nt);
      if (*ntn == 0) {
        continue; // would otherwise leave an empty slot between commas
      }
      fprintf(file, ",%s", ntn);
    }
  }
  fprintf(file, "> = QQ[]\n");

  // iterate over g->nts
  // emit a Sage ideal definition
  int emitted = 0;
  fprintf(file, "ID = ring.ideal(");
  for (i = 0; i < g->nts->capacity; i++) {
    for (hte = &g->nts->contents[i]; hte; hte = hte->next) {
      if (hte->key == NULL) {
        continue;
      }
      const HCFChoice *nt = hte->key;
      const char *ntn = nonterminal_name(g, nt);
      if (*ntn == 0) {
        continue;
      }
      // The separator is written only once we know a generator follows, so
      // a skipped nonterminal cannot leave a dangling comma behind.
      if (emitted > 0) {
        fprintf(file, ",");
      }
      emitted++;

      // ntn may point into nonterminal_name's static buffer; it is printed
      // here, before readsequence() asks for further names.
      fprintf(file, "%s - (", ntn);

      if (*nt->seq == NULL) {
        // No productions at all: the sum of monomials is empty, i.e. 0.
        fprintf(file, "0");
      }
      for (HCFSequence **seq = nt->seq; *seq; seq++) {
        if (seq != nt->seq) {
          fprintf(file, " + ");
        }
        uint32_t count = 1, length = 0;
        readsequence(file, &count, &length, g, *seq);

        // Scalar factor; a factor of 1 is left implicit. A count of 0 (a
        // charset with no members) is printed so that the term vanishes.
        if (count != 1) {
          fprintf(file, "*%" PRIu32, count);
        }
        // Power of t; t^0 is left implicit.
        if (length == 1) {
          fprintf(file, "*t");
        } else if (length > 1) {
          fprintf(file, "*t^%" PRIu32, length);
        }
      }
      fprintf(file, ")");
    }
  }
  fprintf(file, ")\n");
}

46
examples/grammar.h Normal file
View file

@ -0,0 +1,46 @@
// Generates a system of equations for generating functions from a grammar.
//
// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org>
//
// Currently makes no attempt at elegance and caches no information; it
// just prints the generating functions to a provided FILE*.
//
// If a desugared parser has user_data set, the generating function systems will try
// to interpret it as a string:
//
// If this string for an h_ch starts with the character 0, then that character
// will have weight 0 in the generating function.
//
// Use the remaining string to set the preferred name of that parser in the
// generating function.
//
#ifndef HAMMER_GRAMMAR__H
#define HAMMER_GRAMMAR__H
#include "../src/backends/contextfree.h"
#include "../src/backends/lr.h"
// Filched from cfgrammar.c. Returns the name to print for the nonterminal nt:
// taken from nt's user_data if that is set, otherwise assigned automatically
// from nt's position in some ordering of the non-terminals of g.
const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt);
// Prints out the monomial generated by a single HCFSequence.
// The exponent for t is accumulated in *length and the number of alternatives
// is accumulated in *count. The monomial is (mostly) printed out to the
// provided FILE*; the caller is responsible for adding a scalar and a power
// of t to the printout.
void readsequence(FILE *file, uint32_t *count, uint32_t *length,
const HCFGrammar *g, const HCFSequence *seq);
// Walks through a grammar and generates one equation per nonterminal, with
// one term for each of its production rules. The results are printed out to
// the provided FILE*.
void h_pprint_gfeqns(FILE *file, const HCFGrammar *g);
#endif

309
examples/ties.c Normal file
View file

@ -0,0 +1,309 @@
// Intention: read in a parser, generate the system of equations for its
// generating functions
//
#include <inttypes.h>
#include "../src/backends/contextfree.h"
#include "../src/backends/lr.h"
#include "grammar.h"
#include <stdio.h>
// Allocator passed to the h_desugar_augmented() and h_cfgrammar_() calls in
// this file; main() points it at system_allocator before building a grammar.
HAllocator *mm__;
// Builds a small recursive expression grammar and returns its start parser:
//   E -> E '-' T | T
//   T -> '(' E ')' | 'n'
// E is created as an indirect parser first so that both rules can refer to
// it, and is bound to its rule at the end.
HParser* cfExample() {
  HParser *number = h_ch('n');
  HParser *expr = h_indirect();

  HParser *term = h_choice(h_sequence(h_ch('('), expr, h_ch(')'), NULL),
                           number,
                           NULL);
  HParser *expr_rule = h_choice(h_sequence(expr, h_ch('-'), term, NULL),
                                term,
                                NULL);

  h_bind_indirect(expr, expr_rule);
  return expr;
}
// The tie knot parsers below rely on the generating-function code in grammar.c
// allowing user specification of non-default byte string "lengths": a terminal
// whose user_data starts with '0' (e.g. "0U") gets weight 0, so that U symbols
// don't contribute factors of t to the generating function.
//
// Alternatively: use multivariate generating functions to spit out different
// variables for different terminals. This gets really messy with bigger alphabets.
// Builds a tie-knot grammar over the terminals L, R, C and U and returns its
// start parser "tie" (an L followed by Lnext).
HParser* finkmao() {
HParser *L = h_ch('L');
HParser *R = h_ch('R');
HParser *C = h_ch('C');
HParser *U = h_ch('U');
// Continuations after an L, R or C; indirect because the rules below refer
// to each other.
HParser *Lnext = h_indirect();
HParser *Rnext = h_indirect();
HParser *Cnext = h_indirect();
// Each rule either continues with one of the other two letters or ends the
// string with a three-symbol sequence finishing in U (C_ has no such ending).
HParser *L_ = h_choice(h_sequence(R, Rnext, NULL),
h_sequence(C, Cnext, NULL),
h_sequence(R, C, U, NULL), NULL);
HParser *R_ = h_choice(h_sequence(L, Lnext, NULL),
h_sequence(C, Cnext, NULL),
h_sequence(L, C, U, NULL), NULL);
HParser *C_ = h_choice(h_sequence(R, Rnext, NULL),
h_sequence(L, Lnext, NULL), NULL);
h_bind_indirect(Lnext, L_);
h_bind_indirect(Rnext, R_);
h_bind_indirect(Cnext, C_);
HParser *tie = h_sequence(L, Lnext, NULL);
// Desugar before labelling: the labels are stored on the ->desugared nodes
// (presumably these are only populated by this call -- confirm).
h_desugar_augmented(mm__, tie);
// Names used in the generating-function output.
L->desugared->user_data = "L";
R->desugared->user_data = "R";
C->desugared->user_data = "C";
Lnext->desugared->user_data = "Ln";
Rnext->desugared->user_data = "Rn";
Cnext->desugared->user_data = "Cn";
tie->desugared->user_data = "tie";
// Leading '0': U is named "U" but gets weight 0 in the generating function.
U->desugared->user_data = "0U";
return tie;
}
// Builds a tie-knot grammar over the terminals T, W and U and returns its
// start parser "tie": an optional prefix, any number of pairs, then one tuck.
HParser* finkmaoTW() {
HParser *T = h_ch('T');
HParser *W = h_ch('W');
HParser *U = h_ch('U');
// prefix: a single T, a single W, or nothing.
HParser *prefix = h_choice(T, W, h_epsilon_p(),
NULL);
// pair: any two-letter combination of T and W.
HParser *pair = h_choice(h_sequence(T, T, NULL),
h_sequence(W, T, NULL),
h_sequence(T, W, NULL),
h_sequence(W, W, NULL), NULL);
// tuck: a doubled letter followed by U.
HParser *tuck = h_choice(h_sequence(T, T, U, NULL),
h_sequence(W, W, U, NULL),
NULL);
// pairstar: zero or more pairs, written as right recursion through an
// indirect parser.
HParser *pairstar = h_indirect();
HParser *pstar_ = h_choice(h_sequence(pair, pairstar, NULL),
h_epsilon_p(),
NULL);
h_bind_indirect(pairstar, pstar_);
HParser* tie = h_sequence(prefix, pairstar, tuck, NULL);
// Desugar before labelling: the labels are stored on the ->desugared nodes.
h_desugar_augmented(mm__, tie);
T->desugared->user_data = "T";
W->desugared->user_data = "W";
// Leading '0': U is named "U" but gets weight 0 in the generating function.
U->desugared->user_data = "0U";
prefix->desugared->user_data = "prefix";
pair->desugared->user_data = "pair";
tuck->desugared->user_data = "tuck";
// The "pairstar" label is attached via pstar_ (the bound rule), not via the
// indirect parser pairstar.
pstar_->desugared->user_data = "pairstar";
tie->desugared->user_data = "tie";
return tie;
}
// Builds a tie-knot grammar over the terminals T, W and U and returns its
// start parser "tie": an optional prefix, any mix of pairs and tucks, then a
// final tuck.
HParser* depth1TW() {
HParser *T = h_ch('T');
HParser *W = h_ch('W');
HParser *U = h_ch('U');
// prefix: a single T, a single W, or nothing.
HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL);
// pair: any two-letter combination of T and W.
HParser *pair = h_choice(h_sequence(T, T, NULL),
h_sequence(W, T, NULL),
h_sequence(T, W, NULL),
h_sequence(W, W, NULL), NULL);
// tuck: a doubled letter followed by U.
HParser *tuck = h_choice(h_sequence(T, T, U, NULL),
h_sequence(W, W, U, NULL),
NULL);
// tuckpairstar: zero or more pairs or tucks, written as right recursion
// through an indirect parser.
HParser *tuckpairstar = h_indirect();
HParser *tpstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL),
h_sequence(tuck, tuckpairstar, NULL),
h_epsilon_p(),
NULL);
h_bind_indirect(tuckpairstar, tpstar_);
// The start symbol is a choice with a single alternative.
HParser *tie = h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL);
// Desugar before labelling: the labels are stored on the ->desugared nodes.
h_desugar_augmented(mm__, tie);
T->desugared->user_data = "T";
W->desugared->user_data = "W";
// Leading '0': U is named "U" but gets weight 0 in the generating function.
U->desugared->user_data = "0U";
prefix->desugared->user_data = "prefix";
pair->desugared->user_data = "pair";
tuck->desugared->user_data = "tuck";
// The "tuckpairstar" label is attached via tpstar_ (the bound rule), not via
// the indirect parser tuckpairstar.
tpstar_->desugared->user_data = "tuckpairstar";
tie->desugared->user_data = "tie";
return tie;
}
// Builds a tie-knot grammar over the terminals L, R, C and U and returns its
// start parser "tie": one of L, R or C followed by the matching last*
// continuation.
HParser* depth1() {
HParser *L = h_ch('L');
HParser *R = h_ch('R');
HParser *C = h_ch('C');
HParser *U = h_ch('U');
// Continuations, indirect because the rules below refer to each other.
// They are bound further down: lastR to R_, lastL to L_, lastC to C_.
HParser *lastR = h_indirect();
HParser *lastL = h_indirect();
HParser *lastC = h_indirect();
// Every alternative is a pair of letters, optionally followed by U, and then
// either a continuation or the end of the string (only after U).
HParser *R_ = h_choice(h_sequence(L, R, lastR, NULL),
h_sequence(C, R, lastR, NULL),
h_sequence(L, C, lastC, NULL),
h_sequence(L, C, U, lastC, NULL),
h_sequence(L, C, U, NULL),
h_sequence(C, L, lastL, NULL),
h_sequence(C, L, U, lastL, NULL),
h_sequence(C, L, U, NULL),
NULL);
// NOTE(review): in R_ every continuation matches the second letter of its
// pair. In L_ the first two alternatives end in L yet continue with lastR,
// and in C_ the first two end in C yet continue with lastR. This may be a
// copy-paste slip -- confirm against the intended tie grammar.
HParser *L_ = h_choice(h_sequence(R, L, lastR, NULL),
h_sequence(C, L, lastR, NULL),
h_sequence(R, C, lastC, NULL),
h_sequence(R, C, U, lastC, NULL),
h_sequence(R, C, U, NULL),
h_sequence(C, R, lastR, NULL),
h_sequence(C, R, U, lastR, NULL),
h_sequence(C, R, U, NULL),
NULL);
HParser *C_ = h_choice(h_sequence(L, C, lastR, NULL),
h_sequence(R, C, lastR, NULL),
h_sequence(L, R, lastR, NULL),
h_sequence(L, R, U, lastR, NULL),
h_sequence(L, R, U, NULL),
h_sequence(R, L, lastL, NULL),
h_sequence(R, L, U, lastL, NULL),
h_sequence(R, L, U, NULL),
NULL);
h_bind_indirect(lastR, R_);
h_bind_indirect(lastL, L_);
h_bind_indirect(lastC, C_);
HParser* tie = h_choice(h_sequence(L, lastL, NULL),
h_sequence(R, lastR, NULL),
h_sequence(C, lastC, NULL),
NULL);
// Desugar before labelling: the labels are stored on the ->desugared nodes.
h_desugar_augmented(mm__, tie);
L->desugared->user_data = "L";
R->desugared->user_data = "R";
C->desugared->user_data = "C";
// Leading '0': U is named "U" but gets weight 0 in the generating function.
U->desugared->user_data = "0U";
lastL ->desugared->user_data = "Ln";
lastR->desugared->user_data = "Rn";
lastC->desugared->user_data = "Cn";
tie->desugared->user_data = "tie";
return tie;
}
// Builds a tie-knot grammar over the terminals T, W and U in which tucks are
// themselves recursive, and returns its start parser "tie": an optional
// prefix, any mix of pairs and tucks, then a final tuck.
HParser* depthNTW() {
HParser *T = h_ch('T');
HParser *W = h_ch('W');
HParser *U = h_ch('U');
// prefix: a single T, a single W, or nothing.
HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL);
// pair: any two-letter combination of T and W.
HParser *pair = h_choice(h_sequence(T, T, NULL),
h_sequence(W, T, NULL),
h_sequence(T, W, NULL),
h_sequence(W, W, NULL), NULL);
// Two parallel families of mutually recursive nonterminals: tstart with
// tw0..tw2, and wstart with wt0..wt2. All are indirect so the rules below
// can refer to each other; they are bound after the rules are defined.
HParser *tstart = h_indirect();
HParser *tw0 = h_indirect();
HParser *tw1 = h_indirect();
HParser *tw2 = h_indirect();
HParser *wstart = h_indirect();
HParser *wt0 = h_indirect();
HParser *wt1 = h_indirect();
HParser *wt2 = h_indirect();
// T_ is the rule for tstart: both alternatives begin with T and end with U.
HParser *T_ = h_choice(h_sequence(T, T, tw2, U, NULL),
h_sequence(T, W, tw0, U, NULL),
NULL);
// Rules for tw0, tw1 and tw2. Each alternative is a letter pair or a nested
// tstart/wstart, followed by one of tw0..tw2 and a U. Only tw2_ may also be
// empty.
HParser *tw0_ = h_choice(h_sequence(T, T, tw2, U, NULL),
h_sequence(T, W, tw0, U, NULL),
h_sequence(W, T, tw0, U, NULL),
h_sequence(W, W, tw1, U, NULL),
h_sequence(tstart, tw2, U, NULL),
h_sequence(wstart, tw1, U, NULL),
NULL);
HParser *tw1_ = h_choice(h_sequence(T, T, tw0, U, NULL),
h_sequence(T, W, tw1, U, NULL),
h_sequence(W, T, tw1, U, NULL),
h_sequence(W, W, tw2, U, NULL),
h_sequence(tstart, tw0, U, NULL),
h_sequence(wstart, tw2, U, NULL),
NULL);
HParser *tw2_ = h_choice(h_sequence(T, T, tw1, U, NULL),
h_sequence(T, W, tw2, U, NULL),
h_sequence(W, T, tw2, U, NULL),
h_sequence(W, W, tw0, U, NULL),
h_sequence(tstart, tw1, U, NULL),
h_sequence(wstart, tw0, U, NULL),
h_epsilon_p(),
NULL);
// W_ is the rule for wstart: both alternatives begin with W and end with U.
HParser *W_ = h_choice(h_sequence(W, W, wt2, U, NULL),
h_sequence(W, T, wt0, U, NULL),
NULL);
// Rules for wt0, wt1 and wt2: the same shape as the tw rules above with the
// roles of T and W exchanged. Only wt2_ may also be empty.
HParser *wt0_ = h_choice(h_sequence(W, W, wt2, U, NULL),
h_sequence(W, T, wt0, U, NULL),
h_sequence(T, W, wt0, U, NULL),
h_sequence(T, T, wt1, U, NULL),
h_sequence(wstart, wt2, U, NULL),
h_sequence(tstart, wt1, U, NULL),
NULL);
HParser *wt1_ = h_choice(h_sequence(W, W, wt0, U, NULL),
h_sequence(W, T, wt1, U, NULL),
h_sequence(T, W, wt1, U, NULL),
h_sequence(T, T, wt2, U, NULL),
h_sequence(wstart, wt0, U, NULL),
h_sequence(tstart, wt2, U, NULL),
NULL);
HParser *wt2_ = h_choice(h_sequence(W, W, wt1, U, NULL),
h_sequence(W, T, wt2, U, NULL),
h_sequence(T, W, wt2, U, NULL),
h_sequence(T, T, wt0, U, NULL),
h_sequence(wstart, wt1, U, NULL),
h_sequence(tstart, wt0, U, NULL),
h_epsilon_p(),
NULL);
// Tie the indirect parsers to their rules.
h_bind_indirect(tstart, T_);
h_bind_indirect(tw0, tw0_);
h_bind_indirect(tw1, tw1_);
h_bind_indirect(tw2, tw2_);
h_bind_indirect(wstart, W_);
h_bind_indirect(wt0, wt0_);
h_bind_indirect(wt1, wt1_);
h_bind_indirect(wt2, wt2_);
// A tuck is either of the two recursive families.
HParser *tuck = h_choice(tstart, wstart, NULL);
// tuckpairstar: zero or more pairs or tucks, written as right recursion
// through an indirect parser.
HParser *tuckpairstar = h_indirect();
HParser *tpstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL),
h_sequence(tuck, tuckpairstar, NULL),
h_epsilon_p(),
NULL);
h_bind_indirect(tuckpairstar, tpstar_);
// The start symbol is a choice with a single alternative.
HParser *tie = h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL);
// Desugar before labelling: the labels are stored on the ->desugared nodes.
h_desugar_augmented(mm__, tie);
T->desugared->user_data = "T";
W->desugared->user_data = "W";
// Leading '0': U is named "U" but gets weight 0 in the generating function.
U->desugared->user_data = "0U";
prefix->desugared->user_data = "prefix";
pair->desugared->user_data = "pair";
tuck->desugared->user_data = "tuck";
// The tstart/tw*/wstart/wt* nonterminals get no label here, so their names
// in the output are whatever nonterminal_name() produces for unlabelled
// nonterminals.
tpstar_->desugared->user_data = "tuckpairstar";
tie->desugared->user_data = "tie";
return tie;
}
// Entry point: builds the finkmao() tie grammar, turns it into a context-free
// grammar, and prints first its generating-function equations and then the
// grammar itself to stdout. Exits with status 1 if the grammar cannot be
// built. The command-line arguments are not used.
int main(int argc, char **argv) {
  mm__ = &system_allocator;

  HParser *parser = finkmao();
  HCFGrammar *grammar = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
  if (!grammar) {
    fputs("h_cfgrammar failed\n", stderr);
    return 1;
  }

  fputs("\n==== Generating functions ====\n", stdout);
  h_pprint_gfeqns(stdout, grammar);

  fputs("\n==== Grammar ====\n", stdout);
  h_pprint_grammar(stdout, grammar, 0);

  return 0;
}