refactored out the grammar handling. handles epsilons better
This commit is contained in:
parent
24e9e9de7f
commit
a72aff9b39
4 changed files with 197 additions and 143 deletions
|
|
@ -7,6 +7,5 @@ dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c'])
|
||||||
base64 = example.Program('base64', 'base64.c')
|
base64 = example.Program('base64', 'base64.c')
|
||||||
base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c')
|
base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c')
|
||||||
base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c')
|
base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c')
|
||||||
singular = example.Program('explore_singular', 'explore_singular.c')
|
ties = example.Program('ties', ['ties.c', 'grammar.c'])
|
||||||
ties = example.Program('ties', 'ties.c')
|
env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties])
|
||||||
env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, singular, ties])
|
|
||||||
148
examples/grammar.c
Normal file
148
examples/grammar.c
Normal file
|
|
@ -0,0 +1,148 @@
|
||||||
|
// Generates a system of equations for generating functions from a grammar.
|
||||||
|
//
|
||||||
|
// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org>
|
||||||
|
//
|
||||||
|
|
||||||
|
// If a desugared parser has user_data set, the generating function systems will try
|
||||||
|
// to interpret it as a string:
|
||||||
|
//
|
||||||
|
// If this string for an h_ch starts with the character '0', then that character
|
||||||
|
// will have weight 0 in the generating function.
|
||||||
|
//
|
||||||
|
// Use the remaining string to set the preferred name of that parser in the
|
||||||
|
// generating function.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <inttypes.h>
|
||||||
|
#include "../src/backends/contextfree.h"
|
||||||
|
#include "../src/backends/lr.h"
|
||||||
|
#include "grammar.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) {
|
||||||
|
if(nt->user_data != NULL) {
|
||||||
|
if(*(char*)(nt->user_data) != '0') {
|
||||||
|
// user_data is a non-empty string
|
||||||
|
return nt->user_data;
|
||||||
|
} else {
|
||||||
|
return nt->user_data+1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits
|
||||||
|
|
||||||
|
// find nt's number in g
|
||||||
|
size_t n = (uintptr_t)h_hashtable_get(g->nts, nt);
|
||||||
|
|
||||||
|
// NB the start symbol (number 0) is always "A".
|
||||||
|
int i;
|
||||||
|
for(i=14; i>=0 && (n>0 || i==14); i--) {
|
||||||
|
buf[i] = 'A' + n%26;
|
||||||
|
n = n/26; // shift one digit
|
||||||
|
}
|
||||||
|
|
||||||
|
return buf+i+1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void readsequence(FILE *file, uint32_t *count, uint32_t *length,
|
||||||
|
const HCFGrammar *g, const HCFSequence *seq) {
|
||||||
|
// tally up numbers of choices, and lengths of emitted strings.
|
||||||
|
// Immediately emit any nonterminals encountered.
|
||||||
|
HCFChoice** x = seq->items;
|
||||||
|
|
||||||
|
fprintf(file, "1");
|
||||||
|
if (*x == NULL) {
|
||||||
|
// empty sequence
|
||||||
|
// GF is 1
|
||||||
|
return;
|
||||||
|
} else {
|
||||||
|
char has_user_data = (*x)->user_data != NULL && *(char*)(*x)->user_data != 0;
|
||||||
|
HCharset cs;
|
||||||
|
unsigned int i, cscount=0;
|
||||||
|
for(; *x; x++) {
|
||||||
|
switch((*x)->type) {
|
||||||
|
case HCF_CHAR:
|
||||||
|
if(!(has_user_data && *(char*)(*x)->user_data == '0')) {
|
||||||
|
(*length)++;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case HCF_END:
|
||||||
|
break;
|
||||||
|
case HCF_CHARSET:
|
||||||
|
cs = (*x)->charset;
|
||||||
|
for(i=0; i<256; i++) {
|
||||||
|
if (charset_isset(cs, i)) {
|
||||||
|
cscount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*count *= cscount;
|
||||||
|
break;
|
||||||
|
default: // HCF_CHOICE, non-terminal symbol
|
||||||
|
fprintf(file, "*%s(t)", nonterminal_name(g, *x));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// For each nt in g->nts
|
||||||
|
// For each choice in nt->key->seq
|
||||||
|
// For all elements in sequence
|
||||||
|
// Accumulate counts
|
||||||
|
// Accumulate string lengths
|
||||||
|
// Emit count*t^length
|
||||||
|
void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) {
|
||||||
|
if (g->nts->used < 1) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// determine maximum string length of symbol names
|
||||||
|
int len;
|
||||||
|
size_t s;
|
||||||
|
for(len=1, s=26; s < g->nts->used; len++, s*=26);
|
||||||
|
|
||||||
|
// iterate over g->nts
|
||||||
|
size_t i;
|
||||||
|
HHashTableEntry *hte;
|
||||||
|
for(i=0; i < g->nts->capacity; i++) {
|
||||||
|
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
||||||
|
if (hte->key == NULL) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
const HCFChoice *nt = hte->key;
|
||||||
|
fprintf(file, "%s(t) = ", nonterminal_name(g, nt));
|
||||||
|
|
||||||
|
|
||||||
|
for(HCFSequence **seq = nt->seq; *seq; seq++) {
|
||||||
|
if (seq != nt->seq) {
|
||||||
|
fprintf(file, " + ");
|
||||||
|
}
|
||||||
|
uint32_t count=1, length=0;
|
||||||
|
readsequence(file, &count, &length, g, *seq);
|
||||||
|
if(count == 1) {
|
||||||
|
if(length == 1) {
|
||||||
|
fprintf(file, "*t");
|
||||||
|
}
|
||||||
|
if(length > 1) {
|
||||||
|
fprintf(file, "*t^%d", length);
|
||||||
|
}
|
||||||
|
} else if(count > 1) {
|
||||||
|
if(length == 0) {
|
||||||
|
fprintf(file, "*%d", count);
|
||||||
|
}
|
||||||
|
if(length == 1) {
|
||||||
|
fprintf(file, "*%d*t", count);
|
||||||
|
}
|
||||||
|
if (length > 1) {
|
||||||
|
fprintf(file, "*%d*t^%d", count, length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(file, "\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
46
examples/grammar.h
Normal file
46
examples/grammar.h
Normal file
|
|
@ -0,0 +1,46 @@
|
||||||
|
// Generates a system of equations for generating functions from a grammar.
|
||||||
|
//
|
||||||
|
// (c) 2015 Mikael Vejdemo-Johansson <mikael@johanssons.org>
|
||||||
|
//
|
||||||
|
|
||||||
|
// Currently makes no attempt at elegance or at caching information, but rather
|
||||||
|
// just prints the generating functions to a provided FILE*.
|
||||||
|
//
|
||||||
|
|
||||||
|
|
||||||
|
// If a desugared parser has user_data set, the generating function systems will try
|
||||||
|
// to interpret it as a string:
|
||||||
|
//
|
||||||
|
// If this string for an h_ch starts with the character '0', then that character
|
||||||
|
// will have weight 0 in the generating function.
|
||||||
|
//
|
||||||
|
// Use the remaining string to set the preferred name of that parser in the
|
||||||
|
// generating function.
|
||||||
|
//
|
||||||
|
|
||||||
|
#ifndef HAMMER_GRAMMAR__H
|
||||||
|
#define HAMMER_GRAMMAR__H
|
||||||
|
|
||||||
|
#include "../src/backends/contextfree.h"
|
||||||
|
#include "../src/backends/lr.h"
|
||||||
|
|
||||||
|
|
||||||
|
// Filched from cfgrammar.c, this function extracts the name from user_data if it
|
||||||
|
// is set; otherwise assigns a name automatically from its position in some
|
||||||
|
// ordering of non-terminals.
|
||||||
|
const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt);
|
||||||
|
|
||||||
|
// This function prints out the monomial generated by a single HCFSequence
|
||||||
|
// It returns the resulting exponent for t in length and the number of alternatives
|
||||||
|
// accumulated in count. The monomial is (mostly) printed out to the provided FILE*,
|
||||||
|
// the caller is responsible for adding a scalar and a power of t to the printout.
|
||||||
|
void readsequence(FILE *file, uint32_t *count, uint32_t *length,
|
||||||
|
const HCFGrammar *g, const HCFSequence *seq);
|
||||||
|
|
||||||
|
// This function walks through a grammar and generates an equation for each
|
||||||
|
// production rule. The results are printed out to the provided FILE*.
|
||||||
|
void h_pprint_gfeqns(FILE *file, const HCFGrammar *g);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
141
examples/ties.c
141
examples/ties.c
|
|
@ -1,6 +1,3 @@
|
||||||
//
|
|
||||||
// Created by Mikael Vejdemo Johansson on 4/7/15.
|
|
||||||
//
|
|
||||||
// Intention: read in a parser, generate the system of equations for its
|
// Intention: read in a parser, generate the system of equations for its
|
||||||
// generating functions
|
// generating functions
|
||||||
//
|
//
|
||||||
|
|
@ -8,148 +5,12 @@
|
||||||
#include <inttypes.h>
|
#include <inttypes.h>
|
||||||
#include "../src/backends/contextfree.h"
|
#include "../src/backends/contextfree.h"
|
||||||
#include "../src/backends/lr.h"
|
#include "../src/backends/lr.h"
|
||||||
|
#include "grammar.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
|
||||||
HAllocator *mm__;
|
HAllocator *mm__;
|
||||||
|
|
||||||
// If a parser has user_data set, the generating function systems will try
|
|
||||||
// to interpret it as a string:
|
|
||||||
//
|
|
||||||
// If this string for an h_ch starts with the character 0, then that character
|
|
||||||
// will have weight 0 in the generating function.
|
|
||||||
//
|
|
||||||
// Use the remaining string to set the preferred name of that parser in the
|
|
||||||
// generating function.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) {
|
|
||||||
if(nt->user_data != NULL) {
|
|
||||||
if(*(char*)(nt->user_data) != '0') {
|
|
||||||
// user_data is a non-empty string
|
|
||||||
return nt->user_data;
|
|
||||||
} else {
|
|
||||||
return nt->user_data+1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits
|
|
||||||
|
|
||||||
// find nt's number in g
|
|
||||||
size_t n = (uintptr_t)h_hashtable_get(g->nts, nt);
|
|
||||||
|
|
||||||
// NB the start symbol (number 0) is always "A".
|
|
||||||
int i;
|
|
||||||
for(i=14; i>=0 && (n>0 || i==14); i--) {
|
|
||||||
buf[i] = 'A' + n%26;
|
|
||||||
n = n/26; // shift one digit
|
|
||||||
}
|
|
||||||
|
|
||||||
return buf+i+1;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void readsequence(FILE *file, uint32_t *count, uint32_t *length,
|
|
||||||
const HCFGrammar *g, const HCFSequence *seq) {
|
|
||||||
// tally up numbers of choices, and lengths of emitted strings.
|
|
||||||
// Immediately emit any nonterminals encountered.
|
|
||||||
HCFChoice** x = seq->items;
|
|
||||||
|
|
||||||
if (*x == NULL) {
|
|
||||||
return;
|
|
||||||
} else {
|
|
||||||
char has_user_data = (*x)->user_data != NULL && *(char*)(*x)->user_data != 0;
|
|
||||||
fprintf(file, "1");
|
|
||||||
HCharset cs;
|
|
||||||
unsigned int i, cscount=0;
|
|
||||||
for(; *x; x++) {
|
|
||||||
switch((*x)->type) {
|
|
||||||
case HCF_CHAR:
|
|
||||||
if(!(has_user_data && *(char*)(*x)->user_data == '0')) {
|
|
||||||
(*length)++;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case HCF_END:
|
|
||||||
break;
|
|
||||||
case HCF_CHARSET:
|
|
||||||
cs = (*x)->charset;
|
|
||||||
for(i=0; i<256; i++) {
|
|
||||||
if (charset_isset(cs, i)) {
|
|
||||||
cscount++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*count *= cscount;
|
|
||||||
break;
|
|
||||||
default: // HCF_CHOICE, non-terminal symbol
|
|
||||||
fprintf(file, "*%s(t)", nonterminal_name(g, *x));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// For each nt in g->nts
|
|
||||||
// For each choice in nt->key->seq
|
|
||||||
// For all elements in sequence
|
|
||||||
// Accumulate counts
|
|
||||||
// Accumulate string lengths
|
|
||||||
// Emit count*t^length
|
|
||||||
void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) {
|
|
||||||
if (g->nts->used < 1) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// determine maximum string length of symbol names
|
|
||||||
int len;
|
|
||||||
size_t s;
|
|
||||||
for(len=1, s=26; s < g->nts->used; len++, s*=26);
|
|
||||||
|
|
||||||
// iterate over g->nts
|
|
||||||
size_t i;
|
|
||||||
HHashTableEntry *hte;
|
|
||||||
for(i=0; i < g->nts->capacity; i++) {
|
|
||||||
for(hte = &g->nts->contents[i]; hte; hte = hte->next) {
|
|
||||||
if (hte->key == NULL) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const HCFChoice *nt = hte->key;
|
|
||||||
fprintf(file, "%s(t) = ", nonterminal_name(g, nt));
|
|
||||||
|
|
||||||
|
|
||||||
for(HCFSequence **seq = nt->seq; *seq; seq++) {
|
|
||||||
if (seq != nt->seq) {
|
|
||||||
fprintf(file, " + ");
|
|
||||||
}
|
|
||||||
uint32_t count=1, length=0;
|
|
||||||
readsequence(file, &count, &length, g, *seq);
|
|
||||||
if(count == 1) {
|
|
||||||
if(length == 1) {
|
|
||||||
fprintf(file, "*t");
|
|
||||||
}
|
|
||||||
if(length > 1) {
|
|
||||||
fprintf(file, "*t^%d", length);
|
|
||||||
}
|
|
||||||
} else if(count > 1) {
|
|
||||||
if(length == 0) {
|
|
||||||
fprintf(file, "*%d", count);
|
|
||||||
}
|
|
||||||
if(length == 1) {
|
|
||||||
fprintf(file, "*%d*t", count);
|
|
||||||
}
|
|
||||||
if (length > 1) {
|
|
||||||
fprintf(file, "*%d*t^%d", count, length);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(file, "\n");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
HParser* cfExample() {
|
HParser* cfExample() {
|
||||||
HParser *n = h_ch('n');
|
HParser *n = h_ch('n');
|
||||||
HParser *E = h_indirect();
|
HParser *E = h_indirect();
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue