From 107d8c092b9b38417f48af1e66f3df72a5e2fc8b Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson Date: Tue, 7 Apr 2015 22:13:18 +0200 Subject: [PATCH 01/11] Generating function skeleton code --- examples/SConscript | 3 +- examples/explore_singular.c | 244 ++++++++++++++++++++++++++++++++++++ 2 files changed, 246 insertions(+), 1 deletion(-) create mode 100644 examples/explore_singular.c diff --git a/examples/SConscript b/examples/SConscript index 0932bda..07c6e51 100644 --- a/examples/SConscript +++ b/examples/SConscript @@ -7,4 +7,5 @@ dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c']) base64 = example.Program('base64', 'base64.c') base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c') base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c') -env.Alias("examples", [dns, base64, base64_sem1, base64_sem2]) \ No newline at end of file +singular = example.Program('explore_singular', 'explore_singular.c') +env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, singular]) \ No newline at end of file diff --git a/examples/explore_singular.c b/examples/explore_singular.c new file mode 100644 index 0000000..3c75145 --- /dev/null +++ b/examples/explore_singular.c @@ -0,0 +1,244 @@ +// +// Created by Mikael Vejdemo Johansson on 4/7/15. 
+// +// Intention: read in a parser, generate the system of equations for its +// generating functions +// + +#include +#include "../src/backends/contextfree.h" +#include "../src/backends/lr.h" +#include + +void h_pprint_gfexpr(FILE *file, const HCFGrammar *g, HCFSequence *seq) { + HCFChoice **x = seq->items; + + if (*x == NULL) { // empty sequence + fprintf(file, "1\n"); + } else { + while (*x) { + if (x != seq->items) { + fprintf(file, " + "); + } + // consume items + // if a string, + // count its length + // output t^length + + if ((*x)->type == HCF_CHAR) { + uint32_t count = 0; + for(; *x; x++, count++) { + if ((*x)->type != HCF_CHAR) { + break; + } + } + fprintf(file, "t^%d", count); + } else { + uint32_t count=0, n, i=0; + switch((*x)->type) { + case HCF_CHAR: + // should not be possible + break; + case HCF_END: + // does not generate any output symbols: value 0 + break; + case HCF_CHARSET: + for(i=0; i<256; i++) { + if (charset_isset((*x)->charset, i)) { + count++; + } + } + fprintf(file, "%d*t", count); + break; + default: + n = (uint8_t)(uintptr_t)h_hashtable_get(g->nts, x); + + fprintf(file, "%c(t)", 'A'+n); + } + x++; + } + } + } +} + + +void h_pprint_gfeqns_NOTUSED(FILE *file, const HCFGrammar *g) { + if (g->nts->used < 1) { + return; + } + + // determine maximum string length of symbol names + int len; + size_t s; + for(len=1, s=26; s < g->nts->used; len++, s*=26); + + // iterate over g->nts + size_t i; + HHashTableEntry *hte; + for(i=0; i < g->nts->capacity; i++) { + for(hte = &g->nts->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { + continue; + } + const HCFChoice *lhs = hte->key; // production's left-hand symbol + assert(lhs->type == HCF_CHOICE); + + uint8_t n = (uint8_t)(uintptr_t)h_hashtable_get(g->nts, lhs); + fprintf(file, "%c(t) = ", 'A'+n); + + HCFSequence **p = lhs->seq; + if (*p == NULL) { + return; // shouldn't happen + } + + h_pprint_gfexpr(file, g, *p); + for(; *p; p++) { + fprintf(file, "\t"); + h_pprint_gfexpr(file, g, 
*p); + fprintf(file, "\n"); + } + } + } +} + + +static const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) +{ + static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits + + // find nt's number in g + size_t n = (uintptr_t)h_hashtable_get(g->nts, nt); + + // NB the start symbol (number 0) is always "A". + int i; + for(i=14; i>=0 && (n>0 || i==14); i--) { + buf[i] = 'A' + n%26; + n = n/26; // shift one digit + } + + return buf+i+1; +} + + + +void readsequence(FILE *file, uint32_t *count, uint32_t *length, + const HCFGrammar *g, const HCFSequence *seq) { + // tally up numbers of choices, and lengths of emitted strings. + // Immediately emit any nonterminals encountered. + HCFChoice** x = seq->items; + + if (*x == NULL) { + return; + } else { + fprintf(file, "1"); + HCharset cs; + unsigned int i, cscount=0; + for(; *x; x++) { + switch((*x)->type) { + case HCF_CHAR: + (*length)++; + break; + case HCF_END: + break; + case HCF_CHARSET: + cs = (*x)->charset; + for(i=0; i<256; i++) { + if (charset_isset(cs, i)) { + cscount++; + } + } + *count *= cscount; + break; + default: // HCF_CHOICE, non-terminal symbol + fprintf(file, "*%s(t)", nonterminal_name(g, *x)); + break; + } + } + } +} + +// For each nt in g->nts +// For each choice in nt->key->seq +// For all elements in sequence +// Accumulate counts +// Accumulate string lengths +// Emit count*t^length +void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) { + if (g->nts->used < 1) { + return; + } + + // determine maximum string length of symbol names + int len; + size_t s; + for(len=1, s=26; s < g->nts->used; len++, s*=26); + + // iterate over g->nts + size_t i; + HHashTableEntry *hte; + for(i=0; i < g->nts->capacity; i++) { + for(hte = &g->nts->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { + continue; + } + + const HCFChoice *nt = hte->key; + fprintf(file, "%s(t) = ", nonterminal_name(g, nt)); + + + for(HCFSequence **seq = nt->seq; *seq; seq++) { + if (seq != 
nt->seq) { + fprintf(file, " + "); + } + uint32_t count=1, length=0; + readsequence(file, &count, &length, g, *seq); + if(count == 1) { + if(length == 1) { + fprintf(file, "*t"); + } + if(length > 1) { + fprintf(file, "*t^%d", length); + } + } else if(count > 1) { + if(length == 0) { + fprintf(file, "*%d", count); + } + if(length == 1) { + fprintf(file, "*%d*t", count); + } + if (length > 1) { + fprintf(file, "*%d*t^%d", count, length); + } + } + } + + fprintf(file, "\n"); + } + } +} + + + + +int main(int argc, char **argv) +{ + HAllocator *mm__ = &system_allocator; + + HParser *n = h_ch('n'); + HParser *E = h_indirect(); + HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL); + HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL); + h_bind_indirect(E, E_); + HParser *p = E; + + HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); + if (g == NULL) { + fprintf(stderr, "h_cfgrammar failed\n"); + return 1; + } + printf("\n==== Generating functions ====\n"); + h_pprint_gfeqns(stdout, g); + + printf("\n==== Grammar ====\n"); + h_pprint_grammar(stdout, g, 0); +} From 6b8a3f262f419cd3bfcb3dff2676bea3278c3532 Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson Date: Tue, 7 Apr 2015 22:14:32 +0200 Subject: [PATCH 02/11] polished generating function code more --- examples/explore_singular.c | 92 ------------------------------------- 1 file changed, 92 deletions(-) diff --git a/examples/explore_singular.c b/examples/explore_singular.c index 3c75145..5127c7c 100644 --- a/examples/explore_singular.c +++ b/examples/explore_singular.c @@ -10,98 +10,6 @@ #include "../src/backends/lr.h" #include -void h_pprint_gfexpr(FILE *file, const HCFGrammar *g, HCFSequence *seq) { - HCFChoice **x = seq->items; - - if (*x == NULL) { // empty sequence - fprintf(file, "1\n"); - } else { - while (*x) { - if (x != seq->items) { - fprintf(file, " + "); - } - // consume items - // if a string, - // count its length - // output t^length - - if 
((*x)->type == HCF_CHAR) { - uint32_t count = 0; - for(; *x; x++, count++) { - if ((*x)->type != HCF_CHAR) { - break; - } - } - fprintf(file, "t^%d", count); - } else { - uint32_t count=0, n, i=0; - switch((*x)->type) { - case HCF_CHAR: - // should not be possible - break; - case HCF_END: - // does not generate any output symbols: value 0 - break; - case HCF_CHARSET: - for(i=0; i<256; i++) { - if (charset_isset((*x)->charset, i)) { - count++; - } - } - fprintf(file, "%d*t", count); - break; - default: - n = (uint8_t)(uintptr_t)h_hashtable_get(g->nts, x); - - fprintf(file, "%c(t)", 'A'+n); - } - x++; - } - } - } -} - - -void h_pprint_gfeqns_NOTUSED(FILE *file, const HCFGrammar *g) { - if (g->nts->used < 1) { - return; - } - - // determine maximum string length of symbol names - int len; - size_t s; - for(len=1, s=26; s < g->nts->used; len++, s*=26); - - // iterate over g->nts - size_t i; - HHashTableEntry *hte; - for(i=0; i < g->nts->capacity; i++) { - for(hte = &g->nts->contents[i]; hte; hte = hte->next) { - if (hte->key == NULL) { - continue; - } - const HCFChoice *lhs = hte->key; // production's left-hand symbol - assert(lhs->type == HCF_CHOICE); - - uint8_t n = (uint8_t)(uintptr_t)h_hashtable_get(g->nts, lhs); - fprintf(file, "%c(t) = ", 'A'+n); - - HCFSequence **p = lhs->seq; - if (*p == NULL) { - return; // shouldn't happen - } - - h_pprint_gfexpr(file, g, *p); - for(; *p; p++) { - fprintf(file, "\t"); - h_pprint_gfexpr(file, g, *p); - fprintf(file, "\n"); - } - } - } -} - - static const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) { static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits From 7fdc687bd4ff1ab652613af6cec7b0f033d9b460 Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson Date: Wed, 8 Apr 2015 09:33:20 +0200 Subject: [PATCH 03/11] parsers for the grammars in the tieknots paper --- examples/ties.c | 331 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 331 insertions(+) create mode 100644 
examples/ties.c diff --git a/examples/ties.c b/examples/ties.c new file mode 100644 index 0000000..b8cc9fd --- /dev/null +++ b/examples/ties.c @@ -0,0 +1,331 @@ +// +// Created by Mikael Vejdemo Johansson on 4/7/15. +// +// Intention: read in a parser, generate the system of equations for its +// generating functions +// + +#include +#include "../src/backends/contextfree.h" +#include "../src/backends/lr.h" +#include + +static const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) +{ + static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits + + // find nt's number in g + size_t n = (uintptr_t)h_hashtable_get(g->nts, nt); + + // NB the start symbol (number 0) is always "A". + int i; + for(i=14; i>=0 && (n>0 || i==14); i--) { + buf[i] = 'A' + n%26; + n = n/26; // shift one digit + } + + return buf+i+1; +} + + + +void readsequence(FILE *file, uint32_t *count, uint32_t *length, + const HCFGrammar *g, const HCFSequence *seq) { + // tally up numbers of choices, and lengths of emitted strings. + // Immediately emit any nonterminals encountered. 
+ HCFChoice** x = seq->items; + + if (*x == NULL) { + return; + } else { + fprintf(file, "1"); + HCharset cs; + unsigned int i, cscount=0; + for(; *x; x++) { + switch((*x)->type) { + case HCF_CHAR: + (*length)++; + break; + case HCF_END: + break; + case HCF_CHARSET: + cs = (*x)->charset; + for(i=0; i<256; i++) { + if (charset_isset(cs, i)) { + cscount++; + } + } + *count *= cscount; + break; + default: // HCF_CHOICE, non-terminal symbol + fprintf(file, "*%s(t)", nonterminal_name(g, *x)); + break; + } + } + } +} + +// For each nt in g->nts +// For each choice in nt->key->seq +// For all elements in sequence +// Accumulate counts +// Accumulate string lengths +// Emit count*t^length +void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) { + if (g->nts->used < 1) { + return; + } + + // determine maximum string length of symbol names + int len; + size_t s; + for(len=1, s=26; s < g->nts->used; len++, s*=26); + + // iterate over g->nts + size_t i; + HHashTableEntry *hte; + for(i=0; i < g->nts->capacity; i++) { + for(hte = &g->nts->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { + continue; + } + + const HCFChoice *nt = hte->key; + fprintf(file, "%s(t) = ", nonterminal_name(g, nt)); + + + for(HCFSequence **seq = nt->seq; *seq; seq++) { + if (seq != nt->seq) { + fprintf(file, " + "); + } + uint32_t count=1, length=0; + readsequence(file, &count, &length, g, *seq); + if(count == 1) { + if(length == 1) { + fprintf(file, "*t"); + } + if(length > 1) { + fprintf(file, "*t^%d", length); + } + } else if(count > 1) { + if(length == 0) { + fprintf(file, "*%d", count); + } + if(length == 1) { + fprintf(file, "*%d*t", count); + } + if (length > 1) { + fprintf(file, "*%d*t^%d", count, length); + } + } + } + + fprintf(file, "\n"); + } + } +} + +HParser* cfExample() { + HParser *n = h_ch('n'); + HParser *E = h_indirect(); + HParser *T = h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL); + HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL); + 
h_bind_indirect(E, E_); + return E; +} + +// The tie knot parsers below would work better if we could patch the gen.function +// code above to allow user specification of non-default byte string "lengths", +// so that U symbols don't contribute with factors of t to the gen. function. +// +// Alternatively: use multivariate generating functions to spit out different +// variables for different terminals. This gets really messy with bigger alphabets. + +HParser* finkmao() { + HParser *L = h_ch('L'); + HParser *R = h_ch('R'); + HParser *C = h_ch('C'); + HParser *U = h_ch('U'); + HParser *Lnext = h_indirect(); + HParser *Rnext = h_indirect(); + HParser *Cnext = h_indirect(); + HParser *L_ = h_choice(h_sequence(R, Rnext, NULL), + h_sequence(C, Cnext, NULL), + h_sequence(R, C, U, NULL), NULL); + HParser *R_ = h_choice(h_sequence(L, Lnext, NULL), + h_sequence(C, Cnext, NULL), + h_sequence(L, C, U, NULL), NULL); + HParser *C_ = h_choice(h_sequence(R, Rnext, NULL), + h_sequence(L, Lnext, NULL), NULL); + h_bind_indirect(Lnext, L_); + h_bind_indirect(Rnext, R_); + h_bind_indirect(Cnext, C_); + HParser *tie = h_choice(h_sequence(L, Lnext), NULL); + return tie; +} + +HParser* finkmaoTW() { + HParser *T = h_ch('T'); + HParser *W = h_ch('W'); + HParser *U = h_ch('U'); + HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL); + HParser *pair = h_repeat_n(h_choice(T, W, NULL), 2); + HParser *tuck = h_choice(h_sequence(T, T, U, NULL), + h_sequence(W, W, U, NULL), + NULL); + return h_choice(h_sequence(prefix, h_many(pair), tuck, NULL)); +} + +HParser* depth1TW() { + HParser *T = h_ch('T'); + HParser *W = h_ch('W'); + HParser *U = h_ch('U'); + HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL); + HParser *pair = h_repeat_n(h_choice(T, W, NULL), 2); + HParser *tuck = h_choice(h_sequence(T, T, U, NULL), + h_sequence(W, W, U, NULL), + NULL); + return h_choice(h_sequence(prefix, h_many(h_choice(pair, tuck)), tuck, NULL)); +} + +HParser* depth1() { + HParser *L = h_ch('L'); + HParser *R 
= h_ch('R'); + HParser *C = h_ch('C'); + HParser *U = h_ch('U'); + HParser *lastR = h_indirect(); + HParser *lastL = h_indirect(); + HParser *lastC = h_indirect(); + HParser *R_ = h_choice(h_sequence(L, R, lastR, NULL), + h_sequence(C, R, lastR, NULL), + h_sequence(L, C, lastC, NULL), + h_sequence(L, C, U, lastC, NULL), + h_sequence(L, C, U, NULL), + h_sequence(C, L, lastL, NULL), + h_sequence(C, L, U, lastL, NULL), + h_sequence(C, L, U, NULL), + NULL); + HParser *L_ = h_choice(h_sequence(R, L, lastR, NULL), + h_sequence(C, L, lastR, NULL), + h_sequence(R, C, lastC, NULL), + h_sequence(R, C, U, lastC, NULL), + h_sequence(R, C, U, NULL), + h_sequence(C, R, lastR, NULL), + h_sequence(C, R, U, lastR, NULL), + h_sequence(C, R, U, NULL), + NULL); + HParser *C_ = h_choice(h_sequence(L, C, lastR, NULL), + h_sequence(R, C, lastR, NULL), + h_sequence(L, R, lastR, NULL), + h_sequence(L, R, U, lastR, NULL), + h_sequence(L, R, U, NULL), + h_sequence(R, L, lastL, NULL), + h_sequence(R, L, U, lastL, NULL), + h_sequence(R, L, U, NULL), + NULL); + h_bind_indirect(lastR, R_); + h_bind_indirect(lastL, L_); + h_bind_indirect(lastC, C_); + return h_choice(h_sequence(L, lastL, NULL), + h_sequence(R, lastR, NULL), + h_sequence(C, lastC, NULL), + NULL); +} + +HParser* depthNTW() { + HParser *T = h_ch('T'); + HParser *W = h_ch('W'); + HParser *U = h_ch('U'); + HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL); + HParser *pair = h_repeat_n(h_choice(T, W, NULL), 2); + HParser *tstart = h_indirect(); + HParser *tw0 = h_indirect(); + HParser *tw1 = h_indirect(); + HParser *tw2 = h_indirect(); + HParser *wstart = h_indirect(); + HParser *wt0 = h_indirect(); + HParser *wt1 = h_indirect(); + HParser *wt2 = h_indirect(); + + HParser *T_ = h_choice(h_sequence(T, T, tw2, U, NULL), + h_sequence(T, W, tw0, U, NULL), + NULL); + HParser *tw0_ = h_choice(h_sequence(T, T, tw2, U, NULL), + h_sequence(T, W, tw0, U, NULL), + h_sequence(W, T, tw0, U, NULL), + h_sequence(W, W, tw1, U, NULL), + 
h_sequence(tstart, tw2, U, NULL), + h_sequence(wstart, tw1, U, NULL), + NULL); + HParser *tw1_ = h_choice(h_sequence(T, T, tw0, U, NULL), + h_sequence(T, W, tw1, U, NULL), + h_sequence(W, T, tw1, U, NULL), + h_sequence(W, W, tw2, U, NULL), + h_sequence(tstart, tw0, U, NULL), + h_sequence(wstart, tw2, U, NULL), + NULL); + HParser *tw2_ = h_choice(h_sequence(T, T, tw1, U, NULL), + h_sequence(T, W, tw2, U, NULL), + h_sequence(W, T, tw2, U, NULL), + h_sequence(W, W, tw0, U, NULL), + h_sequence(tstart, tw1, U, NULL), + h_sequence(wstart, tw0, U, NULL), + h_epsilon_p(), + NULL); + + HParser *W_ = h_choice(h_sequence(W, W, wt2, U, NULL), + h_sequence(W, T, wt0, U, NULL), + NULL); + HParser *wt0_ = h_choice(h_sequence(W, W, wt2, U, NULL), + h_sequence(W, T, wt0, U, NULL), + h_sequence(T, W, wt0, U, NULL), + h_sequence(T, T, wt1, U, NULL), + h_sequence(wstart, wt2, U, NULL), + h_sequence(tstart, wt1, U, NULL), + NULL); + HParser *wt1_ = h_choice(h_sequence(W, W, wt0, U, NULL), + h_sequence(W, T, wt1, U, NULL), + h_sequence(T, W, wt1, U, NULL), + h_sequence(T, T, wt2, U, NULL), + h_sequence(wstart, wt0, U, NULL), + h_sequence(tstart, wt2, U, NULL), + NULL); + HParser *wt2_ = h_choice(h_sequence(W, W, wt1, U, NULL), + h_sequence(W, T, wt2, U, NULL), + h_sequence(T, W, wt2, U, NULL), + h_sequence(T, T, wt0, U, NULL), + h_sequence(wstart, wt1, U, NULL), + h_sequence(tstart, wt0, U, NULL), + h_epsilon_p(), + NULL); + + h_bind_indirect(tstart, T_); + h_bind_indirect(tw0, tw0_); + h_bind_indirect(tw1, tw1_); + h_bind_indirect(tw2, tw2_); + h_bind_indirect(wstart, W_); + h_bind_indirect(wt0, wt0_); + h_bind_indirect(wt1, wt1_); + h_bind_indirect(wt2, wt2_); + + HParser *tuck = h_choice(tstart, wstart, NULL); + return h_choice(h_sequence(prefix, h_many(h_choice(pair, tuck)), tuck, NULL)); +} + + +int main(int argc, char **argv) +{ + HAllocator *mm__ = &system_allocator; + + + HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, cfExample())); + if (g == NULL) { + 
fprintf(stderr, "h_cfgrammar failed\n"); + return 1; + } + printf("\n==== Generating functions ====\n"); + h_pprint_gfeqns(stdout, g); + + printf("\n==== Grammar ====\n"); + h_pprint_grammar(stdout, g, 0); +} From 9a1812bd4948508f2e5df3fcc9184851e8bb2035 Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson Date: Wed, 8 Apr 2015 13:44:14 +0200 Subject: [PATCH 04/11] added ties to build script --- examples/SConscript | 3 ++- examples/ties.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/examples/SConscript b/examples/SConscript index 07c6e51..456545c 100644 --- a/examples/SConscript +++ b/examples/SConscript @@ -8,4 +8,5 @@ base64 = example.Program('base64', 'base64.c') base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c') base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c') singular = example.Program('explore_singular', 'explore_singular.c') -env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, singular]) \ No newline at end of file +ties = example.Program('ties', 'ties.c') +env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, singular, ties]) \ No newline at end of file diff --git a/examples/ties.c b/examples/ties.c index b8cc9fd..c7d2de7 100644 --- a/examples/ties.c +++ b/examples/ties.c @@ -318,7 +318,7 @@ int main(int argc, char **argv) HAllocator *mm__ = &system_allocator; - HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, cfExample())); + HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, finkmaoTW())); if (g == NULL) { fprintf(stderr, "h_cfgrammar failed\n"); return 1; From 61a211559ecfbb75d298c51cff371448433459a6 Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson Date: Wed, 8 Apr 2015 13:46:42 +0200 Subject: [PATCH 05/11] fixed build errors wrt sentinels --- examples/ties.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/ties.c b/examples/ties.c index c7d2de7..341f5ba 100644 --- a/examples/ties.c +++ b/examples/ties.c @@ -160,7 +160,7 @@ 
HParser* finkmao() { h_bind_indirect(Lnext, L_); h_bind_indirect(Rnext, R_); h_bind_indirect(Cnext, C_); - HParser *tie = h_choice(h_sequence(L, Lnext), NULL); + HParser *tie = h_choice(h_sequence(L, Lnext, NULL), NULL); return tie; } @@ -173,7 +173,7 @@ HParser* finkmaoTW() { HParser *tuck = h_choice(h_sequence(T, T, U, NULL), h_sequence(W, W, U, NULL), NULL); - return h_choice(h_sequence(prefix, h_many(pair), tuck, NULL)); + return h_choice(h_sequence(prefix, h_many(pair), tuck, NULL), NULL); } HParser* depth1TW() { @@ -185,7 +185,7 @@ HParser* depth1TW() { HParser *tuck = h_choice(h_sequence(T, T, U, NULL), h_sequence(W, W, U, NULL), NULL); - return h_choice(h_sequence(prefix, h_many(h_choice(pair, tuck)), tuck, NULL)); + return h_choice(h_sequence(prefix, h_many(h_choice(pair, tuck, NULL)), tuck, NULL), NULL); } HParser* depth1() { @@ -309,7 +309,7 @@ HParser* depthNTW() { h_bind_indirect(wt2, wt2_); HParser *tuck = h_choice(tstart, wstart, NULL); - return h_choice(h_sequence(prefix, h_many(h_choice(pair, tuck)), tuck, NULL)); + return h_choice(h_sequence(prefix, h_many(h_choice(pair, tuck, NULL)), tuck, NULL), NULL); } From 8039d48537f529b134a9734c9715f13b66527ab7 Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson Date: Wed, 8 Apr 2015 13:53:45 +0200 Subject: [PATCH 06/11] removed use of h_many to get desugars --- examples/ties.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/examples/ties.c b/examples/ties.c index 341f5ba..42dacb4 100644 --- a/examples/ties.c +++ b/examples/ties.c @@ -173,7 +173,12 @@ HParser* finkmaoTW() { HParser *tuck = h_choice(h_sequence(T, T, U, NULL), h_sequence(W, W, U, NULL), NULL); - return h_choice(h_sequence(prefix, h_many(pair), tuck, NULL), NULL); + HParser *pairstar = h_indirect(); + HParser *pstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL), + h_epsilon_p(), + NULL); + h_bind_indirect(pairstar, pstar_); + return h_choice(h_sequence(prefix, pairstar, tuck, NULL), NULL); } 
HParser* depth1TW() { @@ -185,7 +190,13 @@ HParser* depth1TW() { HParser *tuck = h_choice(h_sequence(T, T, U, NULL), h_sequence(W, W, U, NULL), NULL); - return h_choice(h_sequence(prefix, h_many(h_choice(pair, tuck, NULL)), tuck, NULL), NULL); + HParser *tuckpairstar = h_indirect(); + HParser *tpstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL), + h_sequence(tuck, tuckpairstar, NULL), + h_epsilon_p(), + NULL); + h_bind_indirect(tuckpairstar, tpstar_); + return h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL); } HParser* depth1() { @@ -307,9 +318,16 @@ HParser* depthNTW() { h_bind_indirect(wt0, wt0_); h_bind_indirect(wt1, wt1_); h_bind_indirect(wt2, wt2_); - HParser *tuck = h_choice(tstart, wstart, NULL); - return h_choice(h_sequence(prefix, h_many(h_choice(pair, tuck, NULL)), tuck, NULL), NULL); + + HParser *tuckpairstar = h_indirect(); + HParser *tpstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL), + h_sequence(tuck, tuckpairstar, NULL), + h_epsilon_p(), + NULL); + h_bind_indirect(tuckpairstar, tpstar_); + + return h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL); } From 5100dc69dae77ecc815e68e71eb25d60cd17ad5c Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson Date: Wed, 8 Apr 2015 13:54:30 +0200 Subject: [PATCH 07/11] typo --- examples/ties.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/ties.c b/examples/ties.c index 42dacb4..45b49f7 100644 --- a/examples/ties.c +++ b/examples/ties.c @@ -174,7 +174,7 @@ HParser* finkmaoTW() { h_sequence(W, W, U, NULL), NULL); HParser *pairstar = h_indirect(); - HParser *pstar_ = h_choice(h_sequence(pair, tuckpairstar, NULL), + HParser *pstar_ = h_choice(h_sequence(pair, pairstar, NULL), h_epsilon_p(), NULL); h_bind_indirect(pairstar, pstar_); From 1e95eac6cdb8a8ae0f3a3a1608e7f919f6951442 Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson Date: Wed, 8 Apr 2015 14:06:58 +0200 Subject: [PATCH 08/11] removed h_repeat_n to enable desugaring --- examples/ties.c | 
28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/examples/ties.c b/examples/ties.c index 45b49f7..e4ca211 100644 --- a/examples/ties.c +++ b/examples/ties.c @@ -160,7 +160,7 @@ HParser* finkmao() { h_bind_indirect(Lnext, L_); h_bind_indirect(Rnext, R_); h_bind_indirect(Cnext, C_); - HParser *tie = h_choice(h_sequence(L, Lnext, NULL), NULL); + HParser *tie = h_sequence(L, Lnext, NULL); return tie; } @@ -168,17 +168,21 @@ HParser* finkmaoTW() { HParser *T = h_ch('T'); HParser *W = h_ch('W'); HParser *U = h_ch('U'); - HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL); - HParser *pair = h_repeat_n(h_choice(T, W, NULL), 2); + HParser *prefix = h_choice(T, W, h_epsilon_p(), + NULL); + HParser *pair = h_choice(h_sequence(T, T, NULL), + h_sequence(W, T, NULL), + h_sequence(T, W, NULL), + h_sequence(W, W, NULL), NULL); HParser *tuck = h_choice(h_sequence(T, T, U, NULL), h_sequence(W, W, U, NULL), NULL); HParser *pairstar = h_indirect(); HParser *pstar_ = h_choice(h_sequence(pair, pairstar, NULL), - h_epsilon_p(), + h_epsilon_p(), NULL); h_bind_indirect(pairstar, pstar_); - return h_choice(h_sequence(prefix, pairstar, tuck, NULL), NULL); + return h_sequence(prefix, pairstar, tuck, NULL); } HParser* depth1TW() { @@ -186,7 +190,10 @@ HParser* depth1TW() { HParser *W = h_ch('W'); HParser *U = h_ch('U'); HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL); - HParser *pair = h_repeat_n(h_choice(T, W, NULL), 2); + HParser *pair = h_choice(h_sequence(T, T, NULL), + h_sequence(W, T, NULL), + h_sequence(T, W, NULL), + h_sequence(W, W, NULL), NULL); HParser *tuck = h_choice(h_sequence(T, T, U, NULL), h_sequence(W, W, U, NULL), NULL); @@ -248,7 +255,10 @@ HParser* depthNTW() { HParser *W = h_ch('W'); HParser *U = h_ch('U'); HParser *prefix = h_choice(T, W, h_epsilon_p(), NULL); - HParser *pair = h_repeat_n(h_choice(T, W, NULL), 2); + HParser *pair = h_choice(h_sequence(T, T, NULL), + h_sequence(W, T, NULL), + h_sequence(T, W, NULL), + 
h_sequence(W, W, NULL), NULL); HParser *tstart = h_indirect(); HParser *tw0 = h_indirect(); HParser *tw1 = h_indirect(); @@ -335,8 +345,8 @@ int main(int argc, char **argv) { HAllocator *mm__ = &system_allocator; - - HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, finkmaoTW())); + HParser *p = finkmaoTW(); + HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); if (g == NULL) { fprintf(stderr, "h_cfgrammar failed\n"); return 1; From 24e9e9de7f0467386b61b27319e28e40454e9616 Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson Date: Wed, 8 Apr 2015 15:29:32 +0200 Subject: [PATCH 09/11] support naming of generating functions, and 0-weighting of symbols --- examples/explore_singular.c | 152 ------------------------------------ examples/ties.c | 52 ++++++++++-- 2 files changed, 45 insertions(+), 159 deletions(-) delete mode 100644 examples/explore_singular.c diff --git a/examples/explore_singular.c b/examples/explore_singular.c deleted file mode 100644 index 5127c7c..0000000 --- a/examples/explore_singular.c +++ /dev/null @@ -1,152 +0,0 @@ -// -// Created by Mikael Vejdemo Johansson on 4/7/15. -// -// Intention: read in a parser, generate the system of equations for its -// generating functions -// - -#include -#include "../src/backends/contextfree.h" -#include "../src/backends/lr.h" -#include - -static const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) -{ - static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits - - // find nt's number in g - size_t n = (uintptr_t)h_hashtable_get(g->nts, nt); - - // NB the start symbol (number 0) is always "A". - int i; - for(i=14; i>=0 && (n>0 || i==14); i--) { - buf[i] = 'A' + n%26; - n = n/26; // shift one digit - } - - return buf+i+1; -} - - - -void readsequence(FILE *file, uint32_t *count, uint32_t *length, - const HCFGrammar *g, const HCFSequence *seq) { - // tally up numbers of choices, and lengths of emitted strings. 
- // Immediately emit any nonterminals encountered. - HCFChoice** x = seq->items; - - if (*x == NULL) { - return; - } else { - fprintf(file, "1"); - HCharset cs; - unsigned int i, cscount=0; - for(; *x; x++) { - switch((*x)->type) { - case HCF_CHAR: - (*length)++; - break; - case HCF_END: - break; - case HCF_CHARSET: - cs = (*x)->charset; - for(i=0; i<256; i++) { - if (charset_isset(cs, i)) { - cscount++; - } - } - *count *= cscount; - break; - default: // HCF_CHOICE, non-terminal symbol - fprintf(file, "*%s(t)", nonterminal_name(g, *x)); - break; - } - } - } -} - -// For each nt in g->nts -// For each choice in nt->key->seq -// For all elements in sequence -// Accumulate counts -// Accumulate string lengths -// Emit count*t^length -void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) { - if (g->nts->used < 1) { - return; - } - - // determine maximum string length of symbol names - int len; - size_t s; - for(len=1, s=26; s < g->nts->used; len++, s*=26); - - // iterate over g->nts - size_t i; - HHashTableEntry *hte; - for(i=0; i < g->nts->capacity; i++) { - for(hte = &g->nts->contents[i]; hte; hte = hte->next) { - if (hte->key == NULL) { - continue; - } - - const HCFChoice *nt = hte->key; - fprintf(file, "%s(t) = ", nonterminal_name(g, nt)); - - - for(HCFSequence **seq = nt->seq; *seq; seq++) { - if (seq != nt->seq) { - fprintf(file, " + "); - } - uint32_t count=1, length=0; - readsequence(file, &count, &length, g, *seq); - if(count == 1) { - if(length == 1) { - fprintf(file, "*t"); - } - if(length > 1) { - fprintf(file, "*t^%d", length); - } - } else if(count > 1) { - if(length == 0) { - fprintf(file, "*%d", count); - } - if(length == 1) { - fprintf(file, "*%d*t", count); - } - if (length > 1) { - fprintf(file, "*%d*t^%d", count, length); - } - } - } - - fprintf(file, "\n"); - } - } -} - - - - -int main(int argc, char **argv) -{ - HAllocator *mm__ = &system_allocator; - - HParser *n = h_ch('n'); - HParser *E = h_indirect(); - HParser *T = 
h_choice(h_sequence(h_ch('('), E, h_ch(')'), NULL), n, NULL); - HParser *E_ = h_choice(h_sequence(E, h_ch('-'), T, NULL), T, NULL); - h_bind_indirect(E, E_); - HParser *p = E; - - HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); - if (g == NULL) { - fprintf(stderr, "h_cfgrammar failed\n"); - return 1; - } - printf("\n==== Generating functions ====\n"); - h_pprint_gfeqns(stdout, g); - - printf("\n==== Grammar ====\n"); - h_pprint_grammar(stdout, g, 0); -} diff --git a/examples/ties.c b/examples/ties.c index e4ca211..a379b5c 100644 --- a/examples/ties.c +++ b/examples/ties.c @@ -10,8 +10,30 @@ #include "../src/backends/lr.h" #include -static const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) -{ + +HAllocator *mm__; + +// If a parser has user_data set, the generating function systems will try +// to interpret it as a string: +// +// If this string for an h_ch starts with the character 0, then that character +// will have weight 0 in the generating function. +// +// Use the remaining string to set the preferred name of that parser in the +// generating function. 
+ + + +static const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) { + if(nt->user_data != NULL) { + if(*(char*)(nt->user_data) != '0') { + // user_data is a non-empty string + return nt->user_data; + } else { + return nt->user_data+1; + } + } + static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits // find nt's number in g @@ -38,13 +60,16 @@ void readsequence(FILE *file, uint32_t *count, uint32_t *length, if (*x == NULL) { return; } else { + char has_user_data = (*x)->user_data != NULL && *(char*)(*x)->user_data != 0; fprintf(file, "1"); HCharset cs; unsigned int i, cscount=0; for(; *x; x++) { switch((*x)->type) { case HCF_CHAR: - (*length)++; + if(!(has_user_data && *(char*)(*x)->user_data == '0')) { + (*length)++; + } break; case HCF_END: break; @@ -182,7 +207,21 @@ HParser* finkmaoTW() { h_epsilon_p(), NULL); h_bind_indirect(pairstar, pstar_); - return h_sequence(prefix, pairstar, tuck, NULL); + + HParser* tie = h_sequence(prefix, pairstar, tuck, NULL); + h_desugar_augmented(mm__, tie); + + + T->desugared->user_data = "T"; + W->desugared->user_data = "W"; + U->desugared->user_data = "0U"; + prefix->desugared->user_data = "prefix"; + pair->desugared->user_data = "pair"; + tuck->desugared->user_data = "tuck"; + pstar_->desugared->user_data = "pairstar"; + tie->desugared->user_data = "tie"; + + return tie; } HParser* depth1TW() { @@ -341,9 +380,8 @@ HParser* depthNTW() { } -int main(int argc, char **argv) -{ - HAllocator *mm__ = &system_allocator; +int main(int argc, char **argv) { + mm__ = &system_allocator; HParser *p = finkmaoTW(); HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); From a72aff9b3924ba37f4cfee4ac2cebc913f8f30e8 Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson Date: Wed, 8 Apr 2015 17:32:12 +0200 Subject: [PATCH 10/11] refactored out the grammar handling. 
handles epsilons better --- examples/SConscript | 5 +- examples/grammar.c | 148 ++++++++++++++++++++++++++++++++++++++++++++ examples/grammar.h | 46 ++++++++++++++ examples/ties.c | 141 +---------------------------------------- 4 files changed, 197 insertions(+), 143 deletions(-) create mode 100644 examples/grammar.c create mode 100644 examples/grammar.h diff --git a/examples/SConscript b/examples/SConscript index 456545c..0694721 100644 --- a/examples/SConscript +++ b/examples/SConscript @@ -7,6 +7,5 @@ dns = example.Program('dns', ['dns.c', 'rr.c', 'dns_common.c']) base64 = example.Program('base64', 'base64.c') base64_sem1 = example.Program('base64_sem1', 'base64_sem1.c') base64_sem2 = example.Program('base64_sem2', 'base64_sem2.c') -singular = example.Program('explore_singular', 'explore_singular.c') -ties = example.Program('ties', 'ties.c') -env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, singular, ties]) \ No newline at end of file +ties = example.Program('ties', ['ties.c', 'grammar.c']) +env.Alias("examples", [dns, base64, base64_sem1, base64_sem2, ties]) \ No newline at end of file diff --git a/examples/grammar.c b/examples/grammar.c new file mode 100644 index 0000000..f722edf --- /dev/null +++ b/examples/grammar.c @@ -0,0 +1,148 @@ +// Generates a system of equations for generating functions from a grammar. +// +// (c) 2015 Mikael Vejdemo-Johansson +// + +// If a desugared parser has user_data set, the generating function systems will try +// to interpret it as a string: +// +// If this string for an h_ch starts with the character 0, then that character +// will have weight 0 in the generating function. +// +// Use the remaining string to set the preferred name of that parser in the +// generating function. 
+// + +#include +#include "../src/backends/contextfree.h" +#include "../src/backends/lr.h" +#include "grammar.h" +#include + +const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) { + if(nt->user_data != NULL) { + if(*(char*)(nt->user_data) != '0') { + // user_data is a non-empty string + return nt->user_data; + } else { + return nt->user_data+1; + } + } + + static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits + + // find nt's number in g + size_t n = (uintptr_t)h_hashtable_get(g->nts, nt); + + // NB the start symbol (number 0) is always "A". + int i; + for(i=14; i>=0 && (n>0 || i==14); i--) { + buf[i] = 'A' + n%26; + n = n/26; // shift one digit + } + + return buf+i+1; +} + + + +void readsequence(FILE *file, uint32_t *count, uint32_t *length, + const HCFGrammar *g, const HCFSequence *seq) { + // tally up numbers of choices, and lengths of emitted strings. + // Immediately emit any nonterminals encountered. + HCFChoice** x = seq->items; + + fprintf(file, "1"); + if (*x == NULL) { + // empty sequence + // GF is 1 + return; + } else { + char has_user_data = (*x)->user_data != NULL && *(char*)(*x)->user_data != 0; + HCharset cs; + unsigned int i, cscount=0; + for(; *x; x++) { + switch((*x)->type) { + case HCF_CHAR: + if(!(has_user_data && *(char*)(*x)->user_data == '0')) { + (*length)++; + } + break; + case HCF_END: + break; + case HCF_CHARSET: + cs = (*x)->charset; + for(i=0; i<256; i++) { + if (charset_isset(cs, i)) { + cscount++; + } + } + *count *= cscount; + break; + default: // HCF_CHOICE, non-terminal symbol + fprintf(file, "*%s(t)", nonterminal_name(g, *x)); + break; + } + } + } +} + +// For each nt in g->nts +// For each choice in nt->key->seq +// For all elements in sequence +// Accumulate counts +// Accumulate string lengths +// Emit count*t^length +void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) { + if (g->nts->used < 1) { + return; + } + + // determine maximum string length of symbol names + int len; + 
size_t s; + for(len=1, s=26; s < g->nts->used; len++, s*=26); + + // iterate over g->nts + size_t i; + HHashTableEntry *hte; + for(i=0; i < g->nts->capacity; i++) { + for(hte = &g->nts->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { + continue; + } + + const HCFChoice *nt = hte->key; + fprintf(file, "%s(t) = ", nonterminal_name(g, nt)); + + + for(HCFSequence **seq = nt->seq; *seq; seq++) { + if (seq != nt->seq) { + fprintf(file, " + "); + } + uint32_t count=1, length=0; + readsequence(file, &count, &length, g, *seq); + if(count == 1) { + if(length == 1) { + fprintf(file, "*t"); + } + if(length > 1) { + fprintf(file, "*t^%d", length); + } + } else if(count > 1) { + if(length == 0) { + fprintf(file, "*%d", count); + } + if(length == 1) { + fprintf(file, "*%d*t", count); + } + if (length > 1) { + fprintf(file, "*%d*t^%d", count, length); + } + } + } + + fprintf(file, "\n"); + } + } +} diff --git a/examples/grammar.h b/examples/grammar.h new file mode 100644 index 0000000..b42eced --- /dev/null +++ b/examples/grammar.h @@ -0,0 +1,46 @@ +// Generates a system of equations for generating functions from a grammar. +// +// (c) 2015 Mikael Vejdemo-Johansson +// + +// Currently does absolutely no elegance, no caching of information, but rather +// just prints the generating functions to a provided FILE*. +// + + +// If a desugared parser has user_data set, the generating function systems will try +// to interpret it as a string: +// +// If this string for an h_ch starts with the character 0, then that character +// will have weight 0 in the generating function. +// +// Use the remaining string to set the preferred name of that parser in the +// generating function. 
+// + +#ifndef HAMMER_GRAMMAR__H +#define HAMMER_GRAMMAR__H + +#include "../src/backends/contextfree.h" +#include "../src/backends/lr.h" + + +// Filched from cfgrammar.c this function extracts the name from user_data if it +// is set; otherwise assigns a name automatically from its position in some +// ordering of non-terminals. +const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt); + +// This function prints out the monomial generated by a single HCFSequence. +// It returns the resulting exponent for t in length and the number of alternatives +// accumulated in count. The monomial is (mostly) printed out to the provided FILE*, +// the caller is responsible for adding a scalar and a power of t to the printout. +void readsequence(FILE *file, uint32_t *count, uint32_t *length, + const HCFGrammar *g, const HCFSequence *seq); + +// This function walks through a grammar and generates an equation for each +// production rule. The results are printed out to the provided FILE*. +void h_pprint_gfeqns(FILE *file, const HCFGrammar *g); + + + +#endif diff --git a/examples/ties.c b/examples/ties.c index a379b5c..09f6b70 100644 --- a/examples/ties.c +++ b/examples/ties.c @@ -1,6 +1,3 @@ -// -// Created by Mikael Vejdemo Johansson on 4/7/15. -// // Intention: read in a parser, generate the system of equations for its // generating functions // @@ -8,148 +5,12 @@ #include #include "../src/backends/contextfree.h" #include "../src/backends/lr.h" +#include "grammar.h" #include HAllocator *mm__; -// If a parser has user_data set, the generating function systems will try -// to interpret it as a string: -// -// If this string for an h_ch starts with the character 0, then that character -// will have weight 0 in the generating function. -// -// Use the remaining string to set the preferred name of that parser in the -// generating function. 
- - - -static const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) { - if(nt->user_data != NULL) { - if(*(char*)(nt->user_data) != '0') { - // user_data is a non-empty string - return nt->user_data; - } else { - return nt->user_data+1; - } - } - - static char buf[16] = {0}; // 14 characters in base 26 are enough for 64 bits - - // find nt's number in g - size_t n = (uintptr_t)h_hashtable_get(g->nts, nt); - - // NB the start symbol (number 0) is always "A". - int i; - for(i=14; i>=0 && (n>0 || i==14); i--) { - buf[i] = 'A' + n%26; - n = n/26; // shift one digit - } - - return buf+i+1; -} - - - -void readsequence(FILE *file, uint32_t *count, uint32_t *length, - const HCFGrammar *g, const HCFSequence *seq) { - // tally up numbers of choices, and lengths of emitted strings. - // Immediately emit any nonterminals encountered. - HCFChoice** x = seq->items; - - if (*x == NULL) { - return; - } else { - char has_user_data = (*x)->user_data != NULL && *(char*)(*x)->user_data != 0; - fprintf(file, "1"); - HCharset cs; - unsigned int i, cscount=0; - for(; *x; x++) { - switch((*x)->type) { - case HCF_CHAR: - if(!(has_user_data && *(char*)(*x)->user_data == '0')) { - (*length)++; - } - break; - case HCF_END: - break; - case HCF_CHARSET: - cs = (*x)->charset; - for(i=0; i<256; i++) { - if (charset_isset(cs, i)) { - cscount++; - } - } - *count *= cscount; - break; - default: // HCF_CHOICE, non-terminal symbol - fprintf(file, "*%s(t)", nonterminal_name(g, *x)); - break; - } - } - } -} - -// For each nt in g->nts -// For each choice in nt->key->seq -// For all elements in sequence -// Accumulate counts -// Accumulate string lengths -// Emit count*t^length -void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) { - if (g->nts->used < 1) { - return; - } - - // determine maximum string length of symbol names - int len; - size_t s; - for(len=1, s=26; s < g->nts->used; len++, s*=26); - - // iterate over g->nts - size_t i; - HHashTableEntry *hte; - for(i=0; i < 
g->nts->capacity; i++) { - for(hte = &g->nts->contents[i]; hte; hte = hte->next) { - if (hte->key == NULL) { - continue; - } - - const HCFChoice *nt = hte->key; - fprintf(file, "%s(t) = ", nonterminal_name(g, nt)); - - - for(HCFSequence **seq = nt->seq; *seq; seq++) { - if (seq != nt->seq) { - fprintf(file, " + "); - } - uint32_t count=1, length=0; - readsequence(file, &count, &length, g, *seq); - if(count == 1) { - if(length == 1) { - fprintf(file, "*t"); - } - if(length > 1) { - fprintf(file, "*t^%d", length); - } - } else if(count > 1) { - if(length == 0) { - fprintf(file, "*%d", count); - } - if(length == 1) { - fprintf(file, "*%d*t", count); - } - if (length > 1) { - fprintf(file, "*%d*t^%d", count, length); - } - } - } - - fprintf(file, "\n"); - } - } -} - HParser* cfExample() { HParser *n = h_ch('n'); HParser *E = h_indirect(); From d13657a411c549b9bcb647478be01249ed74ee71 Mon Sep 17 00:00:00 2001 From: Mikael Vejdemo-Johansson Date: Fri, 10 Apr 2015 10:05:19 +0200 Subject: [PATCH 11/11] Changed generating functions printouts to be copy-paste-able into SageMath. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now we can do things like: # copy-paste from output ring. 
<t,L,tie,Cn,M,Ln,I,D,J,Rn,A,K,F,G> = QQ[] ID = ring.ideal(L - (1*Cn*t),tie - (1*Ln*t),Cn - (1*I + 1*J),M - (1*t^2),Ln - (1*D + 1*L + 1*M),I - (1*Rn*t),D - (1*Rn*t),J - (1*Ln*t),Rn - (1*F + 1*G + 1*K),A - (1*tie),K - (1*t^2),F - (1*Ln*t),G - (1*Cn*t)) # we are interested in tie in terms of t; so we want to remove anything not these two: ID.elimination_ideal([L,Cn,M,Ln,I,D,J,Rn,A,K,F,G]) # output from this SageMath command is # Ideal (t^3 + 2*t^2*tie + t*tie - tie) of Multivariate Polynomial Ring in t, L, tie, Cn, M, Ln, I, D, J, Rn, A, K, F, G over Rational Field # which we can solve for tie to get tie = t^3/(1-t-2*t^2) just as expected --- examples/grammar.c | 45 +++++++++++++++++++++++++++++------ examples/ties.c | 59 ++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 93 insertions(+), 11 deletions(-) diff --git a/examples/grammar.c b/examples/grammar.c index f722edf..7638fe9 100644 --- a/examples/grammar.c +++ b/examples/grammar.c @@ -20,7 +20,8 @@ #include const char *nonterminal_name(const HCFGrammar *g, const HCFChoice *nt) { - if(nt->user_data != NULL) { + // if user_data exists and is printable: + if(nt->user_data != NULL && *(char*)(nt->user_data) > ' ' && *(char*)(nt->user_data) < 127) { if(*(char*)(nt->user_data) != '0') { // user_data is a non-empty string return nt->user_data; @@ -80,7 +81,7 @@ void readsequence(FILE *file, uint32_t *count, uint32_t *length, *count *= cscount; break; default: // HCF_CHOICE, non-terminal symbol - fprintf(file, "*%s(t)", nonterminal_name(g, *x)); + fprintf(file, "*%s", nonterminal_name(g, *x)); break; } } @@ -101,19 +102,48 @@ void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) { // determine maximum string length of symbol names int len; size_t s; - for(len=1, s=26; s < g->nts->used; len++, s*=26); + for(len=1, s=26; s < g->nts->used; len++, s*=26); - // iterate over g->nts + // emit the SageMath ring init string + // iterate over g->nts, output symbols size_t i; - HHashTableEntry *hte; + HHashTableEntry *hte; + fprintf(file, 
"ring.<t"); + for(i=0; i < g->nts->capacity; i++) { + for(hte = &g->nts->contents[i]; hte; hte = hte->next) { + if (hte->key == NULL) { + continue; + } + const HCFChoice *nt = hte->key; + fprintf(file, ","); + + fprintf(file, "%s", nonterminal_name(g, nt)); + } + } + fprintf(file, "> = QQ[]\n"); + + + // iterate over g->nts + // emit a Sage ideal definition + int j=0; + fprintf(file, "ID = ring.ideal("); for(i=0; i < g->nts->capacity; i++) { for(hte = &g->nts->contents[i]; hte; hte = hte->next) { if (hte->key == NULL) { continue; } + if(j>0) { + fprintf(file, ","); + } + j++; + const HCFChoice *nt = hte->key; - fprintf(file, "%s(t) = ", nonterminal_name(g, nt)); + const char *ntn = nonterminal_name(g, nt); + if(*ntn == 0) { + continue; + } + fprintf(file, "%s - (", ntn); for(HCFSequence **seq = nt->seq; *seq; seq++) { @@ -142,7 +172,8 @@ void h_pprint_gfeqns(FILE *file, const HCFGrammar *g) { } } - fprintf(file, "\n"); + fprintf(file, ")"); } } + fprintf(file, ")\n"); } diff --git a/examples/ties.c b/examples/ties.c index 09f6b70..77d0821 100644 --- a/examples/ties.c +++ b/examples/ties.c @@ -47,6 +47,18 @@ HParser* finkmao() { h_bind_indirect(Rnext, R_); h_bind_indirect(Cnext, C_); HParser *tie = h_sequence(L, Lnext, NULL); + + h_desugar_augmented(mm__, tie); + + L->desugared->user_data = "L"; + R->desugared->user_data = "R"; + C->desugared->user_data = "C"; + Lnext->desugared->user_data = "Ln"; + Rnext->desugared->user_data = "Rn"; + Cnext->desugared->user_data = "Cn"; + tie->desugared->user_data = "tie"; + U->desugared->user_data = "0U"; + return tie; } @@ -103,7 +115,20 @@ HParser* depth1TW() { h_epsilon_p(), NULL); h_bind_indirect(tuckpairstar, tpstar_); - return h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL); + HParser *tie = h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL); + + h_desugar_augmented(mm__, tie); + + T->desugared->user_data = "T"; + W->desugared->user_data = "W"; + U->desugared->user_data = "0U"; + prefix->desugared->user_data = "prefix"; + 
pair->desugared->user_data = "pair"; + tuck->desugared->user_data = "tuck"; + tpstar_->desugared->user_data = "tuckpairstar"; + tie->desugared->user_data = "tie"; + + return tie; } HParser* depth1() { @@ -144,10 +169,23 @@ HParser* depth1() { h_bind_indirect(lastR, R_); h_bind_indirect(lastL, L_); h_bind_indirect(lastC, C_); - return h_choice(h_sequence(L, lastL, NULL), + HParser* tie = h_choice(h_sequence(L, lastL, NULL), h_sequence(R, lastR, NULL), h_sequence(C, lastC, NULL), NULL); + + h_desugar_augmented(mm__, tie); + + L->desugared->user_data = "L"; + R->desugared->user_data = "R"; + C->desugared->user_data = "C"; + U->desugared->user_data = "0U"; + lastL ->desugared->user_data = "Ln"; + lastR->desugared->user_data = "Rn"; + lastC->desugared->user_data = "Cn"; + tie->desugared->user_data = "tie"; + + return tie; } HParser* depthNTW() { @@ -237,14 +275,27 @@ HParser* depthNTW() { NULL); h_bind_indirect(tuckpairstar, tpstar_); - return h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL); + HParser *tie = h_choice(h_sequence(prefix, tuckpairstar, tuck, NULL), NULL); + + h_desugar_augmented(mm__, tie); + + T->desugared->user_data = "T"; + W->desugared->user_data = "W"; + U->desugared->user_data = "0U"; + prefix->desugared->user_data = "prefix"; + pair->desugared->user_data = "pair"; + tuck->desugared->user_data = "tuck"; + tpstar_->desugared->user_data = "tuckpairstar"; + tie->desugared->user_data = "tie"; + + return tie; } int main(int argc, char **argv) { mm__ = &system_allocator; - HParser *p = finkmaoTW(); + HParser *p = finkmao(); HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, p)); if (g == NULL) { fprintf(stderr, "h_cfgrammar failed\n");