diff --git a/.gitignore b/.gitignore index 3ab234f..7f4d7d9 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ src/test_suite lib/hush examples/dns +examples/base64 +examples/base64_sem1 +examples/base64_sem2 TAGS *.swp *.swo diff --git a/Doxyfile b/Doxyfile new file mode 100644 index 0000000..16f15ca --- /dev/null +++ b/Doxyfile @@ -0,0 +1,1826 @@ +# Doxyfile 1.8.1.2 + +# This file describes the settings to be used by the documentation system +# doxygen (www.doxygen.org) for a project +# +# All text after a hash (#) is considered a comment and will be ignored +# The format is: +# TAG = value [value, ...] +# For lists items can also be appended using: +# TAG += value [value, ...] +# Values that contain spaces should be placed between quotes (" ") + +#--------------------------------------------------------------------------- +# Project related configuration options +#--------------------------------------------------------------------------- + +# This tag specifies the encoding used for all characters in the config file +# that follow. The default is UTF-8 which is also the encoding used for all +# text before the first occurrence of this tag. Doxygen uses libiconv (or the +# iconv built into libc) for the transcoding. See +# http://www.gnu.org/software/libiconv for the list of possible encodings. + +DOXYFILE_ENCODING = UTF-8 + +# The PROJECT_NAME tag is a single word (or sequence of words) that should +# identify the project. Note that if you do not use Doxywizard you need +# to put quotes around the project name if it contains spaces. + +PROJECT_NAME = Hammer + +# The PROJECT_NUMBER tag can be used to enter a project or revision number. +# This could be handy for archiving the generated documentation or +# if some version control system is used. + +PROJECT_NUMBER = + +# Using the PROJECT_BRIEF tag one can provide an optional one line description +# for a project that appears at the top of each page and should give viewer +# a quick idea about the purpose of the project. Keep the description short. + +PROJECT_BRIEF = "Binary parser combinators in C" + +# With the PROJECT_LOGO tag one can specify an logo or icon that is +# included in the documentation. The maximum height of the logo should not +# exceed 55 pixels and the maximum width should not exceed 200 pixels. +# Doxygen will copy the logo to the output directory. + +PROJECT_LOGO = + +# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) +# base path where the generated documentation will be put. +# If a relative path is entered, it will be relative to the location +# where doxygen was started. If left blank the current directory will be used. + +OUTPUT_DIRECTORY = /home/thequux/Projects/hammer/docs + +# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create +# 4096 sub-directories (in 2 levels) under the output directory of each output +# format and will distribute the generated files over these directories. +# Enabling this option can be useful when feeding doxygen a huge amount of +# source files, where putting all generated files in the same directory would +# otherwise cause performance problems for the file system. + +CREATE_SUBDIRS = NO + +# The OUTPUT_LANGUAGE tag is used to specify the language in which all +# documentation generated by doxygen is written. Doxygen will use this +# information to generate all constant output in the proper language. 
+# The default language is English, other supported languages are: +# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, +# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, +# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English +# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, +# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, +# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. + +OUTPUT_LANGUAGE = English + +# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will +# include brief member descriptions after the members that are listed in +# the file and class documentation (similar to JavaDoc). +# Set to NO to disable this. + +BRIEF_MEMBER_DESC = YES + +# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend +# the brief description of a member or function before the detailed description. +# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the +# brief descriptions will be completely suppressed. + +REPEAT_BRIEF = YES + +# This tag implements a quasi-intelligent brief description abbreviator +# that is used to form the text in various listings. Each string +# in this list, if found as the leading text of the brief description, will be +# stripped from the text and the result after processing the whole list, is +# used as the annotated text. Otherwise, the brief description is used as-is. +# If left blank, the following values are used ("$name" is automatically +# replaced with the name of the entity): "The $name class" "The $name widget" +# "The $name file" "is" "provides" "specifies" "contains" +# "represents" "a" "an" "the" + +ABBREVIATE_BRIEF = "The $name class" \ + "The $name widget" \ + "The $name file" \ + is \ + provides \ + specifies \ + contains \ + represents \ + a \ + an \ + the + +# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then +# Doxygen will generate a detailed section even if there is only a brief +# description. + +ALWAYS_DETAILED_SEC = NO + +# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all +# inherited members of a class in the documentation of that class as if those +# members were ordinary class members. Constructors, destructors and assignment +# operators of the base classes will not be shown. + +INLINE_INHERITED_MEMB = NO + +# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full +# path before files name in the file list and in the header files. If set +# to NO the shortest path that makes the file name unique will be used. + +FULL_PATH_NAMES = YES + +# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag +# can be used to strip a user-defined part of the path. Stripping is +# only done if one of the specified strings matches the left-hand part of +# the path. The tag can be used to show relative paths in the file list. +# If left blank the directory from which doxygen is run is used as the +# path to strip. + +STRIP_FROM_PATH = + +# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of +# the path mentioned in the documentation of a class, which tells +# the reader which header file to include in order to use a class. +# If left blank only the name of the header file containing the class +# definition is used. Otherwise one should specify the include paths that +# are normally passed to the compiler using the -I flag. 
+ +STRIP_FROM_INC_PATH = + +# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter +# (but less readable) file names. This can be useful if your file system +# doesn't support long names like on DOS, Mac, or CD-ROM. + +SHORT_NAMES = NO + +# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen +# will interpret the first line (until the first dot) of a JavaDoc-style +# comment as the brief description. If set to NO, the JavaDoc +# comments will behave just like regular Qt-style comments +# (thus requiring an explicit @brief command for a brief description.) + +JAVADOC_AUTOBRIEF = NO + +# If the QT_AUTOBRIEF tag is set to YES then Doxygen will +# interpret the first line (until the first dot) of a Qt-style +# comment as the brief description. If set to NO, the comments +# will behave just like regular Qt-style comments (thus requiring +# an explicit \brief command for a brief description.) + +QT_AUTOBRIEF = NO + +# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen +# treat a multi-line C++ special comment block (i.e. a block of //! or /// +# comments) as a brief description. This used to be the default behaviour. +# The new default is to treat a multi-line C++ comment block as a detailed +# description. Set this tag to YES if you prefer the old behaviour instead. + +MULTILINE_CPP_IS_BRIEF = NO + +# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented +# member inherits the documentation from any documented member that it +# re-implements. + +INHERIT_DOCS = YES + +# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce +# a new page for each member. If set to NO, the documentation of a member will +# be part of the file/class/namespace that contains it. + +SEPARATE_MEMBER_PAGES = NO + +# The TAB_SIZE tag can be used to set the number of spaces in a tab. +# Doxygen uses this value to replace tabs by spaces in code fragments. + +TAB_SIZE = 8 + +# This tag can be used to specify a number of aliases that acts +# as commands in the documentation. An alias has the form "name=value". +# For example adding "sideeffect=\par Side Effects:\n" will allow you to +# put the command \sideeffect (or @sideeffect) in the documentation, which +# will result in a user-defined paragraph with heading "Side Effects:". +# You can put \n's in the value part of an alias to insert newlines. + +ALIASES = + +# This tag can be used to specify a number of word-keyword mappings (TCL only). +# A mapping has the form "name=value". For example adding +# "class=itcl::class" will allow you to use the command class in the +# itcl::class meaning. + +TCL_SUBST = + +# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C +# sources only. Doxygen will then generate output that is more tailored for C. +# For instance, some of the names that are used will be different. The list +# of all members will be omitted, etc. + +OPTIMIZE_OUTPUT_FOR_C = YES + +# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java +# sources only. Doxygen will then generate output that is more tailored for +# Java. For instance, namespaces will be presented as packages, qualified +# scopes will look different, etc. + +OPTIMIZE_OUTPUT_JAVA = NO + +# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran +# sources only. Doxygen will then generate output that is more tailored for +# Fortran. + +OPTIMIZE_FOR_FORTRAN = NO + +# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL +# sources. 
Doxygen will then generate output that is tailored for +# VHDL. + +OPTIMIZE_OUTPUT_VHDL = NO + +# Doxygen selects the parser to use depending on the extension of the files it +# parses. With this tag you can assign which parser to use for a given extension. +# Doxygen has a built-in mapping, but you can override or extend it using this +# tag. The format is ext=language, where ext is a file extension, and language +# is one of the parsers supported by doxygen: IDL, Java, Javascript, CSharp, C, +# C++, D, PHP, Objective-C, Python, Fortran, VHDL, C, C++. For instance to make +# doxygen treat .inc files as Fortran files (default is PHP), and .f files as C +# (default is Fortran), use: inc=Fortran f=C. Note that for custom extensions +# you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. + +EXTENSION_MAPPING = + +# If MARKDOWN_SUPPORT is enabled (the default) then doxygen pre-processes all +# comments according to the Markdown format, which allows for more readable +# documentation. See http://daringfireball.net/projects/markdown/ for details. +# The output of markdown processing is further processed by doxygen, so you +# can mix doxygen, HTML, and XML commands with Markdown formatting. +# Disable only in case of backward compatibilities issues. + +MARKDOWN_SUPPORT = YES + +# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want +# to include (a tag file for) the STL sources as input, then you should +# set this tag to YES in order to let doxygen match functions declarations and +# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. +# func(std::string) {}). This also makes the inheritance and collaboration +# diagrams that involve STL classes more complete and accurate. + +BUILTIN_STL_SUPPORT = NO + +# If you use Microsoft's C++/CLI language, you should set this option to YES to +# enable parsing support. + +CPP_CLI_SUPPORT = NO + +# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. +# Doxygen will parse them like normal C++ but will assume all classes use public +# instead of private inheritance when no explicit protection keyword is present. + +SIP_SUPPORT = NO + +# For Microsoft's IDL there are propget and propput attributes to indicate getter +# and setter methods for a property. Setting this option to YES (the default) +# will make doxygen replace the get and set methods by a property in the +# documentation. This will only work if the methods are indeed getting or +# setting a simple type. If this is not the case, or you want to show the +# methods anyway, you should set this option to NO. + +IDL_PROPERTY_SUPPORT = YES + +# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC +# tag is set to YES, then doxygen will reuse the documentation of the first +# member in the group (if any) for the other members of the group. By default +# all members of a group must be documented explicitly. + +DISTRIBUTE_GROUP_DOC = NO + +# Set the SUBGROUPING tag to YES (the default) to allow class member groups of +# the same type (for instance a group of public functions) to be put as a +# subgroup of that type (e.g. under the Public Functions section). Set it to +# NO to prevent subgrouping. Alternatively, this can be done per class using +# the \nosubgrouping command. + +SUBGROUPING = YES + +# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and +# unions are shown inside the group in which they are included (e.g. 
using +# @ingroup) instead of on a separate page (for HTML and Man pages) or +# section (for LaTeX and RTF). + +INLINE_GROUPED_CLASSES = NO + +# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and +# unions with only public data fields will be shown inline in the documentation +# of the scope in which they are defined (i.e. file, namespace, or group +# documentation), provided this scope is documented. If set to NO (the default), +# structs, classes, and unions are shown on a separate page (for HTML and Man +# pages) or section (for LaTeX and RTF). + +INLINE_SIMPLE_STRUCTS = NO + +# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum +# is documented as struct, union, or enum with the name of the typedef. So +# typedef struct TypeS {} TypeT, will appear in the documentation as a struct +# with name TypeT. When disabled the typedef will appear as a member of a file, +# namespace, or class. And the struct will be named TypeS. This can typically +# be useful for C code in case the coding convention dictates that all compound +# types are typedef'ed and only the typedef is referenced, never the tag name. + +TYPEDEF_HIDES_STRUCT = NO + +# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to +# determine which symbols to keep in memory and which to flush to disk. +# When the cache is full, less often used symbols will be written to disk. +# For small to medium size projects (<1000 input files) the default value is +# probably good enough. For larger projects a too small cache size can cause +# doxygen to be busy swapping symbols to and from disk most of the time +# causing a significant performance penalty. +# If the system has enough physical memory increasing the cache will improve the +# performance by keeping more symbols in memory. Note that the value works on +# a logarithmic scale so increasing the size by one will roughly double the +# memory usage. The cache size is given by this formula: +# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +SYMBOL_CACHE_SIZE = 0 + +# Similar to the SYMBOL_CACHE_SIZE the size of the symbol lookup cache can be +# set using LOOKUP_CACHE_SIZE. This cache is used to resolve symbols given +# their name and scope. Since this can be an expensive process and often the +# same symbol appear multiple times in the code, doxygen keeps a cache of +# pre-resolved symbols. If the cache is too small doxygen will become slower. +# If the cache is too large, memory is wasted. The cache size is given by this +# formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range is 0..9, the default is 0, +# corresponding to a cache size of 2^16 = 65536 symbols. + +LOOKUP_CACHE_SIZE = 0 + +#--------------------------------------------------------------------------- +# Build related configuration options +#--------------------------------------------------------------------------- + +# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in +# documentation are documented, even if no documentation was available. +# Private class members and static file members will be hidden unless +# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES + +EXTRACT_ALL = YES + +# If the EXTRACT_PRIVATE tag is set to YES all private members of a class +# will be included in the documentation. + +EXTRACT_PRIVATE = NO + +# If the EXTRACT_PACKAGE tag is set to YES all members with package or internal +# scope will be included in the documentation. 
+ +EXTRACT_PACKAGE = NO + +# If the EXTRACT_STATIC tag is set to YES all static members of a file +# will be included in the documentation. + +EXTRACT_STATIC = NO + +# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) +# defined locally in source files will be included in the documentation. +# If set to NO only classes defined in header files are included. + +EXTRACT_LOCAL_CLASSES = YES + +# This flag is only useful for Objective-C code. When set to YES local +# methods, which are defined in the implementation section but not in +# the interface are included in the documentation. +# If set to NO (the default) only methods in the interface are included. + +EXTRACT_LOCAL_METHODS = NO + +# If this flag is set to YES, the members of anonymous namespaces will be +# extracted and appear in the documentation as a namespace called +# 'anonymous_namespace{file}', where file will be replaced with the base +# name of the file that contains the anonymous namespace. By default +# anonymous namespaces are hidden. + +EXTRACT_ANON_NSPACES = NO + +# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all +# undocumented members of documented classes, files or namespaces. +# If set to NO (the default) these members will be included in the +# various overviews, but no documentation section is generated. +# This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_MEMBERS = NO + +# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all +# undocumented classes that are normally visible in the class hierarchy. +# If set to NO (the default) these classes will be included in the various +# overviews. This option has no effect if EXTRACT_ALL is enabled. + +HIDE_UNDOC_CLASSES = NO + +# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all +# friend (class|struct|union) declarations. +# If set to NO (the default) these declarations will be included in the +# documentation. + +HIDE_FRIEND_COMPOUNDS = NO + +# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any +# documentation blocks found inside the body of a function. +# If set to NO (the default) these blocks will be appended to the +# function's detailed documentation block. + +HIDE_IN_BODY_DOCS = NO + +# The INTERNAL_DOCS tag determines if documentation +# that is typed after a \internal command is included. If the tag is set +# to NO (the default) then the documentation will be excluded. +# Set it to YES to include the internal documentation. + +INTERNAL_DOCS = NO + +# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate +# file names in lower-case letters. If set to YES upper-case letters are also +# allowed. This is useful if you have classes or files whose names only differ +# in case and if your file system supports case sensitive file names. Windows +# and Mac users are advised to set this option to NO. + +CASE_SENSE_NAMES = NO + +# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen +# will show members with their full class and namespace scopes in the +# documentation. If set to YES the scope will be hidden. + +HIDE_SCOPE_NAMES = YES + +# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen +# will put a list of the files that are included by a file in the documentation +# of that file. + +SHOW_INCLUDE_FILES = YES + +# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen +# will list include files with double quotes in the documentation +# rather than with sharp brackets. 
+ +FORCE_LOCAL_INCLUDES = NO + +# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] +# is inserted in the documentation for inline members. + +INLINE_INFO = YES + +# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen +# will sort the (detailed) documentation of file and class members +# alphabetically by member name. If set to NO the members will appear in +# declaration order. + +SORT_MEMBER_DOCS = YES + +# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the +# brief documentation of file, namespace and class members alphabetically +# by member name. If set to NO (the default) the members will appear in +# declaration order. + +SORT_BRIEF_DOCS = NO + +# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen +# will sort the (brief and detailed) documentation of class members so that +# constructors and destructors are listed first. If set to NO (the default) +# the constructors will appear in the respective orders defined by +# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. +# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO +# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. + +SORT_MEMBERS_CTORS_1ST = NO + +# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the +# hierarchy of group names into alphabetical order. If set to NO (the default) +# the group names will appear in their defined order. + +SORT_GROUP_NAMES = NO + +# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be +# sorted by fully-qualified names, including namespaces. If set to +# NO (the default), the class list will be sorted only by class name, +# not including the namespace part. +# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. +# Note: This option applies only to the class list, not to the +# alphabetical list. + +SORT_BY_SCOPE_NAME = NO + +# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to +# do proper type resolution of all parameters of a function it will reject a +# match between the prototype and the implementation of a member function even +# if there is only one candidate or it is obvious which candidate to choose +# by doing a simple string match. By disabling STRICT_PROTO_MATCHING doxygen +# will still accept a match between prototype and implementation in such cases. + +STRICT_PROTO_MATCHING = NO + +# The GENERATE_TODOLIST tag can be used to enable (YES) or +# disable (NO) the todo list. This list is created by putting \todo +# commands in the documentation. + +GENERATE_TODOLIST = YES + +# The GENERATE_TESTLIST tag can be used to enable (YES) or +# disable (NO) the test list. This list is created by putting \test +# commands in the documentation. + +GENERATE_TESTLIST = YES + +# The GENERATE_BUGLIST tag can be used to enable (YES) or +# disable (NO) the bug list. This list is created by putting \bug +# commands in the documentation. + +GENERATE_BUGLIST = YES + +# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or +# disable (NO) the deprecated list. This list is created by putting +# \deprecated commands in the documentation. + +GENERATE_DEPRECATEDLIST= YES + +# The ENABLED_SECTIONS tag can be used to enable conditional +# documentation sections, marked by \if sectionname ... \endif. + +ENABLED_SECTIONS = + +# The MAX_INITIALIZER_LINES tag determines the maximum number of lines +# the initial value of a variable or macro consists of for it to appear in +# the documentation. 
If the initializer consists of more lines than specified +# here it will be hidden. Use a value of 0 to hide initializers completely. +# The appearance of the initializer of individual variables and macros in the +# documentation can be controlled using \showinitializer or \hideinitializer +# command in the documentation regardless of this setting. + +MAX_INITIALIZER_LINES = 30 + +# Set the SHOW_USED_FILES tag to NO to disable the list of files generated +# at the bottom of the documentation of classes and structs. If set to YES the +# list will mention the files that were used to generate the documentation. + +SHOW_USED_FILES = YES + +# Set the SHOW_FILES tag to NO to disable the generation of the Files page. +# This will remove the Files entry from the Quick Index and from the +# Folder Tree View (if specified). The default is YES. + +SHOW_FILES = YES + +# Set the SHOW_NAMESPACES tag to NO to disable the generation of the +# Namespaces page. This will remove the Namespaces entry from the Quick Index +# and from the Folder Tree View (if specified). The default is YES. + +SHOW_NAMESPACES = YES + +# The FILE_VERSION_FILTER tag can be used to specify a program or script that +# doxygen should invoke to get the current version for each file (typically from +# the version control system). Doxygen will invoke the program by executing (via +# popen()) the command <command> <input-file>, where <command> is the value of +# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file +# provided by doxygen. Whatever the program writes to standard output +# is used as the file version. See the manual for examples. + +FILE_VERSION_FILTER = + +# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed +# by doxygen. The layout file controls the global structure of the generated +# output files in an output format independent way. To create the layout file +# that represents doxygen's defaults, run doxygen with the -l option. +# You can optionally specify a file name after the option, if omitted +# DoxygenLayout.xml will be used as the name of the layout file. + +LAYOUT_FILE = + +# The CITE_BIB_FILES tag can be used to specify one or more bib files +# containing the references data. This must be a list of .bib files. The +# .bib extension is automatically appended if omitted. Using this command +# requires the bibtex tool to be installed. See also +# http://en.wikipedia.org/wiki/BibTeX for more info. For LaTeX the style +# of the bibliography can be controlled using LATEX_BIB_STYLE. To use this +# feature you need bibtex and perl available in the search path. + +CITE_BIB_FILES = + +#--------------------------------------------------------------------------- +# configuration options related to warning and progress messages +#--------------------------------------------------------------------------- + +# The QUIET tag can be used to turn on/off the messages that are generated +# by doxygen. Possible values are YES and NO. If left blank NO is used. + +QUIET = NO + +# The WARNINGS tag can be used to turn on/off the warning messages that are +# generated by doxygen. Possible values are YES and NO. If left blank +# NO is used. + +WARNINGS = YES + +# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings +# for undocumented members. If EXTRACT_ALL is set to YES then this flag will +# automatically be disabled.
+ +WARN_IF_UNDOCUMENTED = YES + +# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for +# potential errors in the documentation, such as not documenting some +# parameters in a documented function, or documenting parameters that +# don't exist or using markup commands wrongly. + +WARN_IF_DOC_ERROR = YES + +# The WARN_NO_PARAMDOC option can be enabled to get warnings for +# functions that are documented, but have no documentation for their parameters +# or return value. If set to NO (the default) doxygen will only warn about +# wrong or incomplete parameter documentation, but not about the absence of +# documentation. + +WARN_NO_PARAMDOC = NO + +# The WARN_FORMAT tag determines the format of the warning messages that +# doxygen can produce. The string should contain the $file, $line, and $text +# tags, which will be replaced by the file and line number from which the +# warning originated and the warning text. Optionally the format may contain +# $version, which will be replaced by the version of the file (if it could +# be obtained via FILE_VERSION_FILTER) + +WARN_FORMAT = "$file:$line: $text" + +# The WARN_LOGFILE tag can be used to specify a file to which warning +# and error messages should be written. If left blank the output is written +# to stderr. + +WARN_LOGFILE = + +#--------------------------------------------------------------------------- +# configuration options related to the input files +#--------------------------------------------------------------------------- + +# The INPUT tag can be used to specify the files and/or directories that contain +# documented source files. You may enter file names like "myfile.cpp" or +# directories like "/usr/src/myproject". Separate the files or directories +# with spaces. + +INPUT = /home/thequux/Projects/hammer/src + +# This tag can be used to specify the character encoding of the source files +# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is +# also the default input encoding. Doxygen uses libiconv (or the iconv built +# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for +# the list of possible encodings. + +INPUT_ENCODING = UTF-8 + +# If the value of the INPUT tag contains directories, you can use the +# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank the following patterns are tested: +# *.c *.cc *.cxx *.cpp *.c++ *.d *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh +# *.hxx *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.dox *.py +# *.f90 *.f *.for *.vhd *.vhdl + +FILE_PATTERNS = *.c \ + *.cc \ + *.cxx \ + *.cpp \ + *.c++ \ + *.d \ + *.java \ + *.ii \ + *.ixx \ + *.ipp \ + *.i++ \ + *.inl \ + *.h \ + *.hh \ + *.hxx \ + *.hpp \ + *.h++ \ + *.idl \ + *.odl \ + *.cs \ + *.php \ + *.php3 \ + *.inc \ + *.m \ + *.markdown \ + *.md \ + *.mm \ + *.dox \ + *.py \ + *.f90 \ + *.f \ + *.for \ + *.vhd \ + *.vhdl + +# The RECURSIVE tag can be used to turn specify whether or not subdirectories +# should be searched for input files as well. Possible values are YES and NO. +# If left blank NO is used. + +RECURSIVE = YES + +# The EXCLUDE tag can be used to specify files and/or directories that should be +# excluded from the INPUT source files. This way you can easily exclude a +# subdirectory from a directory tree whose root is specified with the INPUT tag. +# Note that relative paths are relative to the directory from which doxygen is +# run. 
+ +EXCLUDE = + +# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or +# directories that are symbolic links (a Unix file system feature) are excluded +# from the input. + +EXCLUDE_SYMLINKS = NO + +# If the value of the INPUT tag contains directories, you can use the +# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude +# certain files from those directories. Note that the wildcards are matched +# against the file with absolute path, so to exclude all test directories +# for example use the pattern */test/* + +EXCLUDE_PATTERNS = + +# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names +# (namespaces, classes, functions, etc.) that should be excluded from the +# output. The symbol name can be a fully qualified name, a word, or if the +# wildcard * is used, a substring. Examples: ANamespace, AClass, +# AClass::ANamespace, ANamespace::*Test + +EXCLUDE_SYMBOLS = + +# The EXAMPLE_PATH tag can be used to specify one or more files or +# directories that contain example code fragments that are included (see +# the \include command). + +EXAMPLE_PATH = + +# If the value of the EXAMPLE_PATH tag contains directories, you can use the +# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp +# and *.h) to filter out the source-files in the directories. If left +# blank all files are included. + +EXAMPLE_PATTERNS = * + +# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be +# searched for input files to be used with the \include or \dontinclude +# commands irrespective of the value of the RECURSIVE tag. +# Possible values are YES and NO. If left blank NO is used. + +EXAMPLE_RECURSIVE = NO + +# The IMAGE_PATH tag can be used to specify one or more files or +# directories that contain image that are included in the documentation (see +# the \image command). + +IMAGE_PATH = + +# The INPUT_FILTER tag can be used to specify a program that doxygen should +# invoke to filter for each input file. Doxygen will invoke the filter program +# by executing (via popen()) the command <filter> <input-file>, where <filter> +# is the value of the INPUT_FILTER tag, and <input-file> is the name of an +# input file. Doxygen will then use the output that the filter program writes +# to standard output. If FILTER_PATTERNS is specified, this tag will be +# ignored. + +INPUT_FILTER = + +# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern +# basis. Doxygen will compare the file name with each pattern and apply the +# filter if there is a match. The filters are a list of the form: +# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further +# info on how filters are used. If FILTER_PATTERNS is empty or if +# non of the patterns match the file name, INPUT_FILTER is applied. + +FILTER_PATTERNS = + +# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using +# INPUT_FILTER) will be used to filter the input files when producing source +# files to browse (i.e. when SOURCE_BROWSER is set to YES). + +FILTER_SOURCE_FILES = NO + +# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file +# pattern. A pattern will override the setting for FILTER_PATTERN (if any) +# and it is also possible to disable source filtering for a specific pattern +# using *.ext= (so without naming a filter). This option only has effect when +# FILTER_SOURCE_FILES is enabled.
+ +FILTER_SOURCE_PATTERNS = + +#--------------------------------------------------------------------------- +# configuration options related to source browsing +#--------------------------------------------------------------------------- + +# If the SOURCE_BROWSER tag is set to YES then a list of source files will +# be generated. Documented entities will be cross-referenced with these sources. +# Note: To get rid of all source code in the generated output, make sure also +# VERBATIM_HEADERS is set to NO. + +SOURCE_BROWSER = YES + +# Setting the INLINE_SOURCES tag to YES will include the body +# of functions and classes directly in the documentation. + +INLINE_SOURCES = NO + +# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct +# doxygen to hide any special comment blocks from generated source code +# fragments. Normal C, C++ and Fortran comments will always remain visible. + +STRIP_CODE_COMMENTS = YES + +# If the REFERENCED_BY_RELATION tag is set to YES +# then for each documented function all documented +# functions referencing it will be listed. + +REFERENCED_BY_RELATION = NO + +# If the REFERENCES_RELATION tag is set to YES +# then for each documented function all documented entities +# called/used by that function will be listed. + +REFERENCES_RELATION = NO + +# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) +# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from +# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will +# link to the source code. Otherwise they will link to the documentation. + +REFERENCES_LINK_SOURCE = YES + +# If the USE_HTAGS tag is set to YES then the references to source code +# will point to the HTML generated by the htags(1) tool instead of doxygen +# built-in source browser. The htags tool is part of GNU's global source +# tagging system (see http://www.gnu.org/software/global/global.html). You +# will need version 4.8.6 or higher. + +USE_HTAGS = NO + +# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen +# will generate a verbatim copy of the header file for each class for +# which an include is specified. Set to NO to disable this. + +VERBATIM_HEADERS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the alphabetical class index +#--------------------------------------------------------------------------- + +# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index +# of all compounds will be generated. Enable this if the project +# contains a lot of classes, structs, unions or interfaces. + +ALPHABETICAL_INDEX = YES + +# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then +# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns +# in which this list will be split (can be a number in the range [1..20]) + +COLS_IN_ALPHA_INDEX = 5 + +# In case all classes in a project start with a common prefix, all +# classes will be put under the same header in the alphabetical index. +# The IGNORE_PREFIX tag can be used to specify one or more prefixes that +# should be ignored while generating the index headers. + +IGNORE_PREFIX = + +#--------------------------------------------------------------------------- +# configuration options related to the HTML output +#--------------------------------------------------------------------------- + +# If the GENERATE_HTML tag is set to YES (the default) Doxygen will +# generate HTML output. 
+ +GENERATE_HTML = YES + +# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `html' will be used as the default path. + +HTML_OUTPUT = html + +# The HTML_FILE_EXTENSION tag can be used to specify the file extension for +# each generated HTML page (for example: .htm,.php,.asp). If it is left blank +# doxygen will generate files with .html extension. + +HTML_FILE_EXTENSION = .html + +# The HTML_HEADER tag can be used to specify a personal HTML header for +# each generated HTML page. If it is left blank doxygen will generate a +# standard header. Note that when using a custom header you are responsible +# for the proper inclusion of any scripts and style sheets that doxygen +# needs, which is dependent on the configuration options used. +# It is advised to generate a default header using "doxygen -w html +# header.html footer.html stylesheet.css YourConfigFile" and then modify +# that header. Note that the header is subject to change so you typically +# have to redo this when upgrading to a newer version of doxygen or when +# changing the value of configuration settings such as GENERATE_TREEVIEW! + +HTML_HEADER = + +# The HTML_FOOTER tag can be used to specify a personal HTML footer for +# each generated HTML page. If it is left blank doxygen will generate a +# standard footer. + +HTML_FOOTER = + +# The HTML_STYLESHEET tag can be used to specify a user-defined cascading +# style sheet that is used by each HTML page. It can be used to +# fine-tune the look of the HTML output. If the tag is left blank doxygen +# will generate a default style sheet. Note that doxygen will try to copy +# the style sheet file to the HTML output directory, so don't put your own +# style sheet in the HTML output directory as well, or it will be erased! + +HTML_STYLESHEET = + +# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or +# other source files which should be copied to the HTML output directory. Note +# that these files will be copied to the base HTML output directory. Use the +# $relpath$ marker in the HTML_HEADER and/or HTML_FOOTER files to load these +# files. In the HTML_STYLESHEET file, use the file name only. Also note that +# the files will be copied as-is; there are no commands or markers available. + +HTML_EXTRA_FILES = + +# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. +# Doxygen will adjust the colors in the style sheet and background images +# according to this color. Hue is specified as an angle on a colorwheel, +# see http://en.wikipedia.org/wiki/Hue for more information. +# For instance the value 0 represents red, 60 is yellow, 120 is green, +# 180 is cyan, 240 is blue, 300 purple, and 360 is red again. +# The allowed range is 0 to 359. + +HTML_COLORSTYLE_HUE = 220 + +# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of +# the colors in the HTML output. For a value of 0 the output will use +# grayscales only. A value of 255 will produce the most vivid colors. + +HTML_COLORSTYLE_SAT = 100 + +# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to +# the luminance component of the colors in the HTML output. Values below +# 100 gradually make the output lighter, whereas values above 100 make +# the output darker. The value divided by 100 is the actual gamma applied, +# so 80 represents a gamma of 0.8, The value 220 represents a gamma of 2.2, +# and 100 does not change the gamma. 
+ +HTML_COLORSTYLE_GAMMA = 80 + +# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML +# page will contain the date and time when the page was generated. Setting +# this to NO can help when comparing the output of multiple runs. + +HTML_TIMESTAMP = YES + +# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML +# documentation will contain sections that can be hidden and shown after the +# page has loaded. + +HTML_DYNAMIC_SECTIONS = NO + +# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of +# entries shown in the various tree structured indices initially; the user +# can expand and collapse entries dynamically later on. Doxygen will expand +# the tree to such a level that at most the specified number of entries are +# visible (unless a fully collapsed tree already exceeds this amount). +# So setting the number of entries 1 will produce a full collapsed tree by +# default. 0 is a special value representing an infinite number of entries +# and will result in a full expanded tree by default. + +HTML_INDEX_NUM_ENTRIES = 100 + +# If the GENERATE_DOCSET tag is set to YES, additional index files +# will be generated that can be used as input for Apple's Xcode 3 +# integrated development environment, introduced with OSX 10.5 (Leopard). +# To create a documentation set, doxygen will generate a Makefile in the +# HTML output directory. Running make will produce the docset in that +# directory and running "make install" will install the docset in +# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find +# it at startup. +# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html +# for more information. + +GENERATE_DOCSET = NO + +# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the +# feed. A documentation feed provides an umbrella under which multiple +# documentation sets from a single provider (such as a company or product suite) +# can be grouped. + +DOCSET_FEEDNAME = "Doxygen generated docs" + +# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that +# should uniquely identify the documentation set bundle. This should be a +# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen +# will append .docset to the name. + +DOCSET_BUNDLE_ID = org.doxygen.Project + +# When GENERATE_PUBLISHER_ID tag specifies a string that should uniquely identify +# the documentation publisher. This should be a reverse domain-name style +# string, e.g. com.mycompany.MyDocSet.documentation. + +DOCSET_PUBLISHER_ID = org.doxygen.Publisher + +# The GENERATE_PUBLISHER_NAME tag identifies the documentation publisher. + +DOCSET_PUBLISHER_NAME = Publisher + +# If the GENERATE_HTMLHELP tag is set to YES, additional index files +# will be generated that can be used as input for tools like the +# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) +# of the generated HTML documentation. + +GENERATE_HTMLHELP = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can +# be used to specify the file name of the resulting .chm file. You +# can add a path in front of the file if the result should not be +# written to the html output directory. + +CHM_FILE = + +# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can +# be used to specify the location (absolute path including file name) of +# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run +# the HTML help compiler on the generated index.hhp. 
+ +HHC_LOCATION = + +# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag +# controls if a separate .chi index file is generated (YES) or that +# it should be included in the master .chm file (NO). + +GENERATE_CHI = NO + +# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING +# is used to encode HtmlHelp index (hhk), content (hhc) and project file +# content. + +CHM_INDEX_ENCODING = + +# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag +# controls whether a binary table of contents is generated (YES) or a +# normal table of contents (NO) in the .chm file. + +BINARY_TOC = NO + +# The TOC_EXPAND flag can be set to YES to add extra items for group members +# to the contents of the HTML help documentation and to the tree view. + +TOC_EXPAND = NO + +# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and +# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated +# that can be used as input for Qt's qhelpgenerator to generate a +# Qt Compressed Help (.qch) of the generated HTML documentation. + +GENERATE_QHP = NO + +# If the QHG_LOCATION tag is specified, the QCH_FILE tag can +# be used to specify the file name of the resulting .qch file. +# The path specified is relative to the HTML output folder. + +QCH_FILE = + +# The QHP_NAMESPACE tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#namespace + +QHP_NAMESPACE = org.doxygen.Project + +# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating +# Qt Help Project output. For more information please see +# http://doc.trolltech.com/qthelpproject.html#virtual-folders + +QHP_VIRTUAL_FOLDER = doc + +# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to +# add. For more information please see +# http://doc.trolltech.com/qthelpproject.html#custom-filters + +QHP_CUST_FILTER_NAME = + +# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the +# custom filter to add. For more information please see +# +# Qt Help Project / Custom Filters. + +QHP_CUST_FILTER_ATTRS = + +# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this +# project's +# filter section matches. +# +# Qt Help Project / Filter Attributes. + +QHP_SECT_FILTER_ATTRS = + +# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can +# be used to specify the location of Qt's qhelpgenerator. +# If non-empty doxygen will try to run qhelpgenerator on the generated +# .qhp file. + +QHG_LOCATION = + +# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files +# will be generated, which together with the HTML files, form an Eclipse help +# plugin. To install this plugin and make it available under the help contents +# menu in Eclipse, the contents of the directory containing the HTML and XML +# files needs to be copied into the plugins directory of eclipse. The name of +# the directory within the plugins directory should be the same as +# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before +# the help appears. + +GENERATE_ECLIPSEHELP = NO + +# A unique identifier for the eclipse help plugin. When installing the plugin +# the directory name containing the HTML and XML files should also have +# this name. + +ECLIPSE_DOC_ID = org.doxygen.Project + +# The DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) +# at top of each HTML page. 
The value NO (the default) enables the index and +# the value YES disables it. Since the tabs have the same information as the +# navigation tree you can set this option to NO if you already set +# GENERATE_TREEVIEW to YES. + +DISABLE_INDEX = NO + +# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index +# structure should be generated to display hierarchical information. +# If the tag value is set to YES, a side panel will be generated +# containing a tree-like index structure (just like the one that +# is generated for HTML Help). For this to work a browser that supports +# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). +# Windows users are probably better off using the HTML help feature. +# Since the tree basically has the same information as the tab index you +# could consider to set DISABLE_INDEX to NO when enabling this option. + +GENERATE_TREEVIEW = NO + +# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values +# (range [0,1..20]) that doxygen will group on one line in the generated HTML +# documentation. Note that a value of 0 will completely suppress the enum +# values from appearing in the overview section. + +ENUM_VALUES_PER_LINE = 4 + +# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be +# used to set the initial width (in pixels) of the frame in which the tree +# is shown. + +TREEVIEW_WIDTH = 250 + +# When the EXT_LINKS_IN_WINDOW option is set to YES doxygen will open +# links to external symbols imported via tag files in a separate window. + +EXT_LINKS_IN_WINDOW = NO + +# Use this tag to change the font size of Latex formulas included +# as images in the HTML documentation. The default is 10. Note that +# when you change the font size after a successful doxygen run you need +# to manually remove any form_*.png images from the HTML output directory +# to force them to be regenerated. + +FORMULA_FONTSIZE = 10 + +# Use the FORMULA_TRANPARENT tag to determine whether or not the images +# generated for formulas are transparent PNGs. Transparent PNGs are +# not supported properly for IE 6.0, but are supported on all modern browsers. +# Note that when changing this option you need to delete any form_*.png files +# in the HTML output before the changes have effect. + +FORMULA_TRANSPARENT = YES + +# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax +# (see http://www.mathjax.org) which uses client side Javascript for the +# rendering instead of using prerendered bitmaps. Use this if you do not +# have LaTeX installed or if you want to formulas look prettier in the HTML +# output. When enabled you may also need to install MathJax separately and +# configure the path to it using the MATHJAX_RELPATH option. + +USE_MATHJAX = YES + +# When MathJax is enabled you need to specify the location relative to the +# HTML output directory using the MATHJAX_RELPATH option. The destination +# directory should contain the MathJax.js script. For instance, if the mathjax +# directory is located at the same level as the HTML output directory, then +# MATHJAX_RELPATH should be ../mathjax. The default value points to +# the MathJax Content Delivery Network so you can quickly see the result without +# installing MathJax. However, it is strongly recommended to install a local +# copy of MathJax from http://www.mathjax.org before deployment. 
+ +MATHJAX_RELPATH = http://cdn.mathjax.org/mathjax/latest + +# The MATHJAX_EXTENSIONS tag can be used to specify one or MathJax extension +# names that should be enabled during MathJax rendering. + +MATHJAX_EXTENSIONS = + +# When the SEARCHENGINE tag is enabled doxygen will generate a search box +# for the HTML output. The underlying search engine uses javascript +# and DHTML and should work on any modern browser. Note that when using +# HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets +# (GENERATE_DOCSET) there is already a search function so this one should +# typically be disabled. For large projects the javascript based search engine +# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution. + +SEARCHENGINE = YES + +# When the SERVER_BASED_SEARCH tag is enabled the search engine will be +# implemented using a PHP enabled web server instead of at the web client +# using Javascript. Doxygen will generate the search PHP script and index +# file to put on the web server. The advantage of the server +# based approach is that it scales better to large projects and allows +# full text search. The disadvantages are that it is more difficult to setup +# and does not have live searching capabilities. + +SERVER_BASED_SEARCH = NO + +#--------------------------------------------------------------------------- +# configuration options related to the LaTeX output +#--------------------------------------------------------------------------- + +# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will +# generate Latex output. + +GENERATE_LATEX = YES + +# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `latex' will be used as the default path. + +LATEX_OUTPUT = latex + +# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be +# invoked. If left blank `latex' will be used as the default command name. +# Note that when enabling USE_PDFLATEX this option is only used for +# generating bitmaps for formulas in the HTML output, but not in the +# Makefile that is written to the output directory. + +LATEX_CMD_NAME = latex + +# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to +# generate index for LaTeX. If left blank `makeindex' will be used as the +# default command name. + +MAKEINDEX_CMD_NAME = makeindex + +# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact +# LaTeX documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_LATEX = NO + +# The PAPER_TYPE tag can be used to set the paper type that is used +# by the printer. Possible values are: a4, letter, legal and +# executive. If left blank a4wide will be used. + +PAPER_TYPE = a4 + +# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX +# packages that should be included in the LaTeX output. + +EXTRA_PACKAGES = + +# The LATEX_HEADER tag can be used to specify a personal LaTeX header for +# the generated latex document. The header should contain everything until +# the first chapter. If it is left blank doxygen will generate a +# standard header. Notice: only use this tag if you know what you are doing! + +LATEX_HEADER = + +# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for +# the generated latex document. The footer should contain everything after +# the last chapter. 
If it is left blank doxygen will generate a +# standard footer. Notice: only use this tag if you know what you are doing! + +LATEX_FOOTER = + +# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated +# is prepared for conversion to pdf (using ps2pdf). The pdf file will +# contain links (just like the HTML output) instead of page references +# This makes the output suitable for online browsing using a pdf viewer. + +PDF_HYPERLINKS = YES + +# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of +# plain latex in the generated Makefile. Set this option to YES to get a +# higher quality PDF documentation. + +USE_PDFLATEX = YES + +# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. +# command to the generated LaTeX files. This will instruct LaTeX to keep +# running if errors occur, instead of asking the user for help. +# This option is also used when generating formulas in HTML. + +LATEX_BATCHMODE = NO + +# If LATEX_HIDE_INDICES is set to YES then doxygen will not +# include the index chapters (such as File Index, Compound Index, etc.) +# in the output. + +LATEX_HIDE_INDICES = NO + +# If LATEX_SOURCE_CODE is set to YES then doxygen will include +# source code with syntax highlighting in the LaTeX output. +# Note that which sources are shown also depends on other settings +# such as SOURCE_BROWSER. + +LATEX_SOURCE_CODE = NO + +# The LATEX_BIB_STYLE tag can be used to specify the style to use for the +# bibliography, e.g. plainnat, or ieeetr. The default style is "plain". See +# http://en.wikipedia.org/wiki/BibTeX for more info. + +LATEX_BIB_STYLE = plain + +#--------------------------------------------------------------------------- +# configuration options related to the RTF output +#--------------------------------------------------------------------------- + +# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output +# The RTF output is optimized for Word 97 and may not look very pretty with +# other RTF readers or editors. + +GENERATE_RTF = NO + +# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `rtf' will be used as the default path. + +RTF_OUTPUT = rtf + +# If the COMPACT_RTF tag is set to YES Doxygen generates more compact +# RTF documents. This may be useful for small projects and may help to +# save some trees in general. + +COMPACT_RTF = NO + +# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated +# will contain hyperlink fields. The RTF file will +# contain links (just like the HTML output) instead of page references. +# This makes the output suitable for online browsing using WORD or other +# programs which support those fields. +# Note: wordpad (write) and others do not support links. + +RTF_HYPERLINKS = NO + +# Load style sheet definitions from file. Syntax is similar to doxygen's +# config file, i.e. a series of assignments. You only have to provide +# replacements, missing definitions are set to their default value. + +RTF_STYLESHEET_FILE = + +# Set optional variables used in the generation of an rtf document. +# Syntax is similar to doxygen's config file. 
+ +RTF_EXTENSIONS_FILE = + +#--------------------------------------------------------------------------- +# configuration options related to the man page output +#--------------------------------------------------------------------------- + +# If the GENERATE_MAN tag is set to YES (the default) Doxygen will +# generate man pages + +GENERATE_MAN = YES + +# The MAN_OUTPUT tag is used to specify where the man pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `man' will be used as the default path. + +MAN_OUTPUT = man + +# The MAN_EXTENSION tag determines the extension that is added to +# the generated man pages (default is the subroutine's section .3) + +MAN_EXTENSION = .3 + +# If the MAN_LINKS tag is set to YES and Doxygen generates man output, +# then it will generate one additional man file for each entity +# documented in the real man page(s). These additional files +# only source the real man page, but without them the man command +# would be unable to find the correct page. The default is NO. + +MAN_LINKS = YES + +#--------------------------------------------------------------------------- +# configuration options related to the XML output +#--------------------------------------------------------------------------- + +# If the GENERATE_XML tag is set to YES Doxygen will +# generate an XML file that captures the structure of +# the code including all documentation. + +GENERATE_XML = NO + +# The XML_OUTPUT tag is used to specify where the XML pages will be put. +# If a relative path is entered the value of OUTPUT_DIRECTORY will be +# put in front of it. If left blank `xml' will be used as the default path. + +XML_OUTPUT = xml + +# The XML_SCHEMA tag can be used to specify an XML schema, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_SCHEMA = + +# The XML_DTD tag can be used to specify an XML DTD, +# which can be used by a validating XML parser to check the +# syntax of the XML files. + +XML_DTD = + +# If the XML_PROGRAMLISTING tag is set to YES Doxygen will +# dump the program listings (including syntax highlighting +# and cross-referencing information) to the XML output. Note that +# enabling this will significantly increase the size of the XML output. + +XML_PROGRAMLISTING = YES + +#--------------------------------------------------------------------------- +# configuration options for the AutoGen Definitions output +#--------------------------------------------------------------------------- + +# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will +# generate an AutoGen Definitions (see autogen.sf.net) file +# that captures the structure of the code including all +# documentation. Note that this feature is still experimental +# and incomplete at the moment. + +GENERATE_AUTOGEN_DEF = NO + +#--------------------------------------------------------------------------- +# configuration options related to the Perl module output +#--------------------------------------------------------------------------- + +# If the GENERATE_PERLMOD tag is set to YES Doxygen will +# generate a Perl module file that captures the structure of +# the code including all documentation. Note that this +# feature is still experimental and incomplete at the +# moment. 
+ +GENERATE_PERLMOD = NO + +# If the PERLMOD_LATEX tag is set to YES Doxygen will generate +# the necessary Makefile rules, Perl scripts and LaTeX code to be able +# to generate PDF and DVI output from the Perl module output. + +PERLMOD_LATEX = NO + +# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be +# nicely formatted so it can be parsed by a human reader. This is useful +# if you want to understand what is going on. On the other hand, if this +# tag is set to NO the size of the Perl module output will be much smaller +# and Perl will parse it just the same. + +PERLMOD_PRETTY = YES + +# The names of the make variables in the generated doxyrules.make file +# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. +# This is useful so different doxyrules.make files included by the same +# Makefile don't overwrite each other's variables. + +PERLMOD_MAKEVAR_PREFIX = + +#--------------------------------------------------------------------------- +# Configuration options related to the preprocessor +#--------------------------------------------------------------------------- + +# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will +# evaluate all C-preprocessor directives found in the sources and include +# files. + +ENABLE_PREPROCESSING = YES + +# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro +# names in the source code. If set to NO (the default) only conditional +# compilation will be performed. Macro expansion can be done in a controlled +# way by setting EXPAND_ONLY_PREDEF to YES. + +MACRO_EXPANSION = NO + +# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES +# then the macro expansion is limited to the macros specified with the +# PREDEFINED and EXPAND_AS_DEFINED tags. + +EXPAND_ONLY_PREDEF = NO + +# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files +# pointed to by INCLUDE_PATH will be searched when a #include is found. + +SEARCH_INCLUDES = YES + +# The INCLUDE_PATH tag can be used to specify one or more directories that +# contain include files that are not input files but should be processed by +# the preprocessor. + +INCLUDE_PATH = + +# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard +# patterns (like *.h and *.hpp) to filter out the header-files in the +# directories. If left blank, the patterns specified with FILE_PATTERNS will +# be used. + +INCLUDE_FILE_PATTERNS = + +# The PREDEFINED tag can be used to specify one or more macro names that +# are defined before the preprocessor is started (similar to the -D option of +# gcc). The argument of the tag is a list of macros of the form: name +# or name=definition (no spaces). If the definition and the = are +# omitted =1 is assumed. To prevent a macro definition from being +# undefined via #undef or recursively expanded use the := operator +# instead of the = operator. + +PREDEFINED = + +# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then +# this tag can be used to specify a list of macro names that should be expanded. +# The macro definition that is found in the sources will be used. +# Use the PREDEFINED tag if you want to use a different macro definition that +# overrules the definition found in the source code. 
+ +EXPAND_AS_DEFINED = + +# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then +# doxygen's preprocessor will remove all references to function-like macros +# that are alone on a line, have an all uppercase name, and do not end with a +# semicolon, because these will confuse the parser if not removed. + +SKIP_FUNCTION_MACROS = YES + +#--------------------------------------------------------------------------- +# Configuration::additions related to external references +#--------------------------------------------------------------------------- + +# The TAGFILES option can be used to specify one or more tagfiles. For each +# tag file the location of the external documentation should be added. The +# format of a tag file without this location is as follows: +# TAGFILES = file1 file2 ... +# Adding location for the tag files is done as follows: +# TAGFILES = file1=loc1 "file2 = loc2" ... +# where "loc1" and "loc2" can be relative or absolute paths +# or URLs. Note that each tag file must have a unique name (where the name does +# NOT include the path). If a tag file is not located in the directory in which +# doxygen is run, you must also specify the path to the tagfile here. + +TAGFILES = + +# When a file name is specified after GENERATE_TAGFILE, doxygen will create +# a tag file that is based on the input files it reads. + +GENERATE_TAGFILE = + +# If the ALLEXTERNALS tag is set to YES all external classes will be listed +# in the class index. If set to NO only the inherited external classes +# will be listed. + +ALLEXTERNALS = NO + +# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed +# in the modules index. If set to NO, only the current project's groups will +# be listed. + +EXTERNAL_GROUPS = YES + +# The PERL_PATH should be the absolute path and name of the perl script +# interpreter (i.e. the result of `which perl'). + +PERL_PATH = /usr/bin/perl + +#--------------------------------------------------------------------------- +# Configuration options related to the dot tool +#--------------------------------------------------------------------------- + +# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will +# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base +# or super classes. Setting the tag to NO turns the diagrams off. Note that +# this option also works with HAVE_DOT disabled, but it is recommended to +# install and use dot, since it yields more powerful graphs. + +CLASS_DIAGRAMS = YES + +# You can define message sequence charts within doxygen comments using the \msc +# command. Doxygen will then run the mscgen tool (see +# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the +# documentation. The MSCGEN_PATH tag allows you to specify the directory where +# the mscgen tool resides. If left empty the tool is assumed to be found in the +# default search path. + +MSCGEN_PATH = + +# If set to YES, the inheritance and collaboration graphs will hide +# inheritance and usage relations if the target is undocumented +# or is not a class. + +HIDE_UNDOC_RELATIONS = YES + +# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is +# available from the path. This tool is part of Graphviz, a graph visualization +# toolkit from AT&T and Lucent Bell Labs. The other options in this section +# have no effect if this option is set to NO (the default) + +HAVE_DOT = NO + +# The DOT_NUM_THREADS specifies the number of dot invocations doxygen is +# allowed to run in parallel. 
When set to 0 (the default) doxygen will +# base this on the number of processors available in the system. You can set it +# explicitly to a value larger than 0 to get control over the balance +# between CPU load and processing speed. + +DOT_NUM_THREADS = 0 + +# By default doxygen will use the Helvetica font for all dot files that +# doxygen generates. When you want a differently looking font you can specify +# the font name using DOT_FONTNAME. You need to make sure dot is able to find +# the font, which can be done by putting it in a standard location or by setting +# the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. + +DOT_FONTNAME = Helvetica + +# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. +# The default size is 10pt. + +DOT_FONTSIZE = 10 + +# By default doxygen will tell dot to use the Helvetica font. +# If you specify a different font using DOT_FONTNAME you can use DOT_FONTPATH to +# set the path where dot can find it. + +DOT_FONTPATH = + +# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect inheritance relations. Setting this tag to YES will force the +# CLASS_DIAGRAMS tag to NO. + +CLASS_GRAPH = YES + +# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for each documented class showing the direct and +# indirect implementation dependencies (inheritance, containment, and +# class references variables) of the class with other documented classes. + +COLLABORATION_GRAPH = YES + +# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen +# will generate a graph for groups, showing the direct groups dependencies + +GROUP_GRAPHS = YES + +# If the UML_LOOK tag is set to YES doxygen will generate inheritance and +# collaboration diagrams in a style similar to the OMG's Unified Modeling +# Language. + +UML_LOOK = NO + +# If the UML_LOOK tag is enabled, the fields and methods are shown inside +# the class node. If there are many fields or methods and many nodes the +# graph may become too big to be useful. The UML_LIMIT_NUM_FIELDS +# threshold limits the number of items for each type to make the size more +# managable. Set this to 0 for no limit. Note that the threshold may be +# exceeded by 50% before the limit is enforced. + +UML_LIMIT_NUM_FIELDS = 10 + +# If set to YES, the inheritance and collaboration graphs will show the +# relations between templates and their instances. + +TEMPLATE_RELATIONS = NO + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT +# tags are set to YES then doxygen will generate a graph for each documented +# file showing the direct and indirect include dependencies of the file with +# other documented files. + +INCLUDE_GRAPH = YES + +# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and +# HAVE_DOT tags are set to YES then doxygen will generate a graph for each +# documented header file showing the documented files that directly or +# indirectly include this file. + +INCLUDED_BY_GRAPH = YES + +# If the CALL_GRAPH and HAVE_DOT options are set to YES then +# doxygen will generate a call dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable call graphs +# for selected functions only using the \callgraph command. 
+ +CALL_GRAPH = NO + +# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then +# doxygen will generate a caller dependency graph for every global function +# or class method. Note that enabling this option will significantly increase +# the time of a run. So in most cases it will be better to enable caller +# graphs for selected functions only using the \callergraph command. + +CALLER_GRAPH = NO + +# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen +# will generate a graphical hierarchy of all classes instead of a textual one. + +GRAPHICAL_HIERARCHY = YES + +# If the DIRECTORY_GRAPH and HAVE_DOT tags are set to YES +# then doxygen will show the dependencies a directory has on other directories +# in a graphical way. The dependency relations are determined by the #include +# relations between the files in the directories. + +DIRECTORY_GRAPH = YES + +# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images +# generated by dot. Possible values are svg, png, jpg, or gif. +# If left blank png will be used. If you choose svg you need to set +# HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible in IE 9+ (other browsers do not have this requirement). + +DOT_IMAGE_FORMAT = png + +# If DOT_IMAGE_FORMAT is set to svg, then this option can be set to YES to +# enable generation of interactive SVG images that allow zooming and panning. +# Note that this requires a modern browser other than Internet Explorer. +# Tested and working are Firefox, Chrome, Safari, and Opera. For IE 9+ you +# need to set HTML_FILE_EXTENSION to xhtml in order to make the SVG files +# visible. Older versions of IE do not have SVG support. + +INTERACTIVE_SVG = NO + +# The tag DOT_PATH can be used to specify the path where the dot tool can be +# found. If left blank, it is assumed the dot tool can be found in the path. + +DOT_PATH = + +# The DOTFILE_DIRS tag can be used to specify one or more directories that +# contain dot files that are included in the documentation (see the +# \dotfile command). + +DOTFILE_DIRS = + +# The MSCFILE_DIRS tag can be used to specify one or more directories that +# contain msc files that are included in the documentation (see the +# \mscfile command). + +MSCFILE_DIRS = + +# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of +# nodes that will be shown in the graph. If the number of nodes in a graph +# becomes larger than this value, doxygen will truncate the graph, which is +# visualized by representing a node as a red box. Note that doxygen if the +# number of direct children of the root node in a graph is already larger than +# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note +# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. + +DOT_GRAPH_MAX_NODES = 50 + +# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the +# graphs generated by dot. A depth value of 3 means that only nodes reachable +# from the root by following a path via at most 3 edges will be shown. Nodes +# that lay further from the root node will be omitted. Note that setting this +# option to 1 or 2 may greatly reduce the computation time needed for large +# code bases. Also note that the size of a graph can be further restricted by +# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. + +MAX_DOT_GRAPH_DEPTH = 0 + +# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent +# background. 
This is disabled by default, because dot on Windows does not +# seem to support this out of the box. Warning: Depending on the platform used, +# enabling this option may lead to badly anti-aliased labels on the edges of +# a graph (i.e. they become hard to read). + +DOT_TRANSPARENT = NO + +# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output +# files in one run (i.e. multiple -o and -T options on the command line). This +# makes dot run faster, but since only newer versions of dot (>1.8.10) +# support this, this feature is disabled by default. + +DOT_MULTI_TARGETS = NO + +# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will +# generate a legend page explaining the meaning of the various boxes and +# arrows in the dot generated graphs. + +GENERATE_LEGEND = YES + +# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will +# remove the intermediate dot files that are used to generate +# the various graphs. + +DOT_CLEANUP = YES diff --git a/HACKING b/HACKING new file mode 100644 index 0000000..7bffb4c --- /dev/null +++ b/HACKING @@ -0,0 +1,52 @@ +Privileged arguments +==================== + +As a matter of convenience, there are several identifiers that +internal macros use. Chances are that if you use these names for other +things, you're gonna have a bad time. + +In particular, these names, and the macros that use them, are: +- state: + Used by a_new and company. Should be an HParseState* +- mm__: + Used by h_new and h_free. Should be an HAllocator* + +Function suffixes +================= + +Many functions come in several variants, to handle receiving optional +parameters or parameters in multiple different forms. For example, +often, you have a global memory manager that is used for an entire +program. In this case, you can leave the memory manager arguments +off, letting them be implicit instead. Further, it is often convenient +to pass an array or va_list to a function instead of listing the +arguments inline (e.g., for wrapping a function, generating the +arguments programmatically, or writing bindings for another language). + +Because we have found that most variants fall into a fairly small set +of forms, and to minimize the number of API calls that users need to +remember, there is a consistent naming scheme for these function +variants: the function name is followed by two underscores and a set +of single-character "flags" indicating what optional features that +particular variant has (in alphabetical order, of course): + + __a: takes variadic arguments as a void*[] (not implemented yet, but will be soon). + __m: takes a memory manager as the first argument, to override the system memory manager. + __v: takes the variadic argument list as a va_list. + + +Memory managers +=============== + +If the __m function variants are used or system_allocator is +overridden, some difficult questions arise, +particularly regarding the behavior when multiple memory managers are +combined. As a general rule of thumb (exceptions will be explicitly +documented), assume that + + If you have a function f, which is passed a memory manager m and + returns a value r, any function that uses r as a parameter must + also be told to use m as a memory manager. + +In other words, don't let the (memory manager) streams cross.
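+
+To illustrate the scheme, here is a minimal sketch (the __m variant shown is
+an assumption based on the naming rules above; not every variant necessarily
+exists for every function yet):
+
+    HAllocator *mm = ...;                 /* an HAllocator of your choosing */
+    const HParser *p  = h_ch('a');        /* plain form: implicit system memory manager */
+    const HParser *pm = h_ch__m(mm, 'a'); /* __m form: allocation goes through mm */
+
+Per the rule above, parsers and results obtained via pm should only be passed
+to functions that are also told to use mm.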
+ diff --git a/Makefile b/Makefile index 08ce4e1..bd383a2 100644 --- a/Makefile +++ b/Makefile @@ -5,6 +5,10 @@ SUBDIRS = src examples +include config.mk + +CONFIG_VARS= INCLUDE_TESTS + .DEFAULT_GOAL := all %: @@ -25,3 +29,6 @@ $(foreach dir,$(SUBDIRS),$(eval $(call SUBDIR_TEMPLATE,$(dir)))) TAGS: $(shell find * -name "*.c") etags $^ + +config: + @printf "%30s %s\n" $(foreach var,$(CONFIG_VARS),$(var) $($(var)) ) diff --git a/NOTES b/NOTES index 84b8c46..77d899d 100644 --- a/NOTES +++ b/NOTES @@ -35,4 +35,3 @@ what the comments say. TODO: implement datastructure linearization func TODO: implement free func for parsers -TODO: Remove glib dependency (i.e., GQueue and GHashtable) \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..e008b12 --- /dev/null +++ b/README.md @@ -0,0 +1,58 @@ +Hammer is a parsing library. Like many modern parsing libraries, it provides a parser combinator interface for writing grammars as inline domain-specific languages, but Hammer also provides a variety of parsing backends. It's also bit-oriented rather than character-oriented, making it ideal for parsing binary data such as images, network packets, audio, and executables. + +Hammer is written in C, but will provide bindings for other languages. If you don't see a language you're interested in on the list, just ask. + +Hammer currently builds under Linux. (Windows and OSX are coming.) + +Features +======== +* Bit-oriented -- grammars can include single-bit flags or multi-bit constructs that span character boundaries, with no hassle +* Thread-safe, reentrant +* Benchmarking for parsing backends -- determine empirically which backend will be most time-efficient for your grammar +* Parsing backends: + * Packrat parsing + * LL(k) (not yet implemented) + * GLR (not yet implemented) + * LALR(1) (not yet implemented) + * Regular expressions (not yet implemented) +* Language bindings: (not yet implemented) + * C++ + * Java + * Python + * Ruby + * Perl + * Go + * PHP + * .NET + +Installing +========== +### Prerequisites +* make + +### Optional Dependencies +* pkg-config (for `make test`) +* glib-2.0 (>= 2.29) (for `make test`) +* glib-2.0-dev (for `make test`) + +To install, type `make`. To run the built-in test suite, type `make test`. + +There is not currently a `make install` target; to make Hammer available system-wide, copy `libhammer.a` to `/usr/lib/` (or `/usr/local/lib/`, or wherever ld will find it) and `hammer.h` to `/usr/include/`. + +Usage +===== +Just `#include <hammer.h>` and link with `-lhammer`. + +Examples +======== +The `examples/` directory contains some simple examples, currently including: +* base64 +* DNS + +Community +========= +Please join us at `#hammer` on `irc.upstandinghackers.com` if you have any questions or just want to talk about parsing. + +Contact +======= +You can also email us at . diff --git a/TODO b/TODO new file mode 100644 index 0000000..10ad174 --- /dev/null +++ b/TODO @@ -0,0 +1,3 @@ +- Make h_action functions be called only after parse is complete. +- Allow alternative input streams (eg, zlib, base64) + - Bonus points if layered... \ No newline at end of file diff --git a/common.mk b/common.mk index 4f70745..a57429d 100644 --- a/common.mk +++ b/common.mk @@ -1,17 +1,24 @@ -CFLAGS := $(shell pkg-config --cflags glib-2.0) -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes -LDFLAGS := $(shell pkg-config --libs glib-2.0) -CC ?= gcc -$(info CC=$(CC)) -# Set V=1 for verbose mode...
-V ?= 0 -CFLAGS += -DINCLUDE_TESTS $(EXTRA_CFLAGS) -HUSH = $(TOPLEVEL)/lib/hush - # Check to make sure variables are properly set ifeq ($(TOPLEVEL),) $(error $$TOPLEVEL is unset) endif +include $(TOPLEVEL)/config.mk + +TEST_CFLAGS = $(shell pkg-config --cflags glib-2.0) -DINCLUDE_TESTS +TEST_LDFLAGS = $(shell pkg-config --libs glib-2.0) + +CFLAGS := -std=gnu99 -Wall -Wextra -Werror -Wno-unused-parameter -Wno-attributes +LDFLAGS := + +CC ?= gcc +$(info CC=$(CC)) +# Set V=1 for verbose mode... +V ?= 0 +CFLAGS += $(EXTRA_CFLAGS) +HUSH = $(TOPLEVEL)/lib/hush + + ifsilent = $(if $(findstring 0, $(V)),$(1),) hush = $(call ifsilent,$(HUSH) $(1)) #.SUFFIXES: diff --git a/config.mk b/config.mk new file mode 100644 index 0000000..5161bda --- /dev/null +++ b/config.mk @@ -0,0 +1 @@ +INCLUDE_TESTS = 0 diff --git a/examples/Makefile b/examples/Makefile index dc4a0f3..663a214 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -1,20 +1,41 @@ OUTPUTS := dns.o \ - dns + dns \ + base64.o \ + base64 \ + base64_sem1.o \ + base64_sem1 \ + base64_sem2.o \ + base64_sem2 TOPLEVEL := ../ include ../common.mk +CFLAGS += $(shell pkg-config --cflags glib-2.0) +LDFLAGS += $(shell pkg-config --libs glib-2.0) -all: dns + +all: dns base64 base64_sem1 base64_sem2 dns: LDFLAGS:=-L../src -lhammer $(LDFLAGS) dns: dns.o rr.o dns_common.o $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -dns.o: ../src/hammer.h dns_common.h +dns.o: ../src/hammer.h dns_common.h ../src/glue.h +rr.o: ../src/hammer.h rr.h dns_common.h ../src/glue.h +dns_common.o: ../src/hammer.h dns_common.h ../src/glue.h -rr.o: ../src/hammer.h rr.h dns_common.h +base64: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64: base64.o + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) -dns_common.o: ../src/hammer.h dns_common.h \ No newline at end of file +base64_sem1: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64_sem1: base64_sem1.o + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) + +base64_sem2: LDFLAGS:=-L../src -lhammer $(LDFLAGS) +base64_sem2: base64_sem2.o + $(call hush, "Linking $@") $(CC) -o $@ $^ $(LDFLAGS) + +base64%.o: ../src/hammer.h ../src/glue.h diff --git a/examples/base64.c b/examples/base64.c new file mode 100644 index 0000000..ee142e3 --- /dev/null +++ b/examples/base64.c @@ -0,0 +1,63 @@ +// Example parser: Base64, syntax only. +// +// Demonstrates how to construct a Hammer parser that recognizes valid Base64 +// sequences. +// +// Note that no semantic evaluation of the sequence is performed, i.e. the +// byte sequence being represented is not returned or determined. See +// base64_sem1.c and base64_sem2.c for examples of how to attach appropriate +// semantic actions to the grammar. + +#include "../src/hammer.h" + +const HParser* document = NULL; +
void init_parser(void) +{ + // CORE + const HParser *digit = h_ch_range(0x30, 0x39); + const HParser *alpha = h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL); + + // AUX.
+ const HParser *plus = h_ch('+'); + const HParser *slash = h_ch('/'); + const HParser *equals = h_ch('='); + + const HParser *bsfdig = h_choice(alpha, digit, plus, slash, NULL); + const HParser *bsfdig_4bit = h_in((uint8_t *)"AEIMQUYcgkosw048", 16); + const HParser *bsfdig_2bit = h_in((uint8_t *)"AQgw", 4); + const HParser *base64_3 = h_repeat_n(bsfdig, 4); + const HParser *base64_2 = h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL); + const HParser *base64_1 = h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL); + const HParser *base64 = h_sequence(h_many(base64_3), + h_optional(h_choice(base64_2, + base64_1, NULL)), + NULL); + + document = h_sequence(h_whitespace(base64), h_whitespace(h_end_p()), NULL); +} + + +#include <stdio.h> + +int main(int argc, char **argv) +{ + uint8_t input[102400]; + size_t inputsize; + const HParseResult *result; + + init_parser(); + + inputsize = fread(input, 1, sizeof(input), stdin); + fprintf(stderr, "inputsize=%lu\ninput=", inputsize); + fwrite(input, 1, inputsize, stderr); + result = h_parse(document, input, inputsize); + + if(result) { + fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8); + h_pprint(stdout, result->ast, 0, 0); + return 0; + } else { + return 1; + } +} diff --git a/examples/base64_sem1.c b/examples/base64_sem1.c new file mode 100644 index 0000000..f2a3e82 --- /dev/null +++ b/examples/base64_sem1.c @@ -0,0 +1,172 @@ +// Example parser: Base64, with fine-grained semantic actions +// +// Demonstrates how to attach semantic actions to grammar rules and piece by +// piece transform the parse tree into the desired semantic representation, +// in this case a sequence of 8-bit values. +// +// Note how the grammar is defined by using the macros H_RULE and H_ARULE. +// Those rules using ARULE get an attached action which must be declared (as +// a function of type HAction) with a standard name based on the rule name. +// +// This variant of the example uses fine-grained semantic actions that +// transform the parse tree in small steps in a bottom-up fashion. Compare +// base64_sem2.c for an alternative approach using a single top-level action. + +#include "../src/hammer.h" +#include "../src/glue.h" +#include <assert.h> + + +/// +// Semantic actions for the grammar below, each corresponds to an "ARULE". +// They must be named act_<rulename>. +/// + +const HParsedToken *act_bsfdig(const HParseResult *p) +{ + HParsedToken *res = H_MAKE_UINT(0); + + uint8_t c = H_CAST_UINT(p->ast); + + if(c >= 0x40 && c <= 0x5A) // A-Z + res->uint = c - 0x41; + else if(c >= 0x60 && c <= 0x7A) // a-z + res->uint = c - 0x61 + 26; + else if(c >= 0x30 && c <= 0x39) // 0-9 + res->uint = c - 0x30 + 52; + else if(c == '+') + res->uint = 62; + else if(c == '/') + res->uint = 63; + + return res; +} + +H_ACT_APPLY(act_index0, h_act_index, 0); + +#define act_bsfdig_4bit act_bsfdig +#define act_bsfdig_2bit act_bsfdig + +#define act_equals h_act_ignore +#define act_ws h_act_ignore + +#define act_document act_index0 + +// General-form action to turn a block of base64 digits into bytes. +const HParsedToken *act_base64_n(int n, const HParseResult *p) +{ + HParsedToken *res = H_MAKE_SEQN(n); + + HParsedToken **digits = h_seq_elements(p->ast); + + uint32_t x = 0; + int bits = 0; + for(int i=0; i<n+1; i++) { + x <<= 6; x |= digits[i]->uint; + bits += 6; + } + x >>= bits%8; // align, i.e.
cut off extra bits + + for(int i=0; i<n; i++) { + HParsedToken *item = H_MAKE_UINT(x & 0xFF); + res->seq->elements[n-1-i] = item; // output the last byte and + x >>= 8; // discard it + } + res->seq->used = n; + + return res; +} + +H_ACT_APPLY(act_base64_3, act_base64_n, 3); +H_ACT_APPLY(act_base64_2, act_base64_n, 2); +H_ACT_APPLY(act_base64_1, act_base64_n, 1); + +const HParsedToken *act_base64(const HParseResult *p) +{ + assert(p->ast->token_type == TT_SEQUENCE); + assert(p->ast->seq->used == 2); + assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE); + + HParsedToken *res = H_MAKE_SEQ(); + + // concatenate base64_3 blocks + HCountedArray *seq = H_FIELD_SEQ(0); + for(size_t i=0; i<seq->used; i++) + h_seq_append(res, seq->elements[i]); + + // append one trailing base64_2 or _1 block + const HParsedToken *tok = h_seq_index(p->ast, 1); + if(tok->token_type == TT_SEQUENCE) + h_seq_append(res, tok); + + return res; +} + + +/// +// Set up the parser with the grammar to be recognized. +/// + +const HParser *init_parser(void) +{ + // CORE + H_RULE (digit, h_ch_range(0x30, 0x39)); + H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL)); + H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6)); + + // AUX. + H_RULE (plus, h_ch('+')); + H_RULE (slash, h_ch('/')); + H_ARULE(equals, h_ch('=')); + + H_ARULE(bsfdig, h_choice(alpha, digit, plus, slash, NULL)); + H_ARULE(bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); + H_ARULE(bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); + H_ARULE(base64_3, h_repeat_n(bsfdig, 4)); + H_ARULE(base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); + H_ARULE(base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); + H_ARULE(base64, h_sequence(h_many(base64_3), + h_optional(h_choice(base64_2, + base64_1, NULL)), + NULL)); + + H_ARULE(ws, h_many(space)); + H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); + + // BUG: sometimes inputs that should parse just don't. + // It *seemed* to happen mostly with things like "bbbbaaaaBA==". + // Using fewer actions seemed to make it less likely. + + return document; +} + + +/// +// Main routine: print input, parse, print result, return success/failure. +/// + +#include <stdio.h> + +int main(int argc, char **argv) +{ + uint8_t input[102400]; + size_t inputsize; + const HParser *parser; + const HParseResult *result; + + parser = init_parser(); + + inputsize = fread(input, 1, sizeof(input), stdin); + fprintf(stderr, "inputsize=%lu\ninput=", inputsize); + fwrite(input, 1, inputsize, stderr); + result = h_parse(parser, input, inputsize); + + if(result) { + fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8); + h_pprint(stdout, result->ast, 0, 0); + return 0; + } else { + return 1; + } +} diff --git a/examples/base64_sem2.c b/examples/base64_sem2.c new file mode 100644 index 0000000..32afe5b --- /dev/null +++ b/examples/base64_sem2.c @@ -0,0 +1,176 @@ +// Example parser: Base64, with coarse-grained semantic actions +// +// Demonstrates how to attach semantic actions to a grammar and transform the +// parse tree into the desired semantic representation, in this case a sequence +// of 8-bit values. +// +// Note how the grammar is defined by using the macros H_RULE and H_ARULE. +// Those rules using ARULE get an attached action which must be declared (as +// a function of type HAction) with a standard name based on the rule name. +// +// This variant of the example uses coarse-grained semantic actions, +// transforming the entire parse tree in one big step.
Compare base64_sem1.c +// for an alternative approach using a fine-grained piece-by-piece +// transformation. + +#include "../src/hammer.h" +#include "../src/glue.h" +#include + + +/// +// Semantic actions for the grammar below, each corresponds to an "ARULE". +// They must be named act_. +/// + +// helper: return the numeric value of a parsed base64 digit +uint8_t bsfdig_value(const HParsedToken *p) +{ + uint8_t value = 0; + + if(p && p->token_type == TT_UINT) { + uint8_t c = p->uint; + if(c >= 0x40 && c <= 0x5A) // A-Z + value = c - 0x41; + else if(c >= 0x60 && c <= 0x7A) // a-z + value = c - 0x61 + 26; + else if(c >= 0x30 && c <= 0x39) // 0-9 + value = c - 0x30 + 52; + else if(c == '+') + value = 62; + else if(c == '/') + value = 63; + } + + return value; +} + +// helper: append a byte value to a sequence +#define seq_append_byte(res, b) h_seq_snoc(res, H_MAKE_UINT(b)) + +const HParsedToken *act_base64(const HParseResult *p) +{ + assert(p->ast->token_type == TT_SEQUENCE); + assert(p->ast->seq->used == 2); + assert(p->ast->seq->elements[0]->token_type == TT_SEQUENCE); + + // grab b64_3 block sequence + // grab and analyze b64 end block (_2 or _1) + const HParsedToken *b64_3 = p->ast->seq->elements[0]; + const HParsedToken *b64_2 = p->ast->seq->elements[1]; + const HParsedToken *b64_1 = p->ast->seq->elements[1]; + + if(b64_2->token_type != TT_SEQUENCE) + b64_1 = b64_2 = NULL; + else if(b64_2->seq->elements[2]->uint == '=') + b64_2 = NULL; + else + b64_1 = NULL; + + // allocate result sequence + HParsedToken *res = H_MAKE_SEQ(); + + // concatenate base64_3 blocks + for(size_t i=0; iseq->used; i++) { + assert(b64_3->seq->elements[i]->token_type == TT_SEQUENCE); + HParsedToken **digits = b64_3->seq->elements[i]->seq->elements; + + uint32_t x = bsfdig_value(digits[0]); + x <<= 6; x |= bsfdig_value(digits[1]); + x <<= 6; x |= bsfdig_value(digits[2]); + x <<= 6; x |= bsfdig_value(digits[3]); + seq_append_byte(res, (x >> 16) & 0xFF); + seq_append_byte(res, (x >> 8) & 0xFF); + seq_append_byte(res, x & 0xFF); + } + + // append one trailing base64_2 or _1 block + if(b64_2) { + HParsedToken **digits = b64_2->seq->elements; + uint32_t x = bsfdig_value(digits[0]); + x <<= 6; x |= bsfdig_value(digits[1]); + x <<= 6; x |= bsfdig_value(digits[2]); + seq_append_byte(res, (x >> 10) & 0xFF); + seq_append_byte(res, (x >> 2) & 0xFF); + } else if(b64_1) { + HParsedToken **digits = b64_1->seq->elements; + uint32_t x = bsfdig_value(digits[0]); + x <<= 6; x |= bsfdig_value(digits[1]); + seq_append_byte(res, (x >> 4) & 0xFF); + } + + return res; +} + +H_ACT_APPLY(act_index0, h_act_index, 0); + +#define act_ws h_act_ignore +#define act_document act_index0 + + +/// +// Set up the parser with the grammar to be recognized. +/// + +const HParser *init_parser(void) +{ + // CORE + H_RULE (digit, h_ch_range(0x30, 0x39)); + H_RULE (alpha, h_choice(h_ch_range(0x41, 0x5a), h_ch_range(0x61, 0x7a), NULL)); + H_RULE (space, h_in((uint8_t *)" \t\n\r\f\v", 6)); + + // AUX. 
+ H_RULE (plus, h_ch('+')); + H_RULE (slash, h_ch('/')); + H_RULE (equals, h_ch('=')); + + H_RULE (bsfdig, h_choice(alpha, digit, plus, slash, NULL)); + H_RULE (bsfdig_4bit, h_in((uint8_t *)"AEIMQUYcgkosw048", 16)); + H_RULE (bsfdig_2bit, h_in((uint8_t *)"AQgw", 4)); + H_RULE (base64_3, h_repeat_n(bsfdig, 4)); + H_RULE (base64_2, h_sequence(bsfdig, bsfdig, bsfdig_4bit, equals, NULL)); + H_RULE (base64_1, h_sequence(bsfdig, bsfdig_2bit, equals, equals, NULL)); + H_ARULE(base64, h_sequence(h_many(base64_3), + h_optional(h_choice(base64_2, + base64_1, NULL)), + NULL)); + + H_ARULE(ws, h_many(space)); + H_ARULE(document, h_sequence(ws, base64, ws, h_end_p(), NULL)); + + // BUG: sometimes inputs that should parse just don't. + // It *seemed* to happen mostly with things like "bbbbaaaaBA==". + // Using fewer actions seemed to make it less likely. + + return document; +} + + +/// +// Main routine: print input, parse, print result, return success/failure. +/// + +#include <stdio.h> + +int main(int argc, char **argv) +{ + uint8_t input[102400]; + size_t inputsize; + const HParser *parser; + const HParseResult *result; + + parser = init_parser(); + + inputsize = fread(input, 1, sizeof(input), stdin); + fprintf(stderr, "inputsize=%lu\ninput=", inputsize); + fwrite(input, 1, inputsize, stderr); + result = h_parse(parser, input, inputsize); + + if(result) { + fprintf(stderr, "parsed=%lld bytes\n", result->bit_length/8); + h_pprint(stdout, result->ast, 0, 0); + return 0; + } else { + return 1; + } +} diff --git a/examples/dns.c b/examples/dns.c index 54d9c7e..7887ba6 100644 --- a/examples/dns.c +++ b/examples/dns.c @@ -10,7 +10,12 @@ #define false 0 #define true 1 -bool is_zero(HParseResult *p) { + +/// +// Validations +/// + +bool validate_hdzero(HParseResult *p) { if (TT_UINT != p->ast->token_type) return false; return (0 == p->ast->uint); @@ -20,407 +25,243 @@ bool is_zero(HParseResult *p) { * Every DNS message should have QDCOUNT entries in the question * section, and ANCOUNT+NSCOUNT+ARCOUNT resource records. */ -bool validate_dns(HParseResult *p) { +bool validate_message(HParseResult *p) { if (TT_SEQUENCE != p->ast->token_type) return false; - // The header holds the counts as its last 4 elements. - HParsedToken **elems = p->ast->seq->elements[0]->seq->elements; - size_t qd = elems[8]->uint; - size_t an = elems[9]->uint; - size_t ns = elems[10]->uint; - size_t ar = elems[11]->uint; - HParsedToken *questions = p->ast->seq->elements[1]; - if (questions->seq->used != qd) + + dns_header_t *header = H_FIELD(dns_header_t, 0); + size_t qd = header->question_count; + size_t an = header->answer_count; + size_t ns = header->authority_count; + size_t ar = header->additional_count; + + if (H_FIELD_SEQ(1)->used != qd) return false; - HParsedToken *rrs = p->ast->seq->elements[2]; - if (an+ns+ar != rrs->seq->used) + if (an+ns+ar != H_FIELD_SEQ(2)->used) return false; + return true; } -struct dns_qname get_qname(const HParsedToken *t) { - // The qname parser parses at least 1 length-value pair, then a NULL. - // So, t->seq->elements[0] is a sequence of at least 1 such pair, - // and t->seq->elements[1] is the null.
- const HParsedToken *labels = t->seq->elements[0]; - struct dns_qname ret = { - .qlen = labels->seq->used, - .labels = h_arena_malloc(t->seq->arena, sizeof(*ret.labels)*labels->seq->used) - }; - // i is which label we're on - for (size_t i=0; iseq->used; ++i) { - ret.labels[i].len = labels->seq->elements[i]->seq->used; - ret.labels[i].label = h_arena_malloc(t->seq->arena, ret.labels[i].len + 1); - // j is which char of the label we're on - for (size_t j=0; jseq->elements[i]->seq->elements[j]->uint; - ret.labels[i].label[ret.labels[i].len] = 0; - } - return ret; -} -char* get_domain(const HParsedToken *t) { - switch(t->token_type) { - case TT_UINT: - return " "; - case TT_SEQUENCE: - { - // Sequence of subdomains separated by "." - // Each subdomain is a label, which can be no more than 63 chars. - char *ret = h_arena_malloc(t->seq->arena, 64*t->seq->used); - size_t count = 0; - for (size_t i=0; iseq->used; ++i) { - HParsedToken *tmp = t->seq->elements[i]; - for (size_t j=0; jseq->used; ++j) { - ret[count] = tmp->seq->elements[i]->uint; - ++count; - } - ret[count] = '.'; - ++count; - } - ret[count-1] = '\x00'; - return ret; - } - default: - return NULL; - } -} +/// +// Semantic Actions +/// -uint8_t* get_cs(const HCountedArray *arr) { - uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used); - for (size_t i=0; iused; ++i) - ret[i] = arr->elements[i]->uint; - return ret; -} - -uint8_t** get_txt(const HCountedArray *arr) { - uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used); - for (size_t i=0; iused; ++i) { - uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->elements[i]->seq->used); - for (size_t j=0; jelements[i]->seq->used; ++j) - tmp[j] = arr->elements[i]->seq->elements[j]->uint; - } - return ret; -} - -void set_rr(struct dns_rr rr, HCountedArray *rdata) { +// Helper: Parse and pack the RDATA field of a Resource Record. +void set_rdata(struct dns_rr *rr, HCountedArray *rdata) { uint8_t *data = h_arena_malloc(rdata->arena, sizeof(uint8_t)*rdata->used); for (size_t i=0; iused; ++i) - data[i] = rdata->elements[i]->uint; + data[i] = H_CAST_UINT(rdata->elements[i]); + + // Parse RDATA if possible. + const HParseResult *p = NULL; + const HParser *parser = init_rdata(rr->type); + if (parser) + p = h_parse(parser, (const uint8_t*)data, rdata->used); // If the RR doesn't parse, set its type to 0. 
- switch(rr.type) { - case 1: // A - { - const HParseResult *r = h_parse(init_a(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.a = r->ast->seq->elements[0]->uint; - break; - } - case 2: // NS - { - const HParseResult *r = h_parse(init_ns(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.ns = get_domain(r->ast->seq->elements[0]); - break; - } - case 3: // MD - { - const HParseResult *r = h_parse(init_md(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.md = get_domain(r->ast->seq->elements[0]); - break; - } - case 4: // MF - { - const HParseResult *r = h_parse(init_mf(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.md = get_domain(r->ast->seq->elements[0]); - break; - } - case 5: // CNAME - { - const HParseResult *r = h_parse(init_cname(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.cname = get_domain(r->ast->seq->elements[0]); - break; - } - case 6: // SOA - { - const HParseResult *r = h_parse(init_soa(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.soa.mname = get_domain(r->ast->seq->elements[0]); - rr.soa.rname = get_domain(r->ast->seq->elements[1]); - rr.soa.serial = r->ast->seq->elements[2]->uint; - rr.soa.refresh = r->ast->seq->elements[3]->uint; - rr.soa.retry = r->ast->seq->elements[4]->uint; - rr.soa.expire = r->ast->seq->elements[5]->uint; - rr.soa.minimum = r->ast->seq->elements[6]->uint; - } - break; - } - case 7: // MB - { - const HParseResult *r = h_parse(init_mb(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.mb = get_domain(r->ast->seq->elements[0]); - break; - } - case 8: // MG - { - const HParseResult *r = h_parse(init_mg(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.mg = get_domain(r->ast->seq->elements[0]); - break; - } - case 9: // MR - { - const HParseResult *r = h_parse(init_mr(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.mr = get_domain(r->ast->seq->elements[0]); - break; - } - case 10: // NULL - { - const HParseResult *r = h_parse(init_null(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.null = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->used); - for (size_t i=0; iast->seq->used; ++i) - rr.null[i] = r->ast->seq->elements[i]->uint; - } - break; - } - case 11: // WKS - { - const HParseResult *r = h_parse(init_wks(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.wks.address = r->ast->seq->elements[0]->uint; - rr.wks.protocol = r->ast->seq->elements[1]->uint; - rr.wks.len = r->ast->seq->elements[2]->seq->used; - rr.wks.bit_map = h_arena_malloc(rdata->arena, sizeof(uint8_t)*r->ast->seq->elements[2]->seq->used); - for (size_t i=0; iast->seq->elements[2]->seq->elements[i]->uint; - } - break; - } - case 12: // PTR - { - const HParseResult *r = h_parse(init_ptr(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else - rr.ptr = get_domain(r->ast->seq->elements[0]); - break; - } - case 13: // HINFO - { - const HParseResult *r = h_parse(init_hinfo(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.hinfo.cpu = get_cs(r->ast->seq->elements[0]->seq); - rr.hinfo.os = get_cs(r->ast->seq->elements[1]->seq); - } - break; - } - case 14: // MINFO - { - const HParseResult *r = h_parse(init_minfo(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.minfo.rmailbx = 
get_domain(r->ast->seq->elements[0]); - rr.minfo.emailbx = get_domain(r->ast->seq->elements[1]); - } - break; - } - case 15: // MX - { - const HParseResult *r = h_parse(init_mx(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.mx.preference = r->ast->seq->elements[0]->uint; - rr.mx.exchange = get_domain(r->ast->seq->elements[1]); - } - break; - } - case 16: // TXT - { - const HParseResult *r = h_parse(init_txt(), (const uint8_t*)data, rdata->used); - if (!r) - rr.type = 0; - else { - rr.txt.count = r->ast->seq->elements[0]->seq->used; - rr.txt.txt_data = get_txt(r->ast->seq->elements[0]->seq); - } - break; - } - default: - break; + if (!p) + rr->type = 0; + + // Pack the parsed rdata into rr. + switch(rr->type) { + case 1: rr->a = H_CAST_UINT(p->ast); break; + case 2: rr->ns = *H_CAST(dns_domain_t, p->ast); break; + case 3: rr->md = *H_CAST(dns_domain_t, p->ast); break; + case 4: rr->md = *H_CAST(dns_domain_t, p->ast); break; + case 5: rr->cname = *H_CAST(dns_domain_t, p->ast); break; + case 6: rr->soa = *H_CAST(dns_rr_soa_t, p->ast); break; + case 7: rr->mb = *H_CAST(dns_domain_t, p->ast); break; + case 8: rr->mg = *H_CAST(dns_domain_t, p->ast); break; + case 9: rr->mr = *H_CAST(dns_domain_t, p->ast); break; + case 10: rr->null = *H_CAST(dns_rr_null_t, p->ast); break; + case 11: rr->wks = *H_CAST(dns_rr_wks_t, p->ast); break; + case 12: rr->ptr = *H_CAST(dns_domain_t, p->ast); break; + case 13: rr->hinfo = *H_CAST(dns_rr_hinfo_t, p->ast); break; + case 14: rr->minfo = *H_CAST(dns_rr_minfo_t, p->ast); break; + case 15: rr->mx = *H_CAST(dns_rr_mx_t, p->ast); break; + case 16: rr->txt = *H_CAST(dns_rr_txt_t, p->ast); break; + default: break; } } -const HParsedToken* pack_dns_struct(const HParseResult *p) { - h_pprint(stdout, p->ast, 0, 2); - HParsedToken *ret = h_arena_malloc(p->arena, sizeof(HParsedToken)); - ret->token_type = TT_USER; - - dns_message_t *msg = h_arena_malloc(p->arena, sizeof(dns_message_t)); - - HParsedToken *hdr = p->ast->seq->elements[0]; - struct dns_header header = { - .id = hdr->seq->elements[0]->uint, - .qr = hdr->seq->elements[1]->uint, - .opcode = hdr->seq->elements[2]->uint, - .aa = hdr->seq->elements[3]->uint, - .tc = hdr->seq->elements[4]->uint, - .rd = hdr->seq->elements[5]->uint, - .ra = hdr->seq->elements[6]->uint, - .rcode = hdr->seq->elements[7]->uint, - .question_count = hdr->seq->elements[8]->uint, - .answer_count = hdr->seq->elements[9]->uint, - .authority_count = hdr->seq->elements[10]->uint, - .additional_count = hdr->seq->elements[11]->uint +const HParsedToken* act_header(const HParseResult *p) { + HParsedToken **fields = h_seq_elements(p->ast); + dns_header_t header_ = { + .id = H_CAST_UINT(fields[0]), + .qr = H_CAST_UINT(fields[1]), + .opcode = H_CAST_UINT(fields[2]), + .aa = H_CAST_UINT(fields[3]), + .tc = H_CAST_UINT(fields[4]), + .rd = H_CAST_UINT(fields[5]), + .ra = H_CAST_UINT(fields[6]), + .rcode = H_CAST_UINT(fields[7]), + .question_count = H_CAST_UINT(fields[8]), + .answer_count = H_CAST_UINT(fields[9]), + .authority_count = H_CAST_UINT(fields[10]), + .additional_count = H_CAST_UINT(fields[11]) }; - msg->header = header; - HParsedToken *qs = p->ast->seq->elements[1]; + dns_header_t *header = H_ALLOC(dns_header_t); + *header = header_; + + return H_MAKE(dns_header_t, header); +} + +const HParsedToken* act_label(const HParseResult *p) { + dns_label_t *r = H_ALLOC(dns_label_t); + + r->len = h_seq_len(p->ast); + r->label = h_arena_malloc(p->arena, r->len + 1); + for (size_t i=0; ilen; ++i) + r->label[i] = 
H_FIELD_UINT(i); + r->label[r->len] = 0; + + return H_MAKE(dns_label_t, r); +} + +const HParsedToken* act_rr(const HParseResult *p) { + dns_rr_t *rr = H_ALLOC(dns_rr_t); + + rr->name = *H_FIELD(dns_domain_t, 0); + rr->type = H_FIELD_UINT(1); + rr->class = H_FIELD_UINT(2); + rr->ttl = H_FIELD_UINT(3); + rr->rdlength = H_FIELD_SEQ(4)->used; + + // Parse and pack RDATA. + set_rdata(rr, H_FIELD_SEQ(4)); + + return H_MAKE(dns_rr_t, rr); +} + +const HParsedToken* act_question(const HParseResult *p) { + dns_question_t *q = H_ALLOC(dns_question_t); + HParsedToken **fields = h_seq_elements(p->ast); + + // QNAME is a sequence of labels. Pack them into an array. + q->qname.qlen = h_seq_len(fields[0]); + q->qname.labels = h_arena_malloc(p->arena, sizeof(dns_label_t)*q->qname.qlen); + for(size_t i=0; i<q->qname.qlen; i++) { + q->qname.labels[i] = *H_INDEX(dns_label_t, fields[0], i); + } + + q->qtype = H_CAST_UINT(fields[1]); + q->qclass = H_CAST_UINT(fields[2]); + + return H_MAKE(dns_question_t, q); +} + +const HParsedToken* act_message(const HParseResult *p) { + h_pprint(stdout, p->ast, 0, 2); + dns_message_t *msg = H_ALLOC(dns_message_t); + + // Copy header into message struct. + dns_header_t *header = H_FIELD(dns_header_t, 0); + msg->header = *header; + + // Copy questions into message struct. + HParsedToken *qs = h_seq_index(p->ast, 1); struct dns_question *questions = h_arena_malloc(p->arena, - sizeof(struct dns_question)*(header.question_count)); - for (size_t i=0; i<header.question_count; ++i) { - questions[i].qname = get_qname(qs->seq->elements[i]->seq->elements[0]); - questions[i].qtype = qs->seq->elements[i]->seq->elements[1]->uint; - questions[i].qclass = qs->seq->elements[i]->seq->elements[2]->uint; + sizeof(struct dns_question)*(header->question_count)); + for (size_t i=0; i<header->question_count; ++i) { + questions[i] = *H_INDEX(dns_question_t, qs, i); } msg->questions = questions; - HParsedToken *rrs = p->ast->seq->elements[2]; + // Copy answer RRs into message struct. + HParsedToken *rrs = h_seq_index(p->ast, 2); struct dns_rr *answers = h_arena_malloc(p->arena, - sizeof(struct dns_rr)*(header.answer_count)); - for (size_t i=0; i<header.answer_count; ++i) { - answers[i].name = get_domain(rrs[i].seq->elements[0]); - answers[i].type = rrs[i].seq->elements[1]->uint; - answers[i].class = rrs[i].seq->elements[2]->uint; - answers[i].ttl = rrs[i].seq->elements[3]->uint; - answers[i].rdlength = rrs[i].seq->elements[4]->seq->used; - set_rr(answers[i], rrs[i].seq->elements[4]->seq); + sizeof(struct dns_rr)*(header->answer_count)); + for (size_t i=0; i<header->answer_count; ++i) { + answers[i] = *H_INDEX(dns_rr_t, rrs, i); } msg->answers = answers; + // Copy authority RRs into message struct. struct dns_rr *authority = h_arena_malloc(p->arena, - sizeof(struct dns_rr)*(header.authority_count)); - for (size_t i=0, j=header.answer_count; i<header.authority_count; ++i, ++j) { - authority[i].name = get_domain(rrs[j].seq->elements[0]); - authority[i].type = rrs[j].seq->elements[1]->uint; - authority[i].class = rrs[j].seq->elements[2]->uint; - authority[i].ttl = rrs[j].seq->elements[3]->uint; - authority[i].rdlength = rrs[j].seq->elements[4]->seq->used; - set_rr(authority[i], rrs[j].seq->elements[4]->seq); + sizeof(struct dns_rr)*(header->authority_count)); + for (size_t i=0, j=header->answer_count; i<header->authority_count; ++i, ++j) { + authority[i] = *H_INDEX(dns_rr_t, rrs, j); } msg->authority = authority; + // Copy additional RRs into message struct.
struct dns_rr *additional = h_arena_malloc(p->arena, - sizeof(struct dns_rr)*(header.additional_count)); - for (size_t i=0, j=header.answer_count+header.authority_count; ielements[0]); - additional[i].type = rrs[j].seq->elements[1]->uint; - additional[i].class = rrs[j].seq->elements[2]->uint; - additional[i].ttl = rrs[j].seq->elements[3]->uint; - additional[i].rdlength = rrs[j].seq->elements[4]->seq->used; - set_rr(additional[i], rrs[j].seq->elements[4]->seq); + sizeof(struct dns_rr)*(header->additional_count)); + for (size_t i=0, j=header->answer_count+header->authority_count; iadditional_count; ++i, ++j) { + additional[i] = *H_INDEX(dns_rr_t, rrs, j); } msg->additional = additional; - ret->user = (void*)msg; + return H_MAKE(dns_message_t, msg); +} + +#define act_hdzero h_act_ignore +#define act_qname act_index0 + + +/// +// Grammar +/// + +const HParser* init_parser() { + static const HParser *ret = NULL; + if (ret) + return ret; + + H_RULE (domain, init_domain()); + H_AVRULE(hdzero, h_bits(3, false)); + H_ARULE (header, h_sequence(h_bits(16, false), // ID + h_bits(1, false), // QR + h_bits(4, false), // opcode + h_bits(1, false), // AA + h_bits(1, false), // TC + h_bits(1, false), // RD + h_bits(1, false), // RA + hdzero, // Z + h_bits(4, false), // RCODE + h_uint16(), // QDCOUNT + h_uint16(), // ANCOUNT + h_uint16(), // NSCOUNT + h_uint16(), // ARCOUNT + NULL)); + H_RULE (type, h_int_range(h_uint16(), 1, 16)); + H_RULE (qtype, h_choice(type, + h_int_range(h_uint16(), 252, 255), + NULL)); + H_RULE (class, h_int_range(h_uint16(), 1, 4)); + H_RULE (qclass, h_choice(class, + h_int_range(h_uint16(), 255, 255), + NULL)); + H_RULE (len, h_int_range(h_uint8(), 1, 255)); + H_ARULE (label, h_length_value(len, h_uint8())); + H_ARULE (qname, h_sequence(h_many1(label), + h_ch('\x00'), + NULL)); + H_ARULE (question, h_sequence(qname, qtype, qclass, NULL)); + H_RULE (rdata, h_length_value(h_uint16(), h_uint8())); + H_ARULE (rr, h_sequence(domain, // NAME + type, // TYPE + class, // CLASS + h_uint32(), // TTL + rdata, // RDLENGTH+RDATA + NULL)); + H_AVRULE(message, h_sequence(header, + h_many(question), + h_many(rr), + h_end_p(), + NULL)); + + ret = message; return ret; } -const HParser* init_parser() { - static HParser *dns_message = NULL; - if (dns_message) - return dns_message; - const HParser *domain = init_domain(); - - const HParser *dns_header = h_sequence(h_bits(16, false), // ID - h_bits(1, false), // QR - h_bits(4, false), // opcode - h_bits(1, false), // AA - h_bits(1, false), // TC - h_bits(1, false), // RD - h_bits(1, false), // RA - h_ignore(h_attr_bool(h_bits(3, false), is_zero)), // Z - h_bits(4, false), // RCODE - h_uint16(), // QDCOUNT - h_uint16(), // ANCOUNT - h_uint16(), // NSCOUNT - h_uint16(), // ARCOUNT - NULL); - - const HParser *type = h_int_range(h_uint16(), 1, 16); - - const HParser *qtype = h_choice(type, - h_int_range(h_uint16(), 252, 255), - NULL); - - const HParser *class = h_int_range(h_uint16(), 1, 4); - - const HParser *qclass = h_choice(class, - h_int_range(h_uint16(), 255, 255), - NULL); - - const HParser *dns_question = h_sequence(h_sequence(h_many1(h_length_value(h_int_range(h_uint8(), 1, 255), - h_uint8())), - h_ch('\x00'), - NULL), // QNAME - qtype, // QTYPE - qclass, // QCLASS - NULL); - - - const HParser *dns_rr = h_sequence(domain, // NAME - type, // TYPE - class, // CLASS - h_uint32(), // TTL - h_length_value(h_uint16(), h_uint8()), // RDLENGTH+RDATA - NULL); - - - dns_message = (HParser*)h_action(h_attr_bool(h_sequence(dns_header, - h_many(dns_question), - 
h_many(dns_rr), - h_end_p(), - NULL), - validate_dns), - pack_dns_struct); - - return dns_message; -} +/// +// Main Program for a Dummy DNS Server +/// int start_listening() { // return: fd @@ -442,7 +283,7 @@ int start_listening() { const int TYPE_MAX = 16; typedef const char* cstr; -const char* TYPE_STR[17] = { +static const char* TYPE_STR[17] = { "nil", "A", "NS", "MD", "MF", "CNAME", "SOA", "MB", "MG", "MR", "NULL", "WKS", diff --git a/examples/dns.h b/examples/dns.h index 151c46e..ed2c26f 100644 --- a/examples/dns.h +++ b/examples/dns.h @@ -1,6 +1,27 @@ #include "../src/hammer.h" -struct dns_header { +enum DNSTokenType_ { + TT_dns_message_t = TT_USER, + TT_dns_header_t, + TT_dns_label_t, + TT_dns_qname_t, + TT_dns_question_t, + TT_dns_rr_t, + TT_dns_rr_txt_t, + TT_dns_rr_hinfo_t, + TT_dns_rr_minfo_t, + TT_dns_rr_mx_t, + TT_dns_rr_soa_t, + TT_dns_rr_wks_t, + TT_dns_rr_null_t, + TT_dns_domain_t, + TT_dns_cstr_t +}; + +typedef char *dns_domain_t; +typedef uint8_t *dns_cstr_t; + +typedef struct dns_header { uint16_t id; bool qr, aa, tc, rd, ra; char opcode, rcode; @@ -8,74 +29,93 @@ struct dns_header { size_t answer_count; size_t authority_count; size_t additional_count; -}; -struct dns_qname { +} dns_header_t; + +typedef struct dns_label { + size_t len; + uint8_t *label; +} dns_label_t; + +typedef struct dns_qname { size_t qlen; - struct { - size_t len; - uint8_t *label; - } *labels; -}; -struct dns_question { - struct dns_qname qname; + dns_label_t *labels; +} dns_qname_t; + +typedef struct dns_question { + dns_qname_t qname; uint16_t qtype; uint16_t qclass; -}; -struct dns_rr { +} dns_question_t; + +typedef struct { + dns_cstr_t cpu; + dns_cstr_t os; +} dns_rr_hinfo_t; + +typedef struct { + char* rmailbx; + char* emailbx; +} dns_rr_minfo_t; + +typedef struct { + uint16_t preference; + char* exchange; +} dns_rr_mx_t; + +typedef struct { + char* mname; + char* rname; + uint32_t serial; + uint32_t refresh; + uint32_t retry; + uint32_t expire; + uint32_t minimum; +} dns_rr_soa_t; + +typedef struct { + size_t count; + uint8_t** txt_data; +} dns_rr_txt_t; + +typedef struct { + uint32_t address; + uint8_t protocol; + size_t len; + uint8_t* bit_map; +} dns_rr_wks_t; + +typedef uint8_t *dns_rr_null_t; + +typedef struct dns_rr { char* name; uint16_t type; uint16_t class; uint32_t ttl; // cmos is also acceptable. 
uint16_t rdlength; union { - char* cname; - struct { - uint8_t* cpu; - uint8_t* os; - } hinfo; - char* mb; - char* md; - char* mf; - char* mg; - struct { - char* rmailbx; - char* emailbx; - } minfo; - char* mr; - struct { - uint16_t preference; - char* exchange; - } mx; - uint8_t* null; - char* ns; - char* ptr; - struct { - char* mname; - char* rname; - uint32_t serial; - uint32_t refresh; - uint32_t retry; - uint32_t expire; - uint32_t minimum; - } soa; - struct { - size_t count; - uint8_t** txt_data; - } txt; - uint32_t a; - struct { - uint32_t address; - uint8_t protocol; - size_t len; - uint8_t* bit_map; - } wks; + uint32_t a; + char* ns; + char* md; + char* mf; + char* cname; + dns_rr_soa_t soa; + char* mb; + char* mg; + char* mr; + dns_rr_null_t null; + dns_rr_wks_t wks; + char* ptr; + dns_rr_hinfo_t hinfo; + dns_rr_minfo_t minfo; + dns_rr_mx_t mx; + dns_rr_txt_t txt; }; -}; +} dns_rr_t; typedef struct dns_message { - struct dns_header header; - struct dns_question *questions; - struct dns_rr *answers; - struct dns_rr *authority; - struct dns_rr *additional; + dns_header_t header; + dns_question_t *questions; + dns_rr_t *answers; + dns_rr_t *authority; + dns_rr_t *additional; } dns_message_t; diff --git a/examples/dns_common.c b/examples/dns_common.c index 3d349f1..76915b6 100644 --- a/examples/dns_common.c +++ b/examples/dns_common.c @@ -1,9 +1,12 @@ #include "../src/hammer.h" #include "dns_common.h" +#include "dns.h" #define false 0 #define true 1 +H_ACT_APPLY(act_index0, h_act_index, 0) + /** * A label can't be more than 63 characters. */ @@ -13,51 +16,64 @@ bool validate_label(HParseResult *p) { return (64 > p->ast->seq->used); } +#define act_label h_act_flatten + +const HParsedToken* act_domain(const HParseResult *p) { + const HParsedToken *ret = NULL; + char *arr = NULL; + + switch(p->ast->token_type) { + case TT_UINT: + arr = " "; + break; + case TT_SEQUENCE: + // Sequence of subdomains separated by "." + // Each subdomain is a label, which can be no more than 63 chars. 
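+      // Each element therefore needs at most 63 bytes for the label plus
+      // one byte for the "." separator, i.e. 64 bytes; the trailing "."
+      // is overwritten with '\0' after the loop.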
+ arr = h_arena_malloc(p->arena, 64*p->ast->seq->used); + size_t count = 0; + for (size_t i=0; iast->seq->used; ++i) { + HParsedToken *tmp = p->ast->seq->elements[i]; + for (size_t j=0; jseq->used; ++j) { + arr[count] = tmp->seq->elements[i]->uint; + ++count; + } + arr[count] = '.'; + ++count; + } + arr[count-1] = '\x00'; + break; + default: + arr = NULL; + ret = NULL; + } + + if(arr) { + dns_domain_t *val = H_ALLOC(dns_domain_t); // dns_domain_t is char* + *val = arr; + ret = H_MAKE(dns_domain_t, val); + } + + return ret; +} + const HParser* init_domain() { - static const HParser *domain = NULL; - if (domain) - return domain; + static const HParser *ret = NULL; + if (ret) + return ret; - const HParser *letter = h_choice(h_ch_range('a', 'z'), - h_ch_range('A', 'Z'), - NULL); + H_RULE (letter, h_choice(h_ch_range('a','z'), h_ch_range('A','Z'), NULL)); + H_RULE (let_dig, h_choice(letter, h_ch_range('0','9'), NULL)); + H_RULE (ldh_str, h_many1(h_choice(let_dig, h_ch('-'), NULL))); + H_VARULE(label, h_sequence(letter, + h_optional(h_sequence(h_optional(ldh_str), + let_dig, + NULL)), + NULL)); + H_RULE (subdomain, h_sepBy1(label, h_ch('.'))); + H_ARULE (domain, h_choice(subdomain, h_ch(' '), NULL)); - const HParser *let_dig = h_choice(letter, - h_ch_range('0', '9'), - NULL); - - const HParser *ldh_str = h_many1(h_choice(let_dig, - h_ch('-'), - NULL)); - - const HParser *label = h_attr_bool(h_sequence(letter, - h_optional(h_sequence(h_optional(ldh_str), - let_dig, - NULL)), - NULL), - validate_label); - - /** - * You could write it like this ... - * HParser *indirect_subdomain = h_indirect(); - * const HParser *subdomain = h_choice(label, - * h_sequence(indirect_subdomain, - * h_ch('.'), - * label, - * NULL), - * NULL); - * h_bind_indirect(indirect_subdomain, subdomain); - * - * ... 
but this is easier and equivalent - */ - - const HParser *subdomain = h_sepBy1(label, h_ch('.')); - - domain = h_choice(subdomain, - h_ch(' '), - NULL); - - return domain; + ret = domain; + return ret; } const HParser* init_character_string() { diff --git a/examples/dns_common.h b/examples/dns_common.h index 41d73f0..c1d8d7e 100644 --- a/examples/dns_common.h +++ b/examples/dns_common.h @@ -2,8 +2,11 @@ #define HAMMER_DNS_COMMON__H #include "../src/hammer.h" +#include "../src/glue.h" const HParser* init_domain(); const HParser* init_character_string(); +const HParsedToken* act_index0(const HParseResult *p); + #endif diff --git a/examples/rr.c b/examples/rr.c index 8dae885..8c14e0a 100644 --- a/examples/rr.c +++ b/examples/rr.c @@ -1,124 +1,15 @@ #include "../src/hammer.h" #include "dns_common.h" +#include "dns.h" #include "rr.h" #define false 0 #define true 1 -const HParser* init_cname() { - static const HParser *cname = NULL; - if (cname) - return cname; - - cname = h_sequence(init_domain(), - h_end_p(), - NULL); - - return cname; -} -const HParser* init_hinfo() { - static const HParser *hinfo = NULL; - if (hinfo) - return hinfo; - - const HParser* cstr = init_character_string(); - - hinfo = h_sequence(cstr, - cstr, - h_end_p(), - NULL); - - return hinfo; -} - -const HParser* init_mb() { - static const HParser *mb = NULL; - if (mb) - return mb; - - mb = h_sequence(init_domain(), - h_end_p(), - NULL); - - return mb; -} - -const HParser* init_md() { - static const HParser *md = NULL; - if (md) - return md; - - md = h_sequence(init_domain(), - h_end_p, - NULL); - - return md; -} - -const HParser* init_mf() { - static const HParser *mf = NULL; - if (mf) - return mf; - - mf = h_sequence(init_domain(), - h_end_p(), - NULL); - - return mf; -} - -const HParser* init_mg() { - static const HParser *mg = NULL; - if (mg) - return mg; - - mg = h_sequence(init_domain(), - h_end_p(), - NULL); - - return mg; -} - -const HParser* init_minfo() { - static const HParser *minfo = NULL; - if (minfo) - return minfo; - - const HParser* domain = init_domain(); - - minfo = h_sequence(domain, - domain, - h_end_p(), - NULL); - - return minfo; -} - -const HParser* init_mr() { - static const HParser *mr = NULL; - if (mr) - return mr; - - mr = h_sequence(init_domain(), - h_end_p(), - NULL); - - return mr; -} - -const HParser* init_mx() { - static const HParser *mx = NULL; - if (mx) - return mx; - - mx = h_sequence(h_uint16(), - init_domain(), - h_end_p(), - NULL); - - return mx; -} +/// +// Validations and Semantic Actions +/// bool validate_null(HParseResult *p) { if (TT_SEQUENCE != p->ast->token_type) @@ -126,94 +17,177 @@ bool validate_null(HParseResult *p) { return (65536 > p->ast->seq->used); } -const HParser* init_null() { - static const HParser *null_ = NULL; - if (null_) - return null_; +const HParsedToken *act_null(const HParseResult *p) { + dns_rr_null_t *null = H_ALLOC(dns_rr_null_t); - null_ = h_attr_bool(h_many(h_uint8()), validate_null); + size_t len = h_seq_len(p->ast); + uint8_t *buf = h_arena_malloc(p->arena, sizeof(uint8_t)*len); + for (size_t i=0; iast); + uint8_t **ret = h_arena_malloc(arr->arena, sizeof(uint8_t*)*arr->used); + for (size_t i=0; iused; ++i) { + size_t len = h_seq_len(arr->elements[i]); + uint8_t *tmp = h_arena_malloc(arr->arena, sizeof(uint8_t)*len); + for (size_t j=0; jelements[i], j); + ret[i] = tmp; + } - return ns; + txt->count = arr->used; + txt->txt_data = ret; + + return H_MAKE(dns_rr_txt_t, txt); } -const HParser* init_ptr() { - static const HParser *ptr = NULL; - if (ptr) - 
return ptr; +const HParsedToken* act_cstr(const HParseResult *p) { + dns_cstr_t *cs = H_ALLOC(dns_cstr_t); + + const HCountedArray *arr = H_CAST_SEQ(p->ast); + uint8_t *ret = h_arena_malloc(arr->arena, sizeof(uint8_t)*arr->used); + for (size_t i=0; iused; ++i) + ret[i] = H_CAST_UINT(arr->elements[i]); + assert(ret[arr->used-1] == '\0'); // XXX Is this right?! If so, shouldn't it be a validation? + *cs = ret; + + return H_MAKE(dns_cstr_t, cs); +} + +const HParsedToken* act_soa(const HParseResult *p) { + dns_rr_soa_t *soa = H_ALLOC(dns_rr_soa_t); + + soa->mname = *H_FIELD(dns_domain_t, 0); + soa->rname = *H_FIELD(dns_domain_t, 1); + soa->serial = H_FIELD_UINT(2); + soa->refresh = H_FIELD_UINT(3); + soa->retry = H_FIELD_UINT(4); + soa->expire = H_FIELD_UINT(5); + soa->minimum = H_FIELD_UINT(6); + + return H_MAKE(dns_rr_soa_t, soa); +} + +const HParsedToken* act_wks(const HParseResult *p) { + dns_rr_wks_t *wks = H_ALLOC(dns_rr_wks_t); + + wks->address = H_FIELD_UINT(0); + wks->protocol = H_FIELD_UINT(1); + wks->len = H_FIELD_SEQ(2)->used; + wks->bit_map = h_arena_malloc(p->arena, sizeof(uint8_t)*wks->len); + for (size_t i=0; ilen; ++i) + wks->bit_map[i] = H_INDEX_UINT(p->ast, 2, i); + + return H_MAKE(dns_rr_wks_t, wks); +} + +const HParsedToken* act_hinfo(const HParseResult *p) { + dns_rr_hinfo_t *hinfo = H_ALLOC(dns_rr_hinfo_t); + + hinfo->cpu = *H_FIELD(dns_cstr_t, 0); + hinfo->os = *H_FIELD(dns_cstr_t, 1); + + return H_MAKE(dns_rr_hinfo_t, hinfo); +} + +const HParsedToken* act_minfo(const HParseResult *p) { + dns_rr_minfo_t *minfo = H_ALLOC(dns_rr_minfo_t); + + minfo->rmailbx = *H_FIELD(dns_domain_t, 0); + minfo->emailbx = *H_FIELD(dns_domain_t, 1); + + return H_MAKE(dns_rr_minfo_t, minfo); +} + +const HParsedToken* act_mx(const HParseResult *p) { + dns_rr_mx_t *mx = H_ALLOC(dns_rr_mx_t); + + mx->preference = H_FIELD_UINT(0); + mx->exchange = *H_FIELD(dns_domain_t, 1); + + return H_MAKE(dns_rr_mx_t, mx); +} + + +/// +// Parsers for all types of RDATA +/// + +#define RDATA_TYPE_MAX 16 +const HParser* init_rdata(uint16_t type) { + static const HParser *parsers[RDATA_TYPE_MAX+1]; + static int inited = 0; + + if (type >= sizeof(parsers)) + return NULL; - ptr = h_sequence(init_domain(), - h_end_p(), - NULL); + if (inited) + return parsers[type]; - return ptr; -} - -const HParser* init_soa() { - static const HParser *soa = NULL; - if (soa) - return soa; - - const HParser *domain = init_domain(); - - soa = h_sequence(domain, // MNAME - domain, // RNAME - h_uint32(), // SERIAL - h_uint32(), // REFRESH - h_uint32(), // RETRY - h_uint32(), // EXPIRE - h_uint32(), // MINIMUM - h_end_p(), - NULL); - - return soa; -} - -const HParser* init_txt() { - static const HParser *txt = NULL; - if (txt) - return txt; - - txt = h_sequence(h_many1(init_character_string()), - h_end_p(), - NULL); - - return txt; -} - -const HParser* init_a() { - static const HParser *a = NULL; - if (a) - return a; - - a = h_sequence(h_uint32(), - h_end_p(), - NULL); - - return a; -} - -const HParser* init_wks() { - static const HParser *wks = NULL; - if (wks) - return wks; - - wks = h_sequence(h_uint32(), - h_uint8(), - h_many(h_uint8()), - h_end_p(), - NULL); - - return wks; + + H_RULE (domain, init_domain()); + H_ARULE(cstr, init_character_string()); + + H_RULE (a, h_uint32()); + H_RULE (ns, domain); + H_RULE (md, domain); + H_RULE (mf, domain); + H_RULE (cname, domain); + H_ARULE(soa, h_sequence(domain, // MNAME + domain, // RNAME + h_uint32(), // SERIAL + h_uint32(), // REFRESH + h_uint32(), // RETRY + h_uint32(), // EXPIRE + 
h_uint32(), // MINIMUM + NULL)); + H_RULE (mb, domain); + H_RULE (mg, domain); + H_RULE (mr, domain); + H_VRULE(null, h_many(h_uint8())); + H_RULE (wks, h_sequence(h_uint32(), + h_uint8(), + h_many(h_uint8()), + NULL)); + H_RULE (ptr, domain); + H_RULE (hinfo, h_sequence(cstr, cstr, NULL)); + H_RULE (minfo, h_sequence(domain, domain, NULL)); + H_RULE (mx, h_sequence(h_uint16(), domain, NULL)); + H_ARULE(txt, h_many1(cstr)); + + + parsers[ 0] = NULL; // there is no type 0 + parsers[ 1] = a; + parsers[ 2] = ns; + parsers[ 3] = md; + parsers[ 4] = mf; + parsers[ 5] = cname; + parsers[ 6] = soa; + parsers[ 7] = mb; + parsers[ 8] = mg; + parsers[ 9] = mr; + parsers[10] = null; + parsers[11] = wks; + parsers[12] = ptr; + parsers[13] = hinfo; + parsers[14] = minfo; + parsers[15] = mx; + parsers[16] = txt; + + // All parsers must consume their input exactly. + for(uint16_t i; i +#include #include #include -#include "allocator.h" +#include "hammer.h" +#include "internal.h" + struct arena_link { // TODO: @@ -36,22 +38,25 @@ struct arena_link { struct HArena_ { struct arena_link *head; + struct HAllocator_ *mm__; size_t block_size; size_t used; size_t wasted; }; -HArena *h_new_arena(size_t block_size) { +HArena *h_new_arena(HAllocator* mm__, size_t block_size) { if (block_size == 0) block_size = 4096; - struct HArena_ *ret = g_new(struct HArena_, 1); - struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + block_size); + struct HArena_ *ret = h_new(struct HArena_, 1); + struct arena_link *link = (struct arena_link*)mm__->alloc(mm__, sizeof(struct arena_link) + block_size); + memset(link, 0, sizeof(struct arena_link) + block_size); link->free = block_size; link->used = 0; link->next = NULL; ret->head = link; ret->block_size = block_size; ret->used = 0; + ret->mm__ = mm__; ret->wasted = sizeof(struct arena_link) + sizeof(struct HArena_) + block_size; return ret; } @@ -70,13 +75,15 @@ void* h_arena_malloc(HArena *arena, size_t size) { // This involves some annoying casting... arena->used += size; arena->wasted += sizeof(struct arena_link*); - void* link = g_malloc(size + sizeof(struct arena_link*)); + void* link = arena->mm__->alloc(arena->mm__, size + sizeof(struct arena_link*)); + memset(link, 0, size + sizeof(struct arena_link*)); *(struct arena_link**)link = arena->head->next; arena->head->next = (struct arena_link*)link; return (void*)(((uint8_t*)link) + sizeof(struct arena_link*)); } else { // we just need to allocate an ordinary new block. - struct arena_link *link = (struct arena_link*)g_malloc0(sizeof(struct arena_link) + arena->block_size); + struct arena_link *link = (struct arena_link*)arena->mm__->alloc(arena->mm__, sizeof(struct arena_link) + arena->block_size); + memset(link, 0, sizeof(struct arena_link) + arena->block_size); link->free = arena->block_size - size; link->used = size; link->next = arena->head; @@ -86,18 +93,23 @@ void* h_arena_malloc(HArena *arena, size_t size) { return link->rest; } } - + +void h_arena_free(HArena *arena, void* ptr) { + // To be used later... +} + void h_delete_arena(HArena *arena) { + HAllocator *mm__ = arena->mm__; struct arena_link *link = arena->head; while (link) { struct arena_link *next = link->next; // Even in the case of a special block, without the full arena // header, this is correct, because the next pointer is the first // in the structure. 
- g_free(link); + h_free(link); link = next; } - g_free(arena); + h_free(arena); } void h_allocator_stats(HArena *arena, HArenaStats *stats) { diff --git a/src/allocator.h b/src/allocator.h index 3ec9ae9..e83cae7 100644 --- a/src/allocator.h +++ b/src/allocator.h @@ -19,10 +19,17 @@ #define HAMMER_ALLOCATOR__H__ #include +typedef struct HAllocator_ { + void* (*alloc)(struct HAllocator_* allocator, size_t size); + void* (*realloc)(struct HAllocator_* allocator, void* ptr, size_t size); + void (*free)(struct HAllocator_* allocator, void* ptr); +} HAllocator; + typedef struct HArena_ HArena ; // hidden implementation -HArena *h_new_arena(size_t block_size); // pass 0 for default... +HArena *h_new_arena(HAllocator* allocator, size_t block_size); // pass 0 for default... void* h_arena_malloc(HArena *arena, size_t count) __attribute__(( malloc, alloc_size(2) )); +void h_arena_free(HArena *arena, void* ptr); // For future expansion, with alternate memory managers. void h_delete_arena(HArena *arena); typedef struct { diff --git a/src/backends/packrat.c b/src/backends/packrat.c new file mode 100644 index 0000000..cc2a9db --- /dev/null +++ b/src/backends/packrat.c @@ -0,0 +1,209 @@ +#include +#include "../internal.h" +#include "../parsers/parser_internal.h" + +// short-hand for constructing HCachedResult's +static HCachedResult *cached_result(const HParseState *state, HParseResult *result) { + HCachedResult *ret = a_new(HCachedResult, 1); + ret->result = result; + ret->input_stream = state->input_stream; + return ret; +} + +// Really library-internal tool to perform an uncached parse, and handle any common error-handling. +static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) { + // TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which codes is executed when. + HParseResult *tmp_res; + if (parser) { + HInputStream bak = state->input_stream; + tmp_res = parser->vtable->parse(parser->env, state); + if (tmp_res) { + tmp_res->arena = state->arena; + if (!state->input_stream.overrun) { + tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3); + if (state->input_stream.endianness & BIT_BIG_ENDIAN) + tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset; + else + tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset; + } else + tmp_res->bit_length = 0; + } + } else + tmp_res = NULL; + if (state->input_stream.overrun) + return NULL; // overrun is always failure. +#ifdef CONSISTENCY_CHECK + if (!tmp_res) { + state->input_stream = INVALID; + state->input_stream.input = key->input_pos.input; + } +#endif + return tmp_res; +} + +HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { + HParserCacheValue *cached = h_hashtable_get(state->cache, k); + HRecursionHead *head = h_hashtable_get(state->recursion_heads, k); + if (!head) { // No heads found + return cached; + } else { // Some heads found + if (!cached && head->head_parser != k->parser && !h_slist_find(head->involved_set, k->parser)) { + // Nothing in the cache, and the key parser is not involved + HParseResult *tmp = a_new(HParseResult, 1); + tmp->ast = NULL; tmp->arena = state->arena; + HParserCacheValue *ret = a_new(HParserCacheValue, 1); + ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp); + return ret; + } + if (h_slist_find(head->eval_set, k->parser)) { + // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. 
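/*
 * Illustrative sketch (not taken from this patch) of driving the HAllocator
 * interface introduced in allocator.h above. Hammer already provides a
 * malloc-backed `system_allocator` (used by h_compile and h_benchmark below);
 * the `demo_*` names here are hypothetical stand-ins to show the shape of the
 * hook and the new h_new_arena() signature.
 */
#include <stdlib.h>
#include "allocator.h"

static void *demo_alloc(HAllocator *mm__, size_t size)              { return malloc(size); }
static void *demo_realloc(HAllocator *mm__, void *ptr, size_t size) { return realloc(ptr, size); }
static void  demo_free(HAllocator *mm__, void *ptr)                 { free(ptr); }

static HAllocator demo_allocator = { demo_alloc, demo_realloc, demo_free };

static void demo_arena_use(void) {
  HArena *arena = h_new_arena(&demo_allocator, 0);   /* 0 selects the default block size */
  void *buf = h_arena_malloc(arena, 128);            /* arena memory needs no individual free */
  (void)buf;
  h_delete_arena(arena);                             /* releases every block through demo_free */
}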
+ head->eval_set = h_slist_remove_all(head->eval_set, k->parser); + HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); + // we know that cached has an entry here, modify it + if (!cached) + cached = a_new(HParserCacheValue, 1); + cached->value_type = PC_RIGHT; + cached->right = cached_result(state, tmp_res); + } + return cached; + } +} + +/* Setting up the left recursion. We have the LR for the rule head; + * we modify the involved_sets of all LRs in the stack, until we + * see the current parser again. + */ + +void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { + if (!rec_detect->head) { + HRecursionHead *some = a_new(HRecursionHead, 1); + some->head_parser = p; + some->involved_set = h_slist_new(state->arena); + some->eval_set = NULL; + rec_detect->head = some; + } + assert(state->lr_stack->head != NULL); + HSlistNode *head = state->lr_stack->head; + HLeftRec *lr; + while (head && (lr = head->elem)->rule != p) { + lr->head = rec_detect->head; + h_slist_push(lr->head->involved_set, (void*)lr->rule); + head = head->next; + } +} + +/* If recall() returns NULL, we need to store a dummy failure in the cache and compute the + * future parse. + */ + +HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) { + // Store the head into the recursion_heads + h_hashtable_put(state->recursion_heads, k, head); + HParserCacheValue *old_cached = h_hashtable_get(state->cache, k); + if (!old_cached || PC_LEFT == old_cached->value_type) + errx(1, "impossible match"); + HParseResult *old_res = old_cached->right->result; + + // reset the eval_set of the head of the recursion at each beginning of growth + head->eval_set = h_slist_copy(head->involved_set); + HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); + + if (tmp_res) { + if ((old_res->ast->index < tmp_res->ast->index) || + (old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) { + HParserCacheValue *v = a_new(HParserCacheValue, 1); + v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res); + h_hashtable_put(state->cache, k, v); + return grow(k, state, head); + } else { + // we're done with growing, we can remove data from the recursion head + h_hashtable_del(state->recursion_heads, k); + HParserCacheValue *cached = h_hashtable_get(state->cache, k); + if (cached && PC_RIGHT == cached->value_type) { + return cached->right->result; + } else { + errx(1, "impossible match"); + } + } + } else { + h_hashtable_del(state->recursion_heads, k); + return old_res; + } +} + +HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growable) { + if (growable->head) { + if (growable->head->head_parser != k->parser) { + // not the head rule, so not growing + return growable->seed; + } + else { + // update cache + HParserCacheValue *v = a_new(HParserCacheValue, 1); + v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed); + h_hashtable_put(state->cache, k, v); + if (!growable->seed) + return NULL; + else + return grow(k, state, growable->head); + } + } else { + errx(1, "lrAnswer with no head"); + } +} + +/* Warth's recursion. Hi Alessandro! */ +HParseResult* h_do_parse(const HParser* parser, HParseState *state) { + HParserCacheKey *key = a_new(HParserCacheKey, 1); + key->input_pos = state->input_stream; key->parser = parser; + HParserCacheValue *m = recall(key, state); + // check to see if there is already a result for this object... 
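+  // recall() leaves three cases: no cache entry at all, in which case we
+  // push a left-recursion seed (PC_LEFT), run the parser, and then either
+  // memoize the result or grow the recursion; a PC_LEFT entry, meaning this
+  // parser re-entered itself at the same position (left recursion), so
+  // setupLR() records the rules involved; or a PC_RIGHT entry, a memoized
+  // result we can return after restoring the saved input position.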
+ if (!m) { + // It doesn't exist, so create a dummy result to cache + HLeftRec *base = a_new(HLeftRec, 1); + base->seed = NULL; base->rule = parser; base->head = NULL; + h_slist_push(state->lr_stack, base); + // cache it + HParserCacheValue *dummy = a_new(HParserCacheValue, 1); + dummy->value_type = PC_LEFT; dummy->left = base; + h_hashtable_put(state->cache, key, dummy); + // parse the input + HParseResult *tmp_res = perform_lowlevel_parse(state, parser); + // the base variable has passed equality tests with the cache + h_slist_pop(state->lr_stack); + // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one + if (NULL == base->head) { + HParserCacheValue *right = a_new(HParserCacheValue, 1); + right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res); + h_hashtable_put(state->cache, key, right); + return tmp_res; + } else { + base->seed = tmp_res; + HParseResult *res = lr_answer(key, state, base); + return res; + } + } else { + // it exists! + if (PC_LEFT == m->value_type) { + setupLR(parser, state, m->left); + return m->left->seed; // BUG: this might not be correct + } else { + state->input_stream = m->right->input_stream; + return m->right->result; + } + } +} + +int h_packrat_compile(HAllocator* mm__, const HParser* parser, const void* params) { + return 0; // No compilation necessary, and everything should work + // out of the box. +} + +HParseResult *h_packrat_parse(HAllocator* mm__, const HParser* parser, HParseState* parse_state) { + return h_do_parse(parser, parse_state); +} + +HParserBackendVTable h__packrat_backend_vtable = { + .compile = h_packrat_compile, + .parse = h_packrat_parse +}; diff --git a/src/benchmark.c b/src/benchmark.c new file mode 100644 index 0000000..577a380 --- /dev/null +++ b/src/benchmark.c @@ -0,0 +1,114 @@ +#include +#include +#include +#include "hammer.h" +#include "internal.h" + +/* + Usage: + Create your parser (i.e., const HParser*), and an array of test cases + (i.e., HParserTestcase[], terminated by { NULL, 0, NULL }) and then call + + HBenchmarkResults* results = h_benchmark(parser, testcases); + + Then, you can format a report with: + + h_benchmark_report(stdout, results); + + or just generate code to make the parser run as fast as possible with: + + h_benchmark_dump_optimized_code(stdout, results); + +*/ + +HBenchmarkResults *h_benchmark(const HParser* parser, HParserTestcase* testcases) { + return h_benchmark__m(&system_allocator, parser, testcases); +} + +HBenchmarkResults *h_benchmark__m(HAllocator* mm__, const HParser* parser, HParserTestcase* testcases) { + // For now, just output the results to stderr + HParserTestcase* tc = testcases; + HParserBackend backend = PB_MIN; + HBenchmarkResults *ret = h_new(HBenchmarkResults, 1); + ret->len = PB_MAX-PB_MIN; + ret->results = h_new(HBackendResults, ret->len); + + for (backend = PB_MIN; backend < PB_MAX; backend++) { + ret->results[backend].backend = backend; + // Step 1: Compile grammar for given parser... + if (h_compile(parser, PB_MIN, NULL) == -1) { + // backend inappropriate for grammar... + fprintf(stderr, "failed\n"); + ret->results[backend].compile_success = false; + ret->results[backend].n_testcases = 0; + ret->results[backend].failed_testcases = 0; + ret->results[backend].cases = NULL; + continue; + } + ret->results[backend].compile_success = true; + int tc_failed = 0; + // Step 1: verify all test cases. 
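/*
 * Minimal sketch (not part of this patch) of the entry points documented at
 * the top of this file. The testcase layout follows the { input, length,
 * expected } terminator shown above; the expected string below is only a
 * placeholder, since its exact text depends on h_write_result_unamb().
 */
#include <stdio.h>
#include "hammer.h"   /* assuming HParserTestcase, HBenchmarkResults, h_benchmark,
                         and h_benchmark_report are declared here */

int main(void) {
  const HParser *p = h_many1(h_ch('a'));
  HParserTestcase cases[] = {
    { (uint8_t*)"aaa", 3, "<expected unambiguous output>" },  /* placeholder expectation */
    { NULL, 0, NULL }
  };
  HBenchmarkResults *results = h_benchmark(p, cases);
  h_benchmark_report(stdout, results);
  return 0;
}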
+ ret->results[backend].n_testcases = 0; + ret->results[backend].failed_testcases = 0; + for (tc = testcases; tc->input != NULL; tc++) { + ret->results[backend].n_testcases++; + HParseResult *res = h_parse(parser, tc->input, tc->length); + char* res_unamb; + if (res != NULL) { + res_unamb = h_write_result_unamb(res->ast); + } else + res_unamb = NULL; + if ((res_unamb == NULL && tc->output_unambiguous == NULL) + || (strcmp(res_unamb, tc->output_unambiguous) != 0)) { + // test case failed... + fprintf(stderr, "failed\n"); + // We want to run all testcases, for purposes of generating a + // report. (eg, if users are trying to fix a grammar for a + // faster backend) + tc_failed++; + ret->results[backend].failed_testcases++; + } + h_parse_result_free(res); + } + + if (tc_failed > 0) { + // Can't use this parser; skip to the next + fprintf(stderr, "Backend failed testcases; skipping benchmark\n"); + continue; + } + + ret->results[backend].cases = h_new(HCaseResult, ret->results[backend].n_testcases); + size_t cur_case = 0; + + for (tc = testcases; tc->input != NULL; tc++) { + // The goal is to run each testcase for at least 50ms each + // TODO: replace this with a posix timer-based benchmark. (cf. timerfd_create, timer_create, setitimer) + int count = 1, cur; + struct timespec ts_start, ts_end; + long long time_diff; + do { + count *= 2; // Yes, this means that the first run will run the function twice. This is fine, as we want multiple runs anyway. + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_start); + for (cur = 0; cur < count; cur++) { + h_parse_result_free(h_parse(parser, tc->input, tc->length)); + } + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts_end); + + // time_diff is in ns + time_diff = (ts_end.tv_sec - ts_start.tv_sec) * 1000000000 + (ts_end.tv_nsec - ts_start.tv_nsec); + } while (time_diff < 100000000); + ret->results[backend].cases[cur_case].parse_time = (time_diff / count); + cur_case++; + } + } + return ret; +} + +void h_benchmark_report(FILE* stream, HBenchmarkResults* result) { + for (size_t i=0; ilen; ++i) { + fprintf(stream, "Backend %ld ... \n", i); + for (size_t j=0; jresults[i].n_testcases; ++j) { + fprintf(stream, "Case %ld: %ld ns/parse\n", j, result->results[i].cases[j].parse_time); + } + } +} diff --git a/src/bitreader.c b/src/bitreader.c index bb93377..4971076 100644 --- a/src/bitreader.c +++ b/src/bitreader.c @@ -108,70 +108,3 @@ long long h_read_bits(HInputStream* state, int count, char signed_p) { out <<= final_shift; return (out ^ msb) - msb; // perform sign extension } - -#ifdef INCLUDE_TESTS - -#define MK_INPUT_STREAM(buf,len,endianness_) \ - { \ - .input = (uint8_t*)buf, \ - .length = len, \ - .index = 0, \ - .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 
8 : 0), \ - .endianness = endianness_ \ - } - - -static void test_bitreader_ints(void) { - HInputStream is = MK_INPUT_STREAM("\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 8, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); - g_check_cmplong(h_read_bits(&is, 64, true), ==, -0x200000000); -} - -static void test_bitreader_be(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); - g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x03); - g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x52); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A); -} -static void test_bitreader_le(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); - g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x02); - g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x4D); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B); -} - -static void test_largebits_be(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); - g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x352); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A); -} - -static void test_largebits_le(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); - g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x26A); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B); -} - -static void test_offset_largebits_be(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xD); - g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x25A); -} - -static void test_offset_largebits_le(void) { - HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); - g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xA); - g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x2D3); -} - - -void register_bitreader_tests(void) { - g_test_add_func("/core/bitreader/be", test_bitreader_be); - g_test_add_func("/core/bitreader/le", test_bitreader_le); - g_test_add_func("/core/bitreader/largebits-be", test_largebits_be); - g_test_add_func("/core/bitreader/largebits-le", test_largebits_le); - g_test_add_func("/core/bitreader/offset-largebits-be", test_offset_largebits_be); - g_test_add_func("/core/bitreader/offset-largebits-le", test_offset_largebits_le); - g_test_add_func("/core/bitreader/ints", test_bitreader_ints); -} - -#endif // #ifdef INCLUDE_TESTS diff --git a/src/bitwriter.c b/src/bitwriter.c index 9a72e22..9374a88 100644 --- a/src/bitwriter.c +++ b/src/bitwriter.c @@ -4,22 +4,16 @@ #include "internal.h" #include "test_suite.h" -// This file provides the logical inverse of bitreader.c -struct HBitWriter_ { - uint8_t* buf; - size_t index; - size_t capacity; - char bit_offset; // unlike in bit_reader, this is always the number - // of used bits in the current byte. i.e., 0 always - // means that 8 bits are available for use. 
- char flags; -}; +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) // h_bit_writer_ -HBitWriter *h_bit_writer_new() { - HBitWriter *writer = g_new0(HBitWriter, 1); - writer->buf = g_malloc0(writer->capacity = 8); - +HBitWriter *h_bit_writer_new(HAllocator* mm__) { + HBitWriter *writer = h_new(HBitWriter, 1); + memset(writer, 0, sizeof(*writer)); + writer->buf = mm__->alloc(mm__, writer->capacity = 8); + memset(writer->buf, 0, writer->capacity); + writer->mm__ = mm__; writer->flags = BYTE_BIG_ENDIAN | BIT_BIG_ENDIAN; return writer; @@ -41,7 +35,7 @@ static void h_bit_writer_reserve(HBitWriter* w, size_t nbits) { int nbytes = (nbits + 7) / 8 + ((w->bit_offset != 0) ? 1 : 0); size_t old_capacity = w->capacity; while (w->index + nbytes >= w->capacity) { - w->buf = g_realloc(w->buf, w->capacity *= 2); + w->buf = w->mm__->realloc(w->mm__, w->buf, w->capacity *= 2); } if (old_capacity != w->capacity) @@ -100,114 +94,7 @@ const uint8_t *h_bit_writer_get_buffer(HBitWriter* w, size_t *len) { } void h_bit_writer_free(HBitWriter* w) { - g_free(w->buf); - g_free(w); + HAllocator *mm__ = w->mm__; + h_free(w->buf); + h_free(w); } - -#ifdef INCLUDE_TESTS -// TESTS BELOW HERE -typedef struct { - unsigned long long data; - size_t nbits; -} bitwriter_test_elem; // should end with {0,0} - -void run_bitwriter_test(bitwriter_test_elem data[], char flags) { - size_t len; - const uint8_t *buf; - HBitWriter *w = h_bit_writer_new(); - int i; - w->flags = flags; - for (i = 0; data[i].nbits; i++) { - h_bit_writer_put(w, data[i].data, data[i].nbits); - } - - buf = h_bit_writer_get_buffer(w, &len); - HInputStream input = { - .input = buf, - .index = 0, - .length = len, - .bit_offset = (flags & BIT_BIG_ENDIAN) ? 8 : 0, - .endianness = flags, - .overrun = 0 - }; - - for (i = 0; data[i].nbits; i++) { - g_check_cmpulonglong ((unsigned long long)h_read_bits(&input, data[i].nbits, FALSE), ==, data[i].data); - } -} - -static void test_bitwriter_ints(void) { - bitwriter_test_elem data[] = { - { -0x200000000, 64 }, - { 0,0 } - }; - run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); -} - -static void test_bitwriter_be(void) { - bitwriter_test_elem data[] = { - { 0x03, 3 }, - { 0x52, 8 }, - { 0x1A, 5 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); -} - -static void test_bitwriter_le(void) { - bitwriter_test_elem data[] = { - { 0x02, 3 }, - { 0x4D, 8 }, - { 0x0B, 5 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); -} - -static void test_largebits_be(void) { - bitwriter_test_elem data[] = { - { 0x352, 11 }, - { 0x1A, 5 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); -} - -static void test_largebits_le(void) { - bitwriter_test_elem data[] = { - { 0x26A, 11 }, - { 0x0B, 5 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); -} - -static void test_offset_largebits_be(void) { - bitwriter_test_elem data[] = { - { 0xD, 5 }, - { 0x25A, 11 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); -} - -static void test_offset_largebits_le(void) { - bitwriter_test_elem data[] = { - { 0xA, 5 }, - { 0x2D3, 11 }, - { 0, 0 } - }; - run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); -} - -void register_bitwriter_tests(void) { - g_test_add_func("/core/bitwriter/be", test_bitwriter_be); - g_test_add_func("/core/bitwriter/le", test_bitwriter_le); - g_test_add_func("/core/bitwriter/largebits-be", test_largebits_be); - 
g_test_add_func("/core/bitwriter/largebits-le", test_largebits_le); - g_test_add_func("/core/bitwriter/offset-largebits-be", test_offset_largebits_be); - g_test_add_func("/core/bitwriter/offset-largebits-le", test_offset_largebits_le); - g_test_add_func("/core/bitwriter/ints", test_bitwriter_ints); -} - -#endif // #ifdef INCLUDE_TESTS diff --git a/src/compile.c b/src/compile.c new file mode 100644 index 0000000..e151cfb --- /dev/null +++ b/src/compile.c @@ -0,0 +1,15 @@ +// This file contains functions related to managing multiple parse backends +#include "hammer.h" +#include "internal.h" + +static HParserBackendVTable *backends[PB_MAX] = { + &h__packrat_backend_vtable, +}; + +int h_compile(const HParser* parser, HParserBackend backend, const void* params) { + return h_compile__m(&system_allocator, parser, backend, params); +} + +int h_compile__m(HAllocator* mm__, const HParser* parser, HParserBackend backend, const void* params) { + return backends[backend]->compile(mm__, parser, params); +} diff --git a/src/datastructures.c b/src/datastructures.c index 004f7e7..3d94804 100644 --- a/src/datastructures.c +++ b/src/datastructures.c @@ -2,7 +2,8 @@ #include "hammer.h" #include "allocator.h" #include -#include +#include +#include // {{{ counted arrays @@ -15,6 +16,7 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size) { ret->elements = h_arena_malloc(arena, sizeof(void*) * size); return ret; } + HCountedArray *h_carray_new(HArena * arena) { return h_carray_new_sized(arena, 4); } @@ -30,3 +32,213 @@ void h_carray_append(HCountedArray *array, void* item) { } array->elements[array->used++] = item; } + +// HSlist +HSlist* h_slist_new(HArena *arena) { + HSlist *ret = h_arena_malloc(arena, sizeof(HSlist)); + ret->head = NULL; + ret->arena = arena; + return ret; +} + +HSlist* h_slist_copy(HSlist *slist) { + HSlist *ret = h_slist_new(slist->arena); + HSlistNode *head = slist->head; + HSlistNode *tail; + if (head != NULL) { + h_slist_push(ret, head->elem); + tail = ret->head; + head = head->next; + } + while (head != NULL) { + // append head item to tail in a new node + HSlistNode *node = h_arena_malloc(slist->arena, sizeof(HSlistNode)); + node->elem = head->elem; + node->next = NULL; + tail = tail->next = node; + head = head->next; + } + return ret; +} + +void* h_slist_pop(HSlist *slist) { + HSlistNode *head = slist->head; + if (!head) + return NULL; + void* ret = head->elem; + slist->head = head->next; + h_arena_free(slist->arena, head); + return ret; +} + +void h_slist_push(HSlist *slist, void* item) { + HSlistNode *hnode = h_arena_malloc(slist->arena, sizeof(HSlistNode)); + hnode->elem = item; + hnode->next = slist->head; + // write memory barrier here. + slist->head = hnode; +} + +bool h_slist_find(HSlist *slist, const void* item) { + assert (item != NULL); + HSlistNode *head = slist->head; + while (head != NULL) { + if (head->elem == item) + return true; + head = head->next; + } + return false; +} + +HSlist* h_slist_remove_all(HSlist *slist, const void* item) { + assert (item != NULL); + HSlistNode *node = slist->head; + HSlistNode *prev = NULL; + while (node != NULL) { + if (node->elem == item) { + HSlistNode *next = node->next; + if (prev) + prev->next = next; + else + slist->head = next; + // FIXME free the removed node! this leaks. 
+ node = next; + } + else { + prev = node; + node = prev->next; + } + } + return slist; +} + +void h_slist_free(HSlist *slist) { + while (slist->head != NULL) + h_slist_pop(slist); + h_arena_free(slist->arena, slist); +} + +HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc) { + HHashTable *ht = h_arena_malloc(arena, sizeof(HHashTable)); + ht->hashFunc = hashFunc; + ht->equalFunc = equalFunc; + ht->capacity = 64; // to start; should be tuned later... + ht->used = 0; + ht->arena = arena; + ht->contents = h_arena_malloc(arena, sizeof(HHashTableEntry) * ht->capacity); + for (size_t i = 0; i < ht->capacity; i++) { + ht->contents[i].key = NULL; + ht->contents[i].value = NULL; + ht->contents[i].next = NULL; + ht->contents[i].hashval = 0; + } + //memset(ht->contents, 0, sizeof(HHashTableEntry) * ht->capacity); + return ht; +} + +void* h_hashtable_get(HHashTable* ht, void* key) { + HHashValue hashval = ht->hashFunc(key); +#ifdef CONSISTENCY_CHECK + assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 +#endif + + HHashTableEntry *hte = NULL; + for (hte = &ht->contents[hashval & (ht->capacity - 1)]; + hte != NULL; + hte = hte->next) { + if (hte->hashval != hashval) + continue; + if (ht->equalFunc(key, hte->key)) + return hte->value; + } + return NULL; +} + +void h_hashtable_put(HHashTable* ht, void* key, void* value) { + // # Start with a rebalancing + //h_hashtable_ensure_capacity(ht, ht->used + 1); + + HHashValue hashval = ht->hashFunc(key); +#ifdef CONSISTENCY_CHECK + assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 +#endif + + HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)]; + if (hte->key != NULL) { + do { + if (hte->hashval == hashval && ht->equalFunc(key, hte->key)) + goto insert_here; + if (hte->next != NULL) + hte = hte->next; + } while (hte->next != NULL); + // Add a new link... + assert (hte->next == NULL); + hte->next = h_arena_malloc(ht->arena, sizeof(HHashTableEntry)); + hte = hte->next; + hte->next = NULL; + ht->used++; + } else + ht->used++; + + insert_here: + hte->key = key; + hte->value = value; + hte->hashval = hashval; +} + +int h_hashtable_present(HHashTable* ht, void* key) { + HHashValue hashval = ht->hashFunc(key); +#ifdef CONSISTENCY_CHECK + assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 +#endif + + for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)]; + hte != NULL; + hte = hte->next) { + if (hte->hashval != hashval) + continue; + if (ht->equalFunc(key, hte->key)) + return true; + } + return false; +} +void h_hashtable_del(HHashTable* ht, void* key) { + HHashValue hashval = ht->hashFunc(key); +#ifdef CONSISTENCY_CHECK + assert((ht->capacity & (ht->capacity - 1)) == 0); // capacity is a power of 2 +#endif + + for (HHashTableEntry *hte = &ht->contents[hashval & (ht->capacity - 1)]; + hte != NULL; + hte = hte->next) { + if (hte->hashval != hashval) + continue; + if (ht->equalFunc(key, hte->key)) { + // FIXME: Leaks keys and values. + HHashTableEntry* hten = hte->next; + if (hten != NULL) { + *hte = *hten; + h_arena_free(ht->arena, hten); + } else { + hte->key = hte->value = NULL; + hte->hashval = 0; + } + return; + } + } +} +void h_hashtable_free(HHashTable* ht) { + for (size_t i = 0; i < ht->capacity; i++) { + HHashTableEntry *hten, *hte = &ht->contents[i]; + // FIXME: Free key and value + hte = hte->next; + while (hte != NULL) { + // FIXME: leaks keys and values. 
+ hten = hte->next; + h_arena_free(ht->arena, hte); + hte = hten; + } + } + h_arena_free(ht->arena, ht->contents); +} + diff --git a/src/glue.c b/src/glue.c new file mode 100644 index 0000000..f1e086a --- /dev/null +++ b/src/glue.c @@ -0,0 +1,177 @@ +#include "glue.h" +#include "../src/internal.h" // for h_carray_* + + +// The action equivalent of h_ignore. +const HParsedToken *h_act_ignore(const HParseResult *p) +{ + return NULL; +} + +// Helper to build HAction's that pick one index out of a sequence. +const HParsedToken *h_act_index(int i, const HParseResult *p) +{ + if(!p) return NULL; + + const HParsedToken *tok = p->ast; + + if(!tok || tok->token_type != TT_SEQUENCE) + return NULL; + + const HCountedArray *seq = tok->seq; + size_t n = seq->used; + + if(i<0 || (size_t)i>=n) + return NULL; + else + return tok->seq->elements[i]; +} + +// Action version of h_seq_flatten. +const HParsedToken *h_act_flatten(const HParseResult *p) { + return h_seq_flatten(p->arena, p->ast); +} + +// Low-level helper for the h_make family. +HParsedToken *h_make_(HArena *arena, HTokenType type) +{ + HParsedToken *ret = h_arena_malloc(arena, sizeof(HParsedToken)); + ret->token_type = type; + return ret; +} + +HParsedToken *h_make(HArena *arena, HTokenType type, void *value) +{ + assert(type >= TT_USER); + HParsedToken *ret = h_make_(arena, type); + ret->user = value; + return ret; +} + +HParsedToken *h_make_seq(HArena *arena) +{ + HParsedToken *ret = h_make_(arena, TT_SEQUENCE); + ret->seq = h_carray_new(arena); + return ret; +} + +HParsedToken *h_make_seqn(HArena *arena, size_t n) +{ + HParsedToken *ret = h_make_(arena, TT_SEQUENCE); + ret->seq = h_carray_new_sized(arena, n); + return ret; +} + +HParsedToken *h_make_bytes(HArena *arena, size_t len) +{ + HParsedToken *ret = h_make_(arena, TT_BYTES); + ret->bytes.len = len; + ret->bytes.token = h_arena_malloc(arena, len); + return ret; +} + +HParsedToken *h_make_sint(HArena *arena, int64_t val) +{ + HParsedToken *ret = h_make_(arena, TT_SINT); + ret->sint = val; + return ret; +} + +HParsedToken *h_make_uint(HArena *arena, uint64_t val) +{ + HParsedToken *ret = h_make_(arena, TT_UINT); + ret->uint = val; + return ret; +} + +// XXX -> internal +HParsedToken *h_carray_index(const HCountedArray *a, size_t i) +{ + assert(i < a->used); + return a->elements[i]; +} + +size_t h_seq_len(const HParsedToken *p) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return p->seq->used; +} + +HParsedToken **h_seq_elements(const HParsedToken *p) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return p->seq->elements; +} + +HParsedToken *h_seq_index(const HParsedToken *p, size_t i) +{ + assert(p != NULL); + assert(p->token_type == TT_SEQUENCE); + return h_carray_index(p->seq, i); +} + +HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...) 
+{ + va_list va; + + va_start(va, i); + HParsedToken *ret = h_seq_index_vpath(p, i, va); + va_end(va); + + return ret; +} + +HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va) +{ + HParsedToken *ret = h_seq_index(p, i); + int j; + + while((j = va_arg(va, int)) >= 0) + ret = h_seq_index(p, j); + + return ret; +} + +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + + h_carray_append(xs->seq, (HParsedToken *)x); +} + +void h_seq_append(HParsedToken *xs, const HParsedToken *ys) +{ + assert(xs != NULL); + assert(xs->token_type == TT_SEQUENCE); + assert(ys != NULL); + assert(ys->token_type == TT_SEQUENCE); + + for(size_t i=0; iseq->used; i++) + h_carray_append(xs->seq, ys->seq->elements[i]); +} + +// Flatten nested sequences. Always returns a sequence. +// If input element is not a sequence, returns it as a singleton sequence. +const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p) +{ + assert(p != NULL); + + HParsedToken *ret = h_make_seq(arena); + switch(p->token_type) { + case TT_SEQUENCE: + // Flatten and append all. + for(size_t i; iseq->used; i++) { + h_seq_append(ret, h_seq_flatten(arena, h_seq_index(p, i))); + } + break; + default: + // Make singleton sequence. + h_seq_snoc(ret, p); + break; + } + + return ret; +} diff --git a/src/glue.h b/src/glue.h new file mode 100644 index 0000000..3125ae0 --- /dev/null +++ b/src/glue.h @@ -0,0 +1,253 @@ +// +// API additions for writing grammar and semantic actions more concisely +// +// +// Quick Overview: +// +// Grammars can be succinctly specified with the family of H_RULE macros. +// H_RULE defines a plain parser variable. H_ARULE additionally attaches a +// semantic action; H_VRULE attaches a validation. H_AVRULE and H_VARULE +// combine both. +// +// A few standard semantic actions are defined below. The H_ACT_APPLY macro +// allows semantic actions to be defined by "partial application" of +// a generic action to fixed paramters. +// +// The definition of more complex semantic actions will usually consist of +// extracting data from the given parse tree and constructing a token of custom +// type to represent the result. A number of functions and convenience macros +// are provided to capture the most common cases and idioms. +// +// See the leading comment blocks on the sections below for more details. +// + +#ifndef HAMMER_GLUE__H +#define HAMMER_GLUE__H + +#include +#include "hammer.h" + + +// +// Grammar specification +// +// H_RULE is simply a short-hand for the typical declaration and definition of +// a parser variable. See its plain definition below. The goal is to save +// horizontal space as well as to provide a clear and unified look together with +// the other macro variants that stays close to an abstract PEG or BNF grammar. +// The latter goal is more specifically enabled by H_ARULE, H_VRULE, and their +// combinations as they allow the definition of syntax to be given without +// intermingling it with the semantic specifications. +// +// H_ARULE defines a variable just like H_RULE but attaches a semantic action +// to the result of the parser via h_action. The action is expected to be +// named act_. +// +// H_VRULE is analogous to H_ARULE but attaches a validation via h_attr_bool. +// The validation is expected to be named validate_. +// +// H_VARULE combines H_RULE with both an action and a validation. The action is +// attached before the validation, i.e. 
the validation receives as input the +// result of the action. +// +// H_AVRULE is like H_VARULE but the action is attached outside the validation, +// i.e. the validation receives the uninterpreted AST as input. +// + + +#define H_RULE(rule, def) const HParser *rule = def +#define H_ARULE(rule, def) const HParser *rule = h_action(def, act_ ## rule) +#define H_VRULE(rule, def) const HParser *rule = \ + h_attr_bool(def, validate_ ## rule) +#define H_VARULE(rule, def) const HParser *rule = \ + h_attr_bool(h_action(def, act_ ## rule), validate_ ## rule) +#define H_AVRULE(rule, def) const HParser *rule = \ + h_action(h_attr_bool(def, validate_ ## rule), act_ ## rule) + + +// +// Pre-fab semantic actions +// +// A collection of generally useful semantic actions is provided. +// +// h_act_ignore is the action equivalent of the parser combinator h_ignore. It +// simply causes the AST it is applied to to be replaced with NULL. This most +// importantly causes it to be elided from the result of a surrounding +// h_sequence. +// +// h_act_index is of note as it is not itself suitable to be passed to +// h_action. It is parameterized by an index to be picked from a sequence +// token. It must be wrapped in a proper HAction to be used. The H_ACT_APPLY +// macro provides a concise way to define such a parameter-application wrapper. +// +// h_act_flatten acts on a token of possibly nested sequences by recursively +// flattening it into a single sequence. Cf. h_seq_flatten below. +// +// H_ACT_APPLY implements "partial application" for semantic actions. It +// defines a new action that supplies given parameters to a parameterized +// action such as h_act_index. +// + +const HParsedToken *h_act_ignore(const HParseResult *p); +const HParsedToken *h_act_index(int i, const HParseResult *p); +const HParsedToken *h_act_flatten(const HParseResult *p); + +// Define 'myaction' as a specialization of 'paction' by supplying the leading +// parameters. +#define H_ACT_APPLY(myaction, paction, ...) \ + const HParsedToken *myaction(const HParseResult *p) { \ + return paction(__VA_ARGS__, p); \ + } + + +// +// Working with HParsedTokens +// +// The type HParsedToken represents a dynamically-typed universe of values. +// Declared below are constructors to turn ordinary values into their +// HParsedToken equivalents, extractors to retrieve the original values from +// inside an HParsedToken, and functions that inspect and modify tokens of +// sequence type directly. +// +// In addition, there are a number of short-hand macros that work with some +// conventions to eliminate common boilerplate. These conventions are listed +// below. Be sure to follow them if you want to use the respective macros. +// +// * The single argument to semantic actions should be called 'p'. +// +// The H_MAKE macros suppy 'p->arena' to their underlying h_make +// counterparts. The H_FIELD macros supply 'p->ast' to their underlying +// H_INDEX counterparts. +// +// * For each custom token type, there should be a typedef for the +// corresponding value type. +// +// H_CAST, H_INDEX and H_FIELD cast the void * user field of such a token to +// a pointer to the given type. +// +// * For each custom token type, say 'foo_t', there must be an integer +// constant 'TT_foo_t' to identify the token type. This constant must have a +// value greater or equal than TT_USER. +// +// One idiom is to define an enum for all custom token types and to assign a +// value of TT_USER to the first element. This can be viewed as extending +// the HTokenType enum. 
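+// (The DNS example in this patch uses exactly this idiom; see dns.h:
+//  enum DNSTokenType_ { TT_dns_message_t = TT_USER, TT_dns_header_t, ... };)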
+// +// The H_MAKE and H_ASSERT macros derive the name of the token type constant +// from the given type name. +// +// +// The H_ALLOC macro is useful for allocating values of custom token types. +// +// The H_MAKE family of macros construct tokens of a given type. The native +// token types are indicated by a corresponding suffix such as in H_MAKE_SEQ. +// The form with no suffix is used for custom token types. This convention is +// also used for other macro and function families. +// +// The H_ASSERT family simply asserts that a given token has the expected type. +// It mainly serves as an implementation aid for H_CAST. Of note in that regard +// is that, unlike the standard 'assert' macro, these form _expressions_ that +// return the value of their token argument; thus they can be used in a +// "pass-through" fashion inside other expressions. +// +// The H_CAST family combines a type assertion with access to the +// statically-typed value inside a token. +// +// A number of functions h_seq_* operate on and inspect sequence tokens. +// Note that H_MAKE_SEQ takes no arguments and constructs an empty sequence. +// Therefore there are h_seq_snoc and h_seq_append to build up sequences. +// +// The macro families H_FIELD and H_INDEX combine index access on a sequence +// with a cast to the appropriate result type. H_FIELD is used to access the +// elements of the argument token 'p' in an action. H_INDEX allows any sequence +// token to be specified. Both macro families take an arbitrary number of index +// arguments, giving access to elements in nested sequences by path. +// These macros are very useful to avoid spaghetti chains of unchecked pointer +// dereferences. +// + +// Standard short-hand for arena-allocating a variable in a semantic action. +#define H_ALLOC(TYP) ((TYP *) h_arena_malloc(p->arena, sizeof(TYP))) + +// Token constructors... + +HParsedToken *h_make(HArena *arena, HTokenType type, void *value); +HParsedToken *h_make_seq(HArena *arena); // Makes empty sequence. +HParsedToken *h_make_seqn(HArena *arena, size_t n); // Makes empty sequence of expected size n. +HParsedToken *h_make_bytes(HArena *arena, size_t len); +HParsedToken *h_make_sint(HArena *arena, int64_t val); +HParsedToken *h_make_uint(HArena *arena, uint64_t val); + +// Standard short-hands to make tokens in an action. +#define H_MAKE(TYP, VAL) h_make(p->arena, TT_ ## TYP, VAL) +#define H_MAKE_SEQ() h_make_seq(p->arena) +#define H_MAKE_SEQN(N) h_make_seqn(p->arena, N) +#define H_MAKE_BYTES(LEN) h_make_bytes(p->arena, LEN) +#define H_MAKE_SINT(VAL) h_make_sint(p->arena, VAL) +#define H_MAKE_UINT(VAL) h_make_uint(p->arena, VAL) + +// Extract (cast) type-specific value back from HParsedTokens... + +// Pass-through assertion that a given token has the expected type. +#define h_assert_type(T,P) (assert(P->token_type == (HTokenType)T), P) + +// Convenience short-hand forms of h_assert_type. +#define H_ASSERT(TYP, TOK) h_assert_type(TT_ ## TYP, TOK) +#define H_ASSERT_SEQ(TOK) h_assert_type(TT_SEQUENCE, TOK) +#define H_ASSERT_BYTES(TOK) h_assert_type(TT_BYTES, TOK) +#define H_ASSERT_SINT(TOK) h_assert_type(TT_SINT, TOK) +#define H_ASSERT_UINT(TOK) h_assert_type(TT_UINT, TOK) + +// Assert expected type and return contained value. +#define H_CAST(TYP, TOK) ((TYP *) H_ASSERT(TYP, TOK)->user) +#define H_CAST_SEQ(TOK) (H_ASSERT_SEQ(TOK)->seq) +#define H_CAST_BYTES(TOK) (H_ASSERT_BYTES(TOK)->bytes) +#define H_CAST_SINT(TOK) (H_ASSERT_SINT(TOK)->sint) +#define H_CAST_UINT(TOK) (H_ASSERT_UINT(TOK)->uint) + +// Sequence access... 
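+// As a worked illustration of these conventions (hypothetical, in the style
+// of the DNS example elsewhere in this patch -- 'point_t' is not a real
+// Hammer type):
+//
+//   typedef struct { uint8_t x, y; } point_t;
+//   enum { TT_point_t = TT_USER };           // extend HTokenType
+//
+//   const HParsedToken *act_point(const HParseResult *p) {
+//     point_t *pt = H_ALLOC(point_t);        // arena-allocate the result value
+//     pt->x = H_FIELD_UINT(0);               // p->ast is the sequence (x, y)
+//     pt->y = H_FIELD_UINT(1);
+//     return H_MAKE(point_t, pt);            // wrap it as a TT_point_t token
+//   }
+//
+//   const HParser *init_point(void) {
+//     H_RULE (coord, h_uint8());
+//     H_ARULE(point, h_sequence(coord, coord, NULL));  // attaches act_point
+//     return point;
+//   }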
+ +// Return the length of a sequence. +size_t h_seq_len(const HParsedToken *p); + +// Access a sequence's element array. +HParsedToken **h_seq_elements(const HParsedToken *p); + +// Access a sequence element by index. +HParsedToken *h_seq_index(const HParsedToken *p, size_t i); + +// Access an element in a nested sequence by a path of indices. +HParsedToken *h_seq_index_path(const HParsedToken *p, size_t i, ...); +HParsedToken *h_seq_index_vpath(const HParsedToken *p, size_t i, va_list va); + +// Convenience macros combining (nested) index access and h_cast. +#define H_INDEX(TYP, SEQ, ...) H_CAST(TYP, H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_SEQ(SEQ, ...) H_CAST_SEQ(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_BYTES(SEQ, ...) H_CAST_BYTES(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_SINT(SEQ, ...) H_CAST_SINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_UINT(SEQ, ...) H_CAST_UINT(H_INDEX_TOKEN(SEQ, __VA_ARGS__)) +#define H_INDEX_TOKEN(SEQ, ...) h_seq_index_path(SEQ, __VA_ARGS__, -1) + +// Standard short-hand to access and cast elements on a sequence token. +#define H_FIELD(TYP, ...) H_INDEX(TYP, p->ast, __VA_ARGS__) +#define H_FIELD_SEQ(...) H_INDEX_SEQ(p->ast, __VA_ARGS__) +#define H_FIELD_BYTES(...) H_INDEX_BYTES(p->ast, __VA_ARGS__) +#define H_FIELD_SINT(...) H_INDEX_SINT(p->ast, __VA_ARGS__) +#define H_FIELD_UINT(...) H_INDEX_UINT(p->ast, __VA_ARGS__) + +// Lower-level helper for h_seq_index. +HParsedToken *h_carray_index(const HCountedArray *a, size_t i); // XXX -> internal + +// Sequence modification... + +// Add elements to a sequence. +void h_seq_snoc(HParsedToken *xs, const HParsedToken *x); // append one +void h_seq_append(HParsedToken *xs, const HParsedToken *ys); // append many + +// XXX TODO: Remove elements from a sequence. + +// Flatten nested sequences into one. +const HParsedToken *h_seq_flatten(HArena *arena, const HParsedToken *p); + + +#endif diff --git a/src/hammer.c b/src/hammer.c index d1a3334..c33f6c8 100644 --- a/src/hammer.c +++ b/src/hammer.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include @@ -26,202 +26,14 @@ #include "allocator.h" #include "parsers/parser_internal.h" -static guint djbhash(const uint8_t *buf, size_t len) { - guint hash = 5381; +static uint32_t djbhash(const uint8_t *buf, size_t len) { + uint32_t hash = 5381; while (len--) { hash = hash * 33 + *buf++; } return hash; } -// short-hand for constructing HCachedResult's -static HCachedResult *cached_result(const HParseState *state, HParseResult *result) { - HCachedResult *ret = a_new(HCachedResult, 1); - ret->result = result; - ret->input_stream = state->input_stream; - return ret; -} - -// Really library-internal tool to perform an uncached parse, and handle any common error-handling. -static inline HParseResult* perform_lowlevel_parse(HParseState *state, const HParser *parser) { - // TODO(thequux): these nested conditions are ugly. Factor this appropriately, so that it is clear which codes is executed when. 
- HParseResult *tmp_res; - if (parser) { - HInputStream bak = state->input_stream; - tmp_res = parser->vtable->parse(parser->env, state); - if (tmp_res) { - tmp_res->arena = state->arena; - if (!state->input_stream.overrun) { - tmp_res->bit_length = ((state->input_stream.index - bak.index) << 3); - if (state->input_stream.endianness & BIT_BIG_ENDIAN) - tmp_res->bit_length += state->input_stream.bit_offset - bak.bit_offset; - else - tmp_res->bit_length += bak.bit_offset - state->input_stream.bit_offset; - } else - tmp_res->bit_length = 0; - } - } else - tmp_res = NULL; - if (state->input_stream.overrun) - return NULL; // overrun is always failure. -#ifdef CONSISTENCY_CHECK - if (!tmp_res) { - state->input_stream = INVALID; - state->input_stream.input = key->input_pos.input; - } -#endif - return tmp_res; -} - -HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) { - HParserCacheValue *cached = g_hash_table_lookup(state->cache, k); - HRecursionHead *head = g_hash_table_lookup(state->recursion_heads, k); - if (!head) { // No heads found - return cached; - } else { // Some heads found - if (!cached && head->head_parser != k->parser && !g_slist_find(head->involved_set, k->parser)) { - // Nothing in the cache, and the key parser is not involved - HParseResult *tmp = a_new(HParseResult, 1); - tmp->ast = NULL; tmp->arena = state->arena; - HParserCacheValue *ret = a_new(HParserCacheValue, 1); - ret->value_type = PC_RIGHT; ret->right = cached_result(state, tmp); - return ret; - } - if (g_slist_find(head->eval_set, k->parser)) { - // Something is in the cache, and the key parser is in the eval set. Remove the key parser from the eval set of the head. - head->eval_set = g_slist_remove_all(head->eval_set, k->parser); - HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); - // we know that cached has an entry here, modify it - if (!cached) - cached = a_new(HParserCacheValue, 1); - cached->value_type = PC_RIGHT; - cached->right = cached_result(state, tmp_res); - } - return cached; - } -} - -/* Setting up the left recursion. We have the LR for the rule head; - * we modify the involved_sets of all LRs in the stack, until we - * see the current parser again. - */ - -void setupLR(const HParser *p, HParseState *state, HLeftRec *rec_detect) { - if (!rec_detect->head) { - HRecursionHead *some = a_new(HRecursionHead, 1); - some->head_parser = p; some->involved_set = NULL; some->eval_set = NULL; - rec_detect->head = some; - } - size_t i = 0; - HLeftRec *lr = g_queue_peek_nth(state->lr_stack, i); - while (lr && lr->rule != p) { - lr->head = rec_detect->head; - lr->head->involved_set = g_slist_prepend(lr->head->involved_set, (gpointer)lr->rule); - } -} - -/* If recall() returns NULL, we need to store a dummy failure in the cache and compute the - * future parse. 
- */ - -HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) { - // Store the head into the recursion_heads - g_hash_table_replace(state->recursion_heads, k, head); - HParserCacheValue *old_cached = g_hash_table_lookup(state->cache, k); - if (!old_cached || PC_LEFT == old_cached->value_type) - errx(1, "impossible match"); - HParseResult *old_res = old_cached->right->result; - - // reset the eval_set of the head of the recursion at each beginning of growth - head->eval_set = head->involved_set; - HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser); - - if (tmp_res) { - if ((old_res->ast->index < tmp_res->ast->index) || - (old_res->ast->index == tmp_res->ast->index && old_res->ast->bit_offset < tmp_res->ast->bit_offset)) { - HParserCacheValue *v = a_new(HParserCacheValue, 1); - v->value_type = PC_RIGHT; v->right = cached_result(state, tmp_res); - g_hash_table_replace(state->cache, k, v); - return grow(k, state, head); - } else { - // we're done with growing, we can remove data from the recursion head - g_hash_table_remove(state->recursion_heads, k); - HParserCacheValue *cached = g_hash_table_lookup(state->cache, k); - if (cached && PC_RIGHT == cached->value_type) { - return cached->right->result; - } else { - errx(1, "impossible match"); - } - } - } else { - g_hash_table_remove(state->recursion_heads, k); - return old_res; - } -} - -HParseResult* lr_answer(HParserCacheKey *k, HParseState *state, HLeftRec *growable) { - if (growable->head) { - if (growable->head->head_parser != k->parser) { - // not the head rule, so not growing - return growable->seed; - } - else { - // update cache - HParserCacheValue *v = a_new(HParserCacheValue, 1); - v->value_type = PC_RIGHT; v->right = cached_result(state, growable->seed); - g_hash_table_replace(state->cache, k, v); - if (!growable->seed) - return NULL; - else - return grow(k, state, growable->head); - } - } else { - errx(1, "lrAnswer with no head"); - } -} - -/* Warth's recursion. Hi Alessandro! */ -HParseResult* h_do_parse(const HParser* parser, HParseState *state) { - HParserCacheKey *key = a_new(HParserCacheKey, 1); - key->input_pos = state->input_stream; key->parser = parser; - HParserCacheValue *m = recall(key, state); - // check to see if there is already a result for this object... - if (!m) { - // It doesn't exist, so create a dummy result to cache - HLeftRec *base = a_new(HLeftRec, 1); - base->seed = NULL; base->rule = parser; base->head = NULL; - g_queue_push_head(state->lr_stack, base); - // cache it - HParserCacheValue *dummy = a_new(HParserCacheValue, 1); - dummy->value_type = PC_LEFT; dummy->left = base; - g_hash_table_replace(state->cache, key, dummy); - // parse the input - HParseResult *tmp_res = perform_lowlevel_parse(state, parser); - // the base variable has passed equality tests with the cache - g_queue_pop_head(state->lr_stack); - // setupLR, used below, mutates the LR to have a head if appropriate, so we check to see if we have one - if (NULL == base->head) { - HParserCacheValue *right = a_new(HParserCacheValue, 1); - right->value_type = PC_RIGHT; right->right = cached_result(state, tmp_res); - g_hash_table_replace(state->cache, key, right); - return tmp_res; - } else { - base->seed = tmp_res; - HParseResult *res = lr_answer(key, state, base); - return res; - } - } else { - // it exists! 
- if (PC_LEFT == m->value_type) { - setupLR(parser, state, m->left); - return m->left->seed; // BUG: this might not be correct - } else { - state->input_stream = m->right->input_stream; - return m->right->result; - } - } -} - /* Helper function, since these lines appear in every parser */ typedef struct { @@ -230,35 +42,38 @@ typedef struct { } HTwoParsers; -static guint cache_key_hash(gconstpointer key) { +static uint32_t cache_key_hash(const void* key) { return djbhash(key, sizeof(HParserCacheKey)); } -static gboolean cache_key_equal(gconstpointer key1, gconstpointer key2) { +static bool cache_key_equal(const void* key1, const void* key2) { return memcmp(key1, key2, sizeof(HParserCacheKey)) == 0; } -HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) { +HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length) { + return h_parse__m(&system_allocator, parser, input, length); +} +HParseResult* h_parse__m(HAllocator* mm__, const HParser* parser, const uint8_t* input, size_t length) { // Set up a parse state... - HArena * arena = h_new_arena(0); + HArena * arena = h_new_arena(mm__, 0); HParseState *parse_state = a_new_(arena, HParseState, 1); - parse_state->cache = g_hash_table_new(cache_key_hash, // hash_func - cache_key_equal);// key_equal_func + parse_state->cache = h_hashtable_new(arena, cache_key_equal, // key_equal_func + cache_key_hash); // hash_func parse_state->input_stream.input = input; parse_state->input_stream.index = 0; parse_state->input_stream.bit_offset = 8; // bit big endian parse_state->input_stream.overrun = 0; parse_state->input_stream.endianness = BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN; parse_state->input_stream.length = length; - parse_state->lr_stack = g_queue_new(); - parse_state->recursion_heads = g_hash_table_new(cache_key_hash, - cache_key_equal); + parse_state->lr_stack = h_slist_new(arena); + parse_state->recursion_heads = h_hashtable_new(arena, cache_key_equal, + cache_key_hash); parse_state->arena = arena; HParseResult *res = h_do_parse(parser, parse_state); - g_queue_free(parse_state->lr_stack); - g_hash_table_destroy(parse_state->recursion_heads); + h_slist_free(parse_state->lr_stack); + h_hashtable_free(parse_state->recursion_heads); // tear down the parse state - g_hash_table_destroy(parse_state->cache); + h_hashtable_free(parse_state->cache); if (!res) h_delete_arena(parse_state->arena); @@ -269,405 +84,4 @@ void h_parse_result_free(HParseResult *result) { h_delete_arena(result->arena); } -#ifdef INCLUDE_TESTS -#include "test_suite.h" -static void test_token(void) { - const HParser *token_ = h_token((const uint8_t*)"95\xa2", 3); - - g_check_parse_ok(token_, "95\xa2", 3, "<39.35.a2>"); - g_check_parse_failed(token_, "95", 2); -} - -static void test_ch(void) { - const HParser *ch_ = h_ch(0xa2); - - g_check_parse_ok(ch_, "\xa2", 1, "u0xa2"); - g_check_parse_failed(ch_, "\xa3", 1); -} - -static void test_ch_range(void) { - const HParser *range_ = h_ch_range('a', 'c'); - - g_check_parse_ok(range_, "b", 1, "u0x62"); - g_check_parse_failed(range_, "d", 1); -} - -//@MARK_START -static void test_int64(void) { - const HParser *int64_ = h_int64(); - - g_check_parse_ok(int64_, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s-0x200000000"); - g_check_parse_failed(int64_, "\xff\xff\xff\xfe\x00\x00\x00", 7); -} - -static void test_int32(void) { - const HParser *int32_ = h_int32(); - - g_check_parse_ok(int32_, "\xff\xfe\x00\x00", 4, "s-0x20000"); - g_check_parse_failed(int32_, "\xff\xfe\x00", 3); -} - -static void test_int16(void) 
{ - const HParser *int16_ = h_int16(); - - g_check_parse_ok(int16_, "\xfe\x00", 2, "s-0x200"); - g_check_parse_failed(int16_, "\xfe", 1); -} - -static void test_int8(void) { - const HParser *int8_ = h_int8(); - - g_check_parse_ok(int8_, "\x88", 1, "s-0x78"); - g_check_parse_failed(int8_, "", 0); -} - -static void test_uint64(void) { - const HParser *uint64_ = h_uint64(); - - g_check_parse_ok(uint64_, "\x00\x00\x00\x02\x00\x00\x00\x00", 8, "u0x200000000"); - g_check_parse_failed(uint64_, "\x00\x00\x00\x02\x00\x00\x00", 7); -} - -static void test_uint32(void) { - const HParser *uint32_ = h_uint32(); - - g_check_parse_ok(uint32_, "\x00\x02\x00\x00", 4, "u0x20000"); - g_check_parse_failed(uint32_, "\x00\x02\x00", 3); -} - -static void test_uint16(void) { - const HParser *uint16_ = h_uint16(); - - g_check_parse_ok(uint16_, "\x02\x00", 2, "u0x200"); - g_check_parse_failed(uint16_, "\x02", 1); -} - -static void test_uint8(void) { - const HParser *uint8_ = h_uint8(); - - g_check_parse_ok(uint8_, "\x78", 1, "u0x78"); - g_check_parse_failed(uint8_, "", 0); -} -//@MARK_END - -static void test_int_range(void) { - const HParser *int_range_ = h_int_range(h_uint8(), 3, 10); - - g_check_parse_ok(int_range_, "\x05", 1, "u0x5"); - g_check_parse_failed(int_range_, "\xb", 1); -} - -#if 0 -static void test_float64(void) { - const HParser *float64_ = h_float64(); - - g_check_parse_ok(float64_, "\x3f\xf0\x00\x00\x00\x00\x00\x00", 8, 1.0); - g_check_parse_failed(float64_, "\x3f\xf0\x00\x00\x00\x00\x00", 7); -} - -static void test_float32(void) { - const HParser *float32_ = h_float32(); - - g_check_parse_ok(float32_, "\x3f\x80\x00\x00", 4, 1.0); - g_check_parse_failed(float32_, "\x3f\x80\x00"); -} -#endif - - -static void test_whitespace(void) { - const HParser *whitespace_ = h_whitespace(h_ch('a')); - - g_check_parse_ok(whitespace_, "a", 1, "u0x61"); - g_check_parse_ok(whitespace_, " a", 2, "u0x61"); - g_check_parse_ok(whitespace_, " a", 3, "u0x61"); - g_check_parse_ok(whitespace_, "\ta", 2, "u0x61"); - g_check_parse_failed(whitespace_, "_a", 2); -} - -static void test_left(void) { - const HParser *left_ = h_left(h_ch('a'), h_ch(' ')); - - g_check_parse_ok(left_, "a ", 2, "u0x61"); - g_check_parse_failed(left_, "a", 1); - g_check_parse_failed(left_, " ", 1); - g_check_parse_failed(left_, "ab", 2); -} - -static void test_right(void) { - const HParser *right_ = h_right(h_ch(' '), h_ch('a')); - - g_check_parse_ok(right_, " a", 2, "u0x61"); - g_check_parse_failed(right_, "a", 1); - g_check_parse_failed(right_, " ", 1); - g_check_parse_failed(right_, "ba", 2); -} - -static void test_middle(void) { - const HParser *middle_ = h_middle(h_ch(' '), h_ch('a'), h_ch(' ')); - - g_check_parse_ok(middle_, " a ", 3, "u0x61"); - g_check_parse_failed(middle_, "a", 1); - g_check_parse_failed(middle_, " ", 1); - g_check_parse_failed(middle_, " a", 2); - g_check_parse_failed(middle_, "a ", 2); - g_check_parse_failed(middle_, " b ", 3); - g_check_parse_failed(middle_, "ba ", 3); - g_check_parse_failed(middle_, " ab", 3); -} - -#include - -const HParsedToken* upcase(const HParseResult *p) { - switch(p->ast->token_type) { - case TT_SEQUENCE: - { - HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); - HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used); - ret->token_type = TT_SEQUENCE; - for (size_t i=0; iast->seq->used; ++i) { - if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) { - HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1); - tmp->token_type = TT_UINT; - tmp->uint = 
toupper(((HParsedToken*)p->ast->seq->elements[i])->uint); - h_carray_append(seq, tmp); - } else { - h_carray_append(seq, p->ast->seq->elements[i]); - } - } - ret->seq = seq; - return (const HParsedToken*)ret; - } - case TT_UINT: - { - HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); - ret->token_type = TT_UINT; - ret->uint = toupper(p->ast->uint); - return (const HParsedToken*)ret; - } - default: - return p->ast; - } -} - -static void test_action(void) { - const HParser *action_ = h_action(h_sequence(h_choice(h_ch('a'), - h_ch('A'), - NULL), - h_choice(h_ch('b'), - h_ch('B'), - NULL), - NULL), - upcase); - - g_check_parse_ok(action_, "ab", 2, "(u0x41 u0x42)"); - g_check_parse_ok(action_, "AB", 2, "(u0x41 u0x42)"); - g_check_parse_failed(action_, "XX", 2); -} - -static void test_in(void) { - uint8_t options[3] = { 'a', 'b', 'c' }; - const HParser *in_ = h_in(options, 3); - g_check_parse_ok(in_, "b", 1, "u0x62"); - g_check_parse_failed(in_, "d", 1); - -} - -static void test_not_in(void) { - uint8_t options[3] = { 'a', 'b', 'c' }; - const HParser *not_in_ = h_not_in(options, 3); - g_check_parse_ok(not_in_, "d", 1, "u0x64"); - g_check_parse_failed(not_in_, "a", 1); - -} - -static void test_end_p(void) { - const HParser *end_p_ = h_sequence(h_ch('a'), h_end_p(), NULL); - g_check_parse_ok(end_p_, "a", 1, "(u0x61)"); - g_check_parse_failed(end_p_, "aa", 2); -} - -static void test_nothing_p(void) { - const HParser *nothing_p_ = h_nothing_p(); - g_check_parse_failed(nothing_p_, "a", 1); -} - -static void test_sequence(void) { - const HParser *sequence_1 = h_sequence(h_ch('a'), h_ch('b'), NULL); - const HParser *sequence_2 = h_sequence(h_ch('a'), h_whitespace(h_ch('b')), NULL); - - g_check_parse_ok(sequence_1, "ab", 2, "(u0x61 u0x62)"); - g_check_parse_failed(sequence_1, "a", 1); - g_check_parse_failed(sequence_1, "b", 1); - g_check_parse_ok(sequence_2, "ab", 2, "(u0x61 u0x62)"); - g_check_parse_ok(sequence_2, "a b", 3, "(u0x61 u0x62)"); - g_check_parse_ok(sequence_2, "a b", 4, "(u0x61 u0x62)"); -} - -static void test_choice(void) { - const HParser *choice_ = h_choice(h_ch('a'), h_ch('b'), NULL); - - g_check_parse_ok(choice_, "a", 1, "u0x61"); - g_check_parse_ok(choice_, "b", 1, "u0x62"); - g_check_parse_failed(choice_, "c", 1); -} - -static void test_butnot(void) { - const HParser *butnot_1 = h_butnot(h_ch('a'), h_token((const uint8_t*)"ab", 2)); - const HParser *butnot_2 = h_butnot(h_ch_range('0', '9'), h_ch('6')); - - g_check_parse_ok(butnot_1, "a", 1, "u0x61"); - g_check_parse_failed(butnot_1, "ab", 2); - g_check_parse_ok(butnot_1, "aa", 2, "u0x61"); - g_check_parse_failed(butnot_2, "6", 1); -} - -static void test_difference(void) { - const HParser *difference_ = h_difference(h_token((const uint8_t*)"ab", 2), h_ch('a')); - - g_check_parse_ok(difference_, "ab", 2, "<61.62>"); - g_check_parse_failed(difference_, "a", 1); -} - -static void test_xor(void) { - const HParser *xor_ = h_xor(h_ch_range('0', '6'), h_ch_range('5', '9')); - - g_check_parse_ok(xor_, "0", 1, "u0x30"); - g_check_parse_ok(xor_, "9", 1, "u0x39"); - g_check_parse_failed(xor_, "5", 1); - g_check_parse_failed(xor_, "a", 1); -} - -static void test_many(void) { - const HParser *many_ = h_many(h_choice(h_ch('a'), h_ch('b'), NULL)); - g_check_parse_ok(many_, "adef", 4, "(u0x61)"); - g_check_parse_ok(many_, "bdef", 4, "(u0x62)"); - g_check_parse_ok(many_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)"); - g_check_parse_ok(many_, "daabbabadef", 11, "()"); -} - -static void test_many1(void) { - const HParser *many1_ 
= h_many1(h_choice(h_ch('a'), h_ch('b'), NULL)); - - g_check_parse_ok(many1_, "adef", 4, "(u0x61)"); - g_check_parse_ok(many1_, "bdef", 4, "(u0x62)"); - g_check_parse_ok(many1_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)"); - g_check_parse_failed(many1_, "daabbabadef", 11); -} - -static void test_repeat_n(void) { - const HParser *repeat_n_ = h_repeat_n(h_choice(h_ch('a'), h_ch('b'), NULL), 2); - - g_check_parse_failed(repeat_n_, "adef", 4); - g_check_parse_ok(repeat_n_, "abdef", 5, "(u0x61 u0x62)"); - g_check_parse_failed(repeat_n_, "dabdef", 6); -} - -static void test_optional(void) { - const HParser *optional_ = h_sequence(h_ch('a'), h_optional(h_choice(h_ch('b'), h_ch('c'), NULL)), h_ch('d'), NULL); - - g_check_parse_ok(optional_, "abd", 3, "(u0x61 u0x62 u0x64)"); - g_check_parse_ok(optional_, "acd", 3, "(u0x61 u0x63 u0x64)"); - g_check_parse_ok(optional_, "ad", 2, "(u0x61 null u0x64)"); - g_check_parse_failed(optional_, "aed", 3); - g_check_parse_failed(optional_, "ab", 2); - g_check_parse_failed(optional_, "ac", 2); -} - -static void test_ignore(void) { - const HParser *ignore_ = h_sequence(h_ch('a'), h_ignore(h_ch('b')), h_ch('c'), NULL); - - g_check_parse_ok(ignore_, "abc", 3, "(u0x61 u0x63)"); - g_check_parse_failed(ignore_, "ac", 2); -} - -static void test_sepBy1(void) { - const HParser *sepBy1_ = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(',')); - - g_check_parse_ok(sepBy1_, "1,2,3", 5, "(u0x31 u0x32 u0x33)"); - g_check_parse_ok(sepBy1_, "1,3,2", 5, "(u0x31 u0x33 u0x32)"); - g_check_parse_ok(sepBy1_, "1,3", 3, "(u0x31 u0x33)"); - g_check_parse_ok(sepBy1_, "3", 1, "(u0x33)"); -} - -static void test_epsilon_p(void) { - const HParser *epsilon_p_1 = h_sequence(h_ch('a'), h_epsilon_p(), h_ch('b'), NULL); - const HParser *epsilon_p_2 = h_sequence(h_epsilon_p(), h_ch('a'), NULL); - const HParser *epsilon_p_3 = h_sequence(h_ch('a'), h_epsilon_p(), NULL); - - g_check_parse_ok(epsilon_p_1, "ab", 2, "(u0x61 u0x62)"); - g_check_parse_ok(epsilon_p_2, "a", 1, "(u0x61)"); - g_check_parse_ok(epsilon_p_3, "a", 1, "(u0x61)"); -} - -static void test_attr_bool(void) { - -} - -static void test_and(void) { - const HParser *and_1 = h_sequence(h_and(h_ch('0')), h_ch('0'), NULL); - const HParser *and_2 = h_sequence(h_and(h_ch('0')), h_ch('1'), NULL); - const HParser *and_3 = h_sequence(h_ch('1'), h_and(h_ch('2')), NULL); - - g_check_parse_ok(and_1, "0", 1, "(u0x30)"); - g_check_parse_failed(and_2, "0", 1); - g_check_parse_ok(and_3, "12", 2, "(u0x31)"); -} - -static void test_not(void) { - const HParser *not_1 = h_sequence(h_ch('a'), h_choice(h_ch('+'), h_token((const uint8_t*)"++", 2), NULL), h_ch('b'), NULL); - const HParser *not_2 = h_sequence(h_ch('a'), - h_choice(h_sequence(h_ch('+'), h_not(h_ch('+')), NULL), - h_token((const uint8_t*)"++", 2), - NULL), h_ch('b'), NULL); - - g_check_parse_ok(not_1, "a+b", 3, "(u0x61 u0x2b u0x62)"); - g_check_parse_failed(not_1, "a++b", 4); - g_check_parse_ok(not_2, "a+b", 3, "(u0x61 (u0x2b) u0x62)"); - g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)"); -} - -void register_parser_tests(void) { - g_test_add_func("/core/parser/token", test_token); - g_test_add_func("/core/parser/ch", test_ch); - g_test_add_func("/core/parser/ch_range", test_ch_range); - g_test_add_func("/core/parser/int64", test_int64); - g_test_add_func("/core/parser/int32", test_int32); - g_test_add_func("/core/parser/int16", test_int16); - g_test_add_func("/core/parser/int8", test_int8); - g_test_add_func("/core/parser/uint64", test_uint64); - 
g_test_add_func("/core/parser/uint32", test_uint32); - g_test_add_func("/core/parser/uint16", test_uint16); - g_test_add_func("/core/parser/uint8", test_uint8); - g_test_add_func("/core/parser/int_range", test_int_range); -#if 0 - g_test_add_func("/core/parser/float64", test_float64); - g_test_add_func("/core/parser/float32", test_float32); -#endif - g_test_add_func("/core/parser/whitespace", test_whitespace); - g_test_add_func("/core/parser/left", test_left); - g_test_add_func("/core/parser/right", test_right); - g_test_add_func("/core/parser/middle", test_middle); - g_test_add_func("/core/parser/action", test_action); - g_test_add_func("/core/parser/in", test_in); - g_test_add_func("/core/parser/not_in", test_not_in); - g_test_add_func("/core/parser/end_p", test_end_p); - g_test_add_func("/core/parser/nothing_p", test_nothing_p); - g_test_add_func("/core/parser/sequence", test_sequence); - g_test_add_func("/core/parser/choice", test_choice); - g_test_add_func("/core/parser/butnot", test_butnot); - g_test_add_func("/core/parser/difference", test_difference); - g_test_add_func("/core/parser/xor", test_xor); - g_test_add_func("/core/parser/many", test_many); - g_test_add_func("/core/parser/many1", test_many1); - g_test_add_func("/core/parser/repeat_n", test_repeat_n); - g_test_add_func("/core/parser/optional", test_optional); - g_test_add_func("/core/parser/sepBy1", test_sepBy1); - g_test_add_func("/core/parser/epsilon_p", test_epsilon_p); - g_test_add_func("/core/parser/attr_bool", test_attr_bool); - g_test_add_func("/core/parser/and", test_and); - g_test_add_func("/core/parser/not", test_not); - g_test_add_func("/core/parser/ignore", test_ignore); -} - -#endif // #ifdef INCLUDE_TESTS diff --git a/src/hammer.h b/src/hammer.h index 4f21e29..d1e8688 100644 --- a/src/hammer.h +++ b/src/hammer.h @@ -17,7 +17,7 @@ #ifndef HAMMER_HAMMER__H #define HAMMER_HAMMER__H -#include +#include #include #include #include "allocator.h" @@ -31,6 +31,12 @@ typedef int bool; typedef struct HParseState_ HParseState; +typedef enum HParserBackend_ { + PB_MIN = 0, + PB_PACKRAT = PB_MIN, // PB_MIN is always the default. + PB_MAX +} HParserBackend; + typedef enum HTokenType_ { // Before you change the explicit values of these, think of the poor bindings ;_; TT_NONE = 1, @@ -39,8 +45,7 @@ typedef enum HTokenType_ { TT_UINT = 8, TT_SEQUENCE = 16, TT_ERR = 32, - TT_USER = 64, - TT_MAX = 128 + TT_USER = 64 } HTokenType; typedef struct HCountedArray_ { @@ -50,13 +55,15 @@ typedef struct HCountedArray_ { struct HParsedToken_ **elements; } HCountedArray; +typedef struct HBytes_ { + const uint8_t *token; + size_t len; +} HBytes; + typedef struct HParsedToken_ { HTokenType token_type; union { - struct { - const uint8_t *token; - size_t len; - } bytes; + HBytes bytes; int64_t sint; uint64_t uint; double dbl; @@ -114,18 +121,76 @@ typedef struct HParser_ { void *env; } HParser; +// {{{ Stuff for benchmarking +typedef struct HParserTestcase_ { + unsigned char* input; + size_t length; + char* output_unambiguous; +} HParserTestcase; + +typedef struct HCaseResult_ { + bool success; + union { + const char* actual_results; // on failure, filled in with the results of h_write_result_unamb + size_t parse_time; // on success, filled in with time for a single parse, in nsec + }; +} HCaseResult; + +typedef struct HBackendResults_ { + HParserBackend backend; + bool compile_success; + size_t n_testcases; + size_t failed_testcases; // actually a count... 
+ HCaseResult *cases; +} HBackendResults; + +typedef struct HBenchmarkResults_ { + size_t len; + HBackendResults *results; +} HBenchmarkResults; +// }}} + +// {{{ Preprocessor definitions +#define HAMMER_FN_DECL_NOARG(rtype_t, name) \ + rtype_t name(void); \ + rtype_t name##__m(HAllocator* mm__) + +#define HAMMER_FN_DECL(rtype_t, name, ...) \ + rtype_t name(__VA_ARGS__); \ + rtype_t name##__m(HAllocator* mm__, __VA_ARGS__) + +#define HAMMER_FN_DECL_ATTR(attr, rtype_t, name, ...) \ + rtype_t name(__VA_ARGS__) attr; \ + rtype_t name##__m(HAllocator* mm__, __VA_ARGS__) attr + +#define HAMMER_FN_DECL_VARARGS(rtype_t, name, ...) \ + rtype_t name(__VA_ARGS__, ...); \ + rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...); \ + rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \ + rtype_t name##__v(__VA_ARGS__, va_list ap) + +// Note: this drops the attributes on the floor for the __v versions +#define HAMMER_FN_DECL_VARARGS_ATTR(attr, rtype_t, name, ...) \ + rtype_t name(__VA_ARGS__, ...) attr; \ + rtype_t name##__m(HAllocator* mm__, __VA_ARGS__, ...) attr; \ + rtype_t name##__mv(HAllocator* mm__, __VA_ARGS__, va_list ap); \ + rtype_t name##__v(__VA_ARGS__, va_list ap) + +// }}} + + /** * Top-level function to call a parser that has been built over some * piece of input (of known size). */ -HParseResult* h_parse(const HParser* parser, const uint8_t* input, size_t length); +HAMMER_FN_DECL(HParseResult*, h_parse, const HParser* parser, const uint8_t* input, size_t length); /** * Given a string, returns a parser that parses that string value. * * Result token type: TT_BYTES */ -const HParser* h_token(const uint8_t *str, const size_t len); +HAMMER_FN_DECL(const HParser*, h_token, const uint8_t *str, const size_t len); /** * Given a single character, returns a parser that parses that @@ -133,7 +198,7 @@ const HParser* h_token(const uint8_t *str, const size_t len); * * Result token type: TT_UINT */ -const HParser* h_ch(const uint8_t c); +HAMMER_FN_DECL(const HParser*, h_ch, const uint8_t c); /** * Given two single-character bounds, lower and upper, returns a parser @@ -142,14 +207,14 @@ const HParser* h_ch(const uint8_t c); * * Result token type: TT_UINT */ -const HParser* h_ch_range(const uint8_t lower, const uint8_t upper); +HAMMER_FN_DECL(const HParser*, h_ch_range, const uint8_t lower, const uint8_t upper); /** * Given an integer parser, p, and two integer bounds, lower and upper, * returns a parser that parses an integral value within the range * [lower, upper] (inclusive). */ -const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper); +HAMMER_FN_DECL(const HParser*, h_int_range, const HParser *p, const int64_t lower, const int64_t upper); /** * Returns a parser that parses the specified number of bits. sign == @@ -157,63 +222,63 @@ const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t * * Result token type: TT_SINT if sign == true, TT_UINT if sign == false */ -const HParser* h_bits(size_t len, bool sign); +HAMMER_FN_DECL(const HParser*, h_bits, size_t len, bool sign); /** * Returns a parser that parses a signed 8-byte integer value. * * Result token type: TT_SINT */ -const HParser* h_int64(); +HAMMER_FN_DECL_NOARG(const HParser*, h_int64); /** * Returns a parser that parses a signed 4-byte integer value. * * Result token type: TT_SINT */ -const HParser* h_int32(); +HAMMER_FN_DECL_NOARG(const HParser*, h_int32); /** * Returns a parser that parses a signed 2-byte integer value. 
* * Result token type: TT_SINT */ -const HParser* h_int16(); +HAMMER_FN_DECL_NOARG(const HParser*, h_int16); /** * Returns a parser that parses a signed 1-byte integer value. * * Result token type: TT_SINT */ -const HParser* h_int8(); +HAMMER_FN_DECL_NOARG(const HParser*, h_int8); /** * Returns a parser that parses an unsigned 8-byte integer value. * * Result token type: TT_UINT */ -const HParser* h_uint64(); +HAMMER_FN_DECL_NOARG(const HParser*, h_uint64); /** * Returns a parser that parses an unsigned 4-byte integer value. * * Result token type: TT_UINT */ -const HParser* h_uint32(); +HAMMER_FN_DECL_NOARG(const HParser*, h_uint32); /** * Returns a parser that parses an unsigned 2-byte integer value. * * Result token type: TT_UINT */ -const HParser* h_uint16(); +HAMMER_FN_DECL_NOARG(const HParser*, h_uint16); /** * Returns a parser that parses an unsigned 1-byte integer value. * * Result token type: TT_UINT */ -const HParser* h_uint8(); +HAMMER_FN_DECL_NOARG(const HParser*, h_uint8); /** * Given another parser, p, returns a parser that skips any whitespace @@ -221,7 +286,7 @@ const HParser* h_uint8(); * * Result token type: p's result type */ -const HParser* h_whitespace(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_whitespace, const HParser* p); /** * Given two parsers, p and q, returns a parser that parses them in @@ -229,7 +294,7 @@ const HParser* h_whitespace(const HParser* p); * * Result token type: p's result type */ -const HParser* h_left(const HParser* p, const HParser* q); +HAMMER_FN_DECL(const HParser*, h_left, const HParser* p, const HParser* q); /** * Given two parsers, p and q, returns a parser that parses them in @@ -237,7 +302,7 @@ const HParser* h_left(const HParser* p, const HParser* q); * * Result token type: q's result type */ -const HParser* h_right(const HParser* p, const HParser* q); +HAMMER_FN_DECL(const HParser*, h_right, const HParser* p, const HParser* q); /** * Given three parsers, p, x, and q, returns a parser that parses them in @@ -245,7 +310,7 @@ const HParser* h_right(const HParser* p, const HParser* q); * * Result token type: x's result type */ -const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q); +HAMMER_FN_DECL(const HParser*, h_middle, const HParser* p, const HParser* x, const HParser* q); /** * Given another parser, p, and a function f, returns a parser that @@ -253,21 +318,21 @@ const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q); * * Result token type: any */ -const HParser* h_action(const HParser* p, const HAction a); +HAMMER_FN_DECL(const HParser*, h_action, const HParser* p, const HAction a); /** * Parse a single character in the given charset. * * Result token type: TT_UINT */ -const HParser* h_in(const uint8_t *charset, size_t length); +HAMMER_FN_DECL(const HParser*, h_in, const uint8_t *charset, size_t length); /** * Parse a single character *NOT* in the given charset. * * Result token type: TT_UINT */ -const HParser* h_not_in(const uint8_t *charset, size_t length); +HAMMER_FN_DECL(const HParser*, h_not_in, const uint8_t *charset, size_t length); /** * A no-argument parser that succeeds if there is no more input to @@ -275,14 +340,14 @@ const HParser* h_not_in(const uint8_t *charset, size_t length); * * Result token type: None. The HParseResult exists but its AST is NULL. */ -const HParser* h_end_p(); +HAMMER_FN_DECL_NOARG(const HParser*, h_end_p); /** * This parser always fails. * * Result token type: NULL. Always. 
*/ -const HParser* h_nothing_p(); +HAMMER_FN_DECL_NOARG(const HParser*, h_nothing_p); /** * Given a null-terminated list of parsers, apply each parser in order. @@ -290,7 +355,7 @@ const HParser* h_nothing_p(); * * Result token type: TT_SEQUENCE */ -const HParser* h_sequence(const HParser* p, ...) __attribute__((sentinel)); +HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_sequence, const HParser* p); /** * Given an array of parsers, p_array, apply each parser in order. The @@ -299,7 +364,7 @@ const HParser* h_sequence(const HParser* p, ...) __attribute__((sentinel)); * * Result token type: The type of the first successful parser's result. */ -const HParser* h_choice(const HParser* p, ...) __attribute__((sentinel)); +HAMMER_FN_DECL_VARARGS_ATTR(__attribute__((sentinel)), const HParser*, h_choice, const HParser* p); /** * Given two parsers, p1 and p2, this parser succeeds in the following @@ -309,7 +374,7 @@ const HParser* h_choice(const HParser* p, ...) __attribute__((sentinel)); * * Result token type: p1's result type. */ -const HParser* h_butnot(const HParser* p1, const HParser* p2); +HAMMER_FN_DECL(const HParser*, h_butnot, const HParser* p1, const HParser* p2); /** * Given two parsers, p1 and p2, this parser succeeds in the following @@ -319,7 +384,7 @@ const HParser* h_butnot(const HParser* p1, const HParser* p2); * * Result token type: p1's result type. */ -const HParser* h_difference(const HParser* p1, const HParser* p2); +HAMMER_FN_DECL(const HParser*, h_difference, const HParser* p1, const HParser* p2); /** * Given two parsers, p1 and p2, this parser succeeds if *either* p1 or @@ -327,7 +392,7 @@ const HParser* h_difference(const HParser* p1, const HParser* p2); * * Result token type: The type of the result of whichever parser succeeded. */ -const HParser* h_xor(const HParser* p1, const HParser* p2); +HAMMER_FN_DECL(const HParser*, h_xor, const HParser* p1, const HParser* p2); /** * Given a parser, p, this parser succeeds for zero or more repetitions @@ -335,7 +400,7 @@ const HParser* h_xor(const HParser* p1, const HParser* p2); * * Result token type: TT_SEQUENCE */ -const HParser* h_many(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_many, const HParser* p); /** * Given a parser, p, this parser succeeds for one or more repetitions @@ -343,7 +408,7 @@ const HParser* h_many(const HParser* p); * * Result token type: TT_SEQUENCE */ -const HParser* h_many1(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_many1, const HParser* p); /** * Given a parser, p, this parser succeeds for exactly N repetitions @@ -351,7 +416,7 @@ const HParser* h_many1(const HParser* p); * * Result token type: TT_SEQUENCE */ -const HParser* h_repeat_n(const HParser* p, const size_t n); +HAMMER_FN_DECL(const HParser*, h_repeat_n, const HParser* p, const size_t n); /** * Given a parser, p, this parser succeeds with the value p parsed or @@ -359,7 +424,7 @@ const HParser* h_repeat_n(const HParser* p, const size_t n); * * Result token type: If p succeeded, the type of its result; if not, TT_NONE. */ -const HParser* h_optional(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_optional, const HParser* p); /** * Given a parser, p, this parser succeeds if p succeeds, but doesn't @@ -367,7 +432,7 @@ const HParser* h_optional(const HParser* p); * * Result token type: None. The HParseResult exists but its AST is NULL. 
*/ -const HParser* h_ignore(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_ignore, const HParser* p); /** * Given a parser, p, and a parser for a separator, sep, this parser @@ -378,7 +443,7 @@ const HParser* h_ignore(const HParser* p); * * Result token type: TT_SEQUENCE */ -const HParser* h_sepBy(const HParser* p, const HParser* sep); +HAMMER_FN_DECL(const HParser*, h_sepBy, const HParser* p, const HParser* sep); /** * Given a parser, p, and a parser for a separator, sep, this parser matches a list of things that p can parse, separated by sep. Unlike sepBy, this ensures that the result has at least one element. @@ -386,14 +451,14 @@ const HParser* h_sepBy(const HParser* p, const HParser* sep); * * Result token type: TT_SEQUENCE */ -const HParser* h_sepBy1(const HParser* p, const HParser* sep); +HAMMER_FN_DECL(const HParser*, h_sepBy1, const HParser* p, const HParser* sep); /** * This parser always returns a zero length match, i.e., empty string. * * Result token type: None. The HParseResult exists but its AST is NULL. */ -const HParser* h_epsilon_p(); +HAMMER_FN_DECL_NOARG(const HParser*, h_epsilon_p); /** * This parser applies its first argument to read an unsigned integer @@ -404,7 +469,7 @@ const HParser* h_epsilon_p(); * * Result token type: TT_SEQUENCE */ -const HParser* h_length_value(const HParser* length, const HParser* value); +HAMMER_FN_DECL(const HParser*, h_length_value, const HParser* length, const HParser* value); /** * This parser attaches a predicate function, which returns true or @@ -419,7 +484,7 @@ const HParser* h_length_value(const HParser* length, const HParser* value); * * Result token type: p's result type if pred succeeded, NULL otherwise. */ -const HParser* h_attr_bool(const HParser* p, HPredicate pred); +HAMMER_FN_DECL(const HParser*, h_attr_bool, const HParser* p, HPredicate pred); /** * The 'and' parser asserts that a conditional syntax is satisfied, @@ -436,7 +501,7 @@ const HParser* h_attr_bool(const HParser* p, HPredicate pred); * * Result token type: None. The HParseResult exists but its AST is NULL. */ -const HParser* h_and(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_and, const HParser* p); /** * The 'not' parser asserts that a conditional syntax is *not* @@ -456,7 +521,7 @@ const HParser* h_and(const HParser* p); * * Result token type: None. The HParseResult exists but its AST is NULL. */ -const HParser* h_not(const HParser* p); +HAMMER_FN_DECL(const HParser*, h_not, const HParser* p); /** * Create a parser that just calls out to another, as yet unknown, @@ -467,35 +532,44 @@ const HParser* h_not(const HParser* p); * Result token type: the type of whatever parser is bound to it with * bind_indirect(). */ -HParser *h_indirect(); +HAMMER_FN_DECL_NOARG(HParser*, h_indirect); /** * Set the inner parser of an indirect. See comments on indirect for * details. */ -void h_bind_indirect(HParser* indirect, const HParser* inner); +HAMMER_FN_DECL(void, h_bind_indirect, HParser* indirect, const HParser* inner); /** * Free the memory allocated to an HParseResult when it is no longer needed. */ -void h_parse_result_free(HParseResult *result); +HAMMER_FN_DECL(void, h_parse_result_free, HParseResult *result); // Some debugging aids /** * Format token into a compact unambiguous form. Useful for parser test cases. * Caller is responsible for freeing the result. */ -char* h_write_result_unamb(const HParsedToken* tok); +HAMMER_FN_DECL(char*, h_write_result_unamb, const HParsedToken* tok); /** * Format token to the given output stream. 
Indent starting at
 * [indent] spaces, with [delta] spaces between levels.
 */
-void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta);
+HAMMER_FN_DECL(void, h_pprint, FILE* stream, const HParsedToken* tok, int indent, int delta);
+
+/**
+ * Build parse tables for the given parser backend. See the
+ * documentation for the parser backend in question for information
+ * about the [params] parameter, or just pass in NULL for the defaults.
+ *
+ * Returns -1 if the grammar cannot be compiled with the specified options; 0 otherwise.
+ */
+HAMMER_FN_DECL(int, h_compile, const HParser* parser, HParserBackend backend, const void* params);
 
 /**
  * TODO: Document me
  */
-HBitWriter *h_bit_writer_new(void);
+HBitWriter *h_bit_writer_new(HAllocator* mm__);
 
 /**
  * TODO: Document me
@@ -507,11 +581,17 @@ void h_bit_writer_put(HBitWriter* w, unsigned long long data, size_t nbits);
  * Must not free [w] until you're done with the result.
  * [len] is in bytes.
  */
-const uint8_t *h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
+const uint8_t* h_bit_writer_get_buffer(HBitWriter* w, size_t *len);
 
 /**
  * TODO: Document me
  */
 void h_bit_writer_free(HBitWriter* w);
 
+// {{{ Benchmark functions
+HAMMER_FN_DECL(HBenchmarkResults *, h_benchmark, const HParser* parser, HParserTestcase* testcases);
+void h_benchmark_report(FILE* stream, HBenchmarkResults* results);
+void h_benchmark_dump_optimized_code(FILE* stream, HBenchmarkResults* results);
+// }}}
+
 #endif // #ifndef HAMMER_HAMMER__H
diff --git a/src/internal.h b/src/internal.h
index cc35a8b..0dcf857 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -17,7 +17,6 @@
 #ifndef HAMMER_INTERNAL__H
 #define HAMMER_INTERNAL__H
 
-#include
 #include
 #include "hammer.h"
 
@@ -29,9 +28,28 @@
     errx(1, "Assertion failed (programmer error): %s", message); \
   } while(0)
 #endif
+
+#define HAMMER_FN_IMPL_NOARGS(rtype_t, name) \
+  rtype_t name(void) { \
+    return name##__m(&system_allocator); \
+  } \
+  rtype_t name##__m(HAllocator* mm__)
+// Functions with arguments are difficult to forward cleanly. Alas, we will need to forward them manually.
+
+#define h_new(type, count) ((type*)(mm__->alloc(mm__, sizeof(type)*(count))))
+#define h_free(addr) (mm__->free(mm__, (addr)))
+
 #define false 0
 #define true 1
 
+// This is going to be generally useful.
+static inline void h_generic_free(HAllocator *allocator, void* ptr) {
+  allocator->free(allocator, ptr);
+}
+
+extern HAllocator system_allocator;
+
+
 typedef struct HInputStream_ {
   // This should be considered to be a really big value type.
   const uint8_t *input;
@@ -42,6 +60,36 @@
   char overrun;
 } HInputStream;
 
+typedef struct HSlistNode_ {
+  void* elem;
+  struct HSlistNode_ *next;
+} HSlistNode;
+
+typedef struct HSlist_ {
+  HSlistNode *head;
+  struct HArena_ *arena;
+} HSlist;
+
+typedef unsigned int HHashValue;
+typedef HHashValue (*HHashFunc)(const void* key);
+typedef bool (*HEqualFunc)(const void* key1, const void* key2);
+
+typedef struct HHashTableEntry_ {
+  struct HHashTableEntry_ *next;
+  void* key;
+  void* value;
+  HHashValue hashval;
+} HHashTableEntry;
+
+typedef struct HHashTable_ {
+  HHashTableEntry *contents;
+  HHashFunc hashFunc;
+  HEqualFunc equalFunc;
+  size_t capacity;
+  size_t used;
+  HArena *arena;
+} HHashTable;
+
 /* The state of the parser.
* * Members: @@ -54,13 +102,19 @@ typedef struct HInputStream_ { */ struct HParseState_ { - GHashTable *cache; + HHashTable *cache; HInputStream input_stream; HArena * arena; - GQueue *lr_stack; - GHashTable *recursion_heads; + HSlist *lr_stack; + HHashTable *recursion_heads; }; +typedef struct HParserBackendVTable_ { + int (*compile)(HAllocator *mm__, const HParser* parser, const void* params); + HParseResult* (*parse)(HAllocator *mm__, const HParser* parser, HParseState* parse_state); +} HParserBackendVTable; + + /* The (location, parser) tuple used to key the cache. */ @@ -90,8 +144,8 @@ typedef enum HParserCacheValueType_ { */ typedef struct HRecursionHead_ { const HParser *head_parser; - GSList *involved_set; - GSList *eval_set; + HSlist *involved_set; + HSlist *eval_set; } HRecursionHead; @@ -125,23 +179,23 @@ typedef struct HParserCacheValue_t { }; } HParserCacheValue; -typedef unsigned int *HCharset; +// This file provides the logical inverse of bitreader.c +struct HBitWriter_ { + uint8_t* buf; + HAllocator *mm__; + size_t index; + size_t capacity; + char bit_offset; // unlike in bit_reader, this is always the number + // of used bits in the current byte. i.e., 0 always + // means that 8 bits are available for use. + char flags; +}; -static inline HCharset new_charset() { - HCharset cs = g_new0(unsigned int, 256 / sizeof(unsigned int)); - return cs; -} +// }}} -static inline int charset_isset(HCharset cs, uint8_t pos) { - return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs)))); -} - -static inline void charset_set(HCharset cs, uint8_t pos, int val) { - cs[pos / sizeof(*cs)] = - val - ? cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs))) - : cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs))); -} +// Backends {{{ +extern HParserBackendVTable h__packrat_backend_vtable; +// }}} // TODO(thequux): Set symbol visibility for these functions so that they aren't exported. 
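
(Editorial illustration, not part of this patch: since this change swaps GLib's GHashTable/GSList/GQueue for the arena-backed HHashTable and HSlist above, a minimal usage sketch may help orientation. It uses only the h_hashtable_* and h_slist_* functions declared in the hunk just below; the str_hash, str_equal, and container_demo names are hypothetical.)

#include <string.h>   /* strcmp, for the demo key comparison */

/* Hypothetical key helpers over NUL-terminated string keys. */
static HHashValue str_hash(const void *key) {
  HHashValue h = 5381;                      /* same multiply-by-33 scheme as djbhash() */
  for (const unsigned char *s = key; *s; ++s)
    h = h * 33 + *s;
  return h;
}

static bool str_equal(const void *k1, const void *k2) {
  return strcmp(k1, k2) == 0;
}

static void container_demo(HArena *arena) {
  HHashTable *ht = h_hashtable_new(arena, str_equal, str_hash);
  h_hashtable_put(ht, (void *)"cache", arena);          /* keys and values are plain void* */
  if (h_hashtable_present(ht, (void *)"cache"))
    (void)h_hashtable_get(ht, (void *)"cache");

  HSlist *stack = h_slist_new(arena);                   /* LIFO list, stands in for GQueue */
  h_slist_push(stack, ht);
  (void)h_slist_pop(stack);

  h_slist_free(stack);       /* as in h_parse__m's teardown */
  h_hashtable_free(ht);      /* remaining memory is reclaimed with the arena */
}

This mirrors how h_parse__m() above builds and tears down its memo cache, recursion heads, and LR stack.
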
@@ -154,10 +208,24 @@ HCountedArray *h_carray_new_sized(HArena * arena, size_t size); HCountedArray *h_carray_new(HArena * arena); void h_carray_append(HCountedArray *array, void* item); +HSlist* h_slist_new(HArena *arena); +HSlist* h_slist_copy(HSlist *slist); +void* h_slist_pop(HSlist *slist); +void h_slist_push(HSlist *slist, void* item); +bool h_slist_find(HSlist *slist, const void* item); +HSlist* h_slist_remove_all(HSlist *slist, const void* item); +void h_slist_free(HSlist *slist); + +HHashTable* h_hashtable_new(HArena *arena, HEqualFunc equalFunc, HHashFunc hashFunc); +void* h_hashtable_get(HHashTable* ht, void* key); +void h_hashtable_put(HHashTable* ht, void* key, void* value); +int h_hashtable_present(HHashTable* ht, void* key); +void h_hashtable_del(HHashTable* ht, void* key); +void h_hashtable_free(HHashTable* ht); #if 0 -#include -#define arena_malloc(a, s) malloc(s) +#include +#define h_arena_malloc(a, s) malloc(s) #endif #endif // #ifndef HAMMER_INTERNAL__H diff --git a/src/parsers/action.c b/src/parsers/action.c index 479a840..0f1686f 100644 --- a/src/parsers/action.c +++ b/src/parsers/action.c @@ -23,10 +23,14 @@ static const HParserVtable action_vt = { .parse = parse_action, }; -const HParser* h_action(const HParser* p, const HAction a) { - HParser *res = g_new(HParser, 1); +const HParser* h_action(const HParser* p, const HAction a) { + return h_action__m(&system_allocator, p, a); +} + +const HParser* h_action__m(HAllocator* mm__, const HParser* p, const HAction a) { + HParser *res = h_new(HParser, 1); res->vtable = &action_vt; - HParseAction *env = g_new(HParseAction, 1); + HParseAction *env = h_new(HParseAction, 1); env->p = p; env->action = a; res->env = (void*)env; diff --git a/src/parsers/and.c b/src/parsers/and.c index fb117fb..532cbba 100644 --- a/src/parsers/and.c +++ b/src/parsers/and.c @@ -13,9 +13,13 @@ static const HParserVtable and_vt = { .parse = parse_and, }; + const HParser* h_and(const HParser* p) { + return h_and__m(&system_allocator, p); +} +const HParser* h_and__m(HAllocator* mm__, const HParser* p) { // zero-width postive lookahead - HParser *res = g_new(HParser, 1); + HParser *res = h_new(HParser, 1); res->env = (void*)p; res->vtable = &and_vt; return res; diff --git a/src/parsers/attr_bool.c b/src/parsers/attr_bool.c index bf9e6dc..6fa36f8 100644 --- a/src/parsers/attr_bool.c +++ b/src/parsers/attr_bool.c @@ -21,10 +21,14 @@ static const HParserVtable attr_bool_vt = { .parse = parse_attr_bool, }; -const HParser* h_attr_bool(const HParser* p, HPredicate pred) { - HParser *res = g_new(HParser, 1); + +const HParser* h_attr_bool(const HParser* p, HPredicate pred) { + return h_attr_bool__m(&system_allocator, p, pred); +} +const HParser* h_attr_bool__m(HAllocator* mm__, const HParser* p, HPredicate pred) { + HParser *res = h_new(HParser, 1); res->vtable = &attr_bool_vt; - HAttrBool *env = g_new(HAttrBool, 1); + HAttrBool *env = h_new(HAttrBool, 1); env->p = p; env->pred = pred; res->env = (void*)env; diff --git a/src/parsers/bits.c b/src/parsers/bits.c index 32b7a55..196e33b 100644 --- a/src/parsers/bits.c +++ b/src/parsers/bits.c @@ -20,18 +20,24 @@ static const HParserVtable bits_vt = { .parse = parse_bits, }; const HParser* h_bits(size_t len, bool sign) { - struct bits_env *env = g_new(struct bits_env, 1); + return h_bits__m(&system_allocator, len, sign); +} +const HParser* h_bits__m(HAllocator* mm__, size_t len, bool sign) { + struct bits_env *env = h_new(struct bits_env, 1); env->length = len; env->signedp = sign; - HParser *res = g_new(HParser, 1); 
+ HParser *res = h_new(HParser, 1); res->vtable = &bits_vt; res->env = env; return res; } #define SIZED_BITS(name_pre, len, signedp) \ - const HParser* h_##name_pre##len () { \ - return h_bits(len, signedp); \ + const HParser* h_##name_pre##len () { \ + return h_bits__m(&system_allocator, len, signedp); \ + } \ + const HParser* h_##name_pre##len##__m(HAllocator* mm__) { \ + return h_bits__m(mm__, len, signedp); \ } SIZED_BITS(int, 8, true) SIZED_BITS(int, 16, true) diff --git a/src/parsers/butnot.c b/src/parsers/butnot.c index 5026d79..422e0e1 100644 --- a/src/parsers/butnot.c +++ b/src/parsers/butnot.c @@ -39,10 +39,13 @@ static const HParserVtable butnot_vt = { .parse = parse_butnot, }; -const HParser* h_butnot(const HParser* p1, const HParser* p2) { - HTwoParsers *env = g_new(HTwoParsers, 1); +const HParser* h_butnot(const HParser* p1, const HParser* p2) { + return h_butnot__m(&system_allocator, p1, p2); +} +const HParser* h_butnot__m(HAllocator* mm__, const HParser* p1, const HParser* p2) { + HTwoParsers *env = h_new(HTwoParsers, 1); env->p1 = p1; env->p2 = p2; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &butnot_vt; ret->env = (void*)env; return ret; } diff --git a/src/parsers/ch.c b/src/parsers/ch.c index fbfa57e..032731e 100644 --- a/src/parsers/ch.c +++ b/src/parsers/ch.c @@ -1,7 +1,7 @@ #include "parser_internal.h" static HParseResult* parse_ch(void* env, HParseState *state) { - uint8_t c = (uint8_t)GPOINTER_TO_UINT(env); + uint8_t c = (uint8_t)(unsigned long)(env); uint8_t r = (uint8_t)h_read_bits(&state->input_stream, 8, false); if (c == r) { HParsedToken *tok = a_new(HParsedToken, 1); @@ -15,9 +15,13 @@ static HParseResult* parse_ch(void* env, HParseState *state) { static const HParserVtable ch_vt = { .parse = parse_ch, }; -const HParser* h_ch(const uint8_t c) { - HParser *ret = g_new(HParser, 1); + +const HParser* h_ch(const uint8_t c) { + return h_ch__m(&system_allocator, c); +} +const HParser* h_ch__m(HAllocator* mm__, const uint8_t c) { + HParser *ret = h_new(HParser, 1); ret->vtable = &ch_vt; - ret->env = GUINT_TO_POINTER(c); + ret->env = (void*)(unsigned long)(c); return (const HParser*)ret; } diff --git a/src/parsers/charset.c b/src/parsers/charset.c index b9642fc..f11715f 100644 --- a/src/parsers/charset.c +++ b/src/parsers/charset.c @@ -1,5 +1,24 @@ +#include #include "parser_internal.h" +typedef unsigned int *HCharset; + +static inline HCharset new_charset(HAllocator* mm__) { + HCharset cs = h_new(unsigned int, 256 / sizeof(unsigned int)); + memset(cs, 0, 256); + return cs; +} + +static inline int charset_isset(HCharset cs, uint8_t pos) { + return !!(cs[pos / sizeof(*cs)] & (1 << (pos % sizeof(*cs)))); +} + +static inline void charset_set(HCharset cs, uint8_t pos, int val) { + cs[pos / sizeof(*cs)] = + val + ? 
cs[pos / sizeof(*cs)] | (1 << (pos % sizeof(*cs))) + : cs[pos / sizeof(*cs)] & ~(1 << (pos % sizeof(*cs))); +} static HParseResult* parse_charset(void *env, HParseState *state) { uint8_t in = h_read_bits(&state->input_stream, 8, false); @@ -18,8 +37,11 @@ static const HParserVtable charset_vt = { }; const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) { - HParser *ret = g_new(HParser, 1); - HCharset cs = new_charset(); + return h_ch_range__m(&system_allocator, lower, upper); +} +const HParser* h_ch_range__m(HAllocator* mm__, const uint8_t lower, const uint8_t upper) { + HParser *ret = h_new(HParser, 1); + HCharset cs = new_charset(mm__); for (int i = 0; i < 256; i++) charset_set(cs, i, (lower <= i) && (i <= upper)); ret->vtable = &charset_vt; @@ -28,9 +50,9 @@ const HParser* h_ch_range(const uint8_t lower, const uint8_t upper) { } -const HParser* h_in_or_not(const uint8_t *options, size_t count, int val) { - HParser *ret = g_new(HParser, 1); - HCharset cs = new_charset(); +static const HParser* h_in_or_not__m(HAllocator* mm__, const uint8_t *options, size_t count, int val) { + HParser *ret = h_new(HParser, 1); + HCharset cs = new_charset(mm__); for (size_t i = 0; i < 256; i++) charset_set(cs, i, 1-val); for (size_t i = 0; i < count; i++) @@ -42,10 +64,18 @@ const HParser* h_in_or_not(const uint8_t *options, size_t count, int val) { } const HParser* h_in(const uint8_t *options, size_t count) { - return h_in_or_not(options, count, 1); + return h_in_or_not__m(&system_allocator, options, count, 1); +} + +const HParser* h_in__m(HAllocator* mm__, const uint8_t *options, size_t count) { + return h_in_or_not__m(mm__, options, count, 1); } const HParser* h_not_in(const uint8_t *options, size_t count) { - return h_in_or_not(options, count, 0); + return h_in_or_not__m(&system_allocator, options, count, 0); +} + +const HParser* h_not_in__m(HAllocator* mm__, const uint8_t *options, size_t count) { + return h_in_or_not__m(mm__, options, count, 0); } diff --git a/src/parsers/choice.c b/src/parsers/choice.c index 082a2e1..73dedde 100644 --- a/src/parsers/choice.c +++ b/src/parsers/choice.c @@ -1,3 +1,4 @@ +#include #include "parser_internal.h" typedef struct { @@ -25,20 +26,40 @@ static const HParserVtable choice_vt = { }; const HParser* h_choice(const HParser* p, ...) { + va_list ap; + va_start(ap, p); + const HParser* ret = h_choice__mv(&system_allocator, p, ap); + va_end(ap); + return ret; +} + +const HParser* h_choice__m(HAllocator* mm__, const HParser* p, ...) { + va_list ap; + va_start(ap, p); + const HParser* ret = h_choice__mv(mm__, p, ap); + va_end(ap); + return ret; +} + +const HParser* h_choice__v(const HParser* p, va_list ap) { + return h_choice__mv(&system_allocator, p, ap); +} + +const HParser* h_choice__mv(HAllocator* mm__, const HParser* p, va_list ap_) { va_list ap; size_t len = 0; - HSequence *s = g_new(HSequence, 1); + HSequence *s = h_new(HSequence, 1); const HParser *arg; - va_start(ap, p); + va_copy(ap, ap_); do { len++; arg = va_arg(ap, const HParser *); } while (arg); va_end(ap); - s->p_array = g_new(const HParser *, len); + s->p_array = h_new(const HParser *, len); - va_start(ap, p); + va_copy(ap, ap_); s->p_array[0] = p; for (size_t i = 1; i < len; i++) { s->p_array[i] = va_arg(ap, const HParser *); @@ -46,7 +67,7 @@ const HParser* h_choice(const HParser* p, ...) 
{ va_end(ap); s->len = len; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &choice_vt; ret->env = (void*)s; return ret; } diff --git a/src/parsers/difference.c b/src/parsers/difference.c index 7f167a0..c6cc69c 100644 --- a/src/parsers/difference.c +++ b/src/parsers/difference.c @@ -38,10 +38,13 @@ static HParserVtable difference_vt = { .parse = parse_difference, }; -const HParser* h_difference(const HParser* p1, const HParser* p2) { - HTwoParsers *env = g_new(HTwoParsers, 1); +const HParser* h_difference(const HParser* p1, const HParser* p2) { + return h_difference__m(&system_allocator, p1, p2); +} +const HParser* h_difference__m(HAllocator* mm__, const HParser* p1, const HParser* p2) { + HTwoParsers *env = h_new(HTwoParsers, 1); env->p1 = p1; env->p2 = p2; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &difference_vt; ret->env = (void*)env; return ret; } diff --git a/src/parsers/end.c b/src/parsers/end.c index 8e427bd..0f0d1c7 100644 --- a/src/parsers/end.c +++ b/src/parsers/end.c @@ -14,8 +14,13 @@ static const HParserVtable end_vt = { .parse = parse_end, }; -const HParser* h_end_p() { - HParser *ret = g_new(HParser, 1); - ret->vtable = &end_vt; ret->env = NULL; +const HParser* h_end_p() { + return h_end_p__m(&system_allocator); +} + +const HParser* h_end_p__m(HAllocator* mm__) { + HParser *ret = h_new(HParser, 1); + ret->vtable = &end_vt; + ret->env = NULL; return (const HParser*)ret; } diff --git a/src/parsers/epsilon.c b/src/parsers/epsilon.c index dc6d7a6..ae959cd 100644 --- a/src/parsers/epsilon.c +++ b/src/parsers/epsilon.c @@ -20,3 +20,6 @@ static const HParser epsilon_p = { const HParser* h_epsilon_p() { return &epsilon_p; } +const HParser* h_epsilon_p__m(HAllocator* mm__) { + return &epsilon_p; +} diff --git a/src/parsers/ignore.c b/src/parsers/ignore.c index 5972548..03cdc10 100644 --- a/src/parsers/ignore.c +++ b/src/parsers/ignore.c @@ -15,7 +15,10 @@ static const HParserVtable ignore_vt = { }; const HParser* h_ignore(const HParser* p) { - HParser* ret = g_new(HParser, 1); + return h_ignore__m(&system_allocator, p); +} +const HParser* h_ignore__m(HAllocator* mm__, const HParser* p) { + HParser* ret = h_new(HParser, 1); ret->vtable = &ignore_vt; ret->env = (void*)p; return ret; diff --git a/src/parsers/ignoreseq.c b/src/parsers/ignoreseq.c index 8aac2c8..228f635 100644 --- a/src/parsers/ignoreseq.c +++ b/src/parsers/ignoreseq.c @@ -35,38 +35,48 @@ static const HParserVtable ignoreseq_vt = { // API frontends // -static const HParser* h_leftright(const HParser* p, const HParser* q, size_t which) { - HIgnoreSeq *seq = g_new(HIgnoreSeq, 1); - seq->parsers = g_new(const HParser*, 2); +static const HParser* h_leftright__m(HAllocator* mm__, const HParser* p, const HParser* q, size_t which) { + HIgnoreSeq *seq = h_new(HIgnoreSeq, 1); + seq->parsers = h_new(const HParser*, 2); seq->parsers[0] = p; seq->parsers[1] = q; seq->count = 2; seq->which = which; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &ignoreseq_vt; ret->env = (void*)seq; return ret; } const HParser* h_left(const HParser* p, const HParser* q) { - return h_leftright(p, q, 0); + return h_leftright__m(&system_allocator, p, q, 0); +} +const HParser* h_left__m(HAllocator* mm__, const HParser* p, const HParser* q) { + return h_leftright__m(mm__, p, q, 0); } const HParser* h_right(const HParser* p, const HParser* q) { - return h_leftright(p, q, 1); + return h_leftright__m(&system_allocator, p, q, 1); +} +const 
HParser* h_right__m(HAllocator* mm__, const HParser* p, const HParser* q) { + return h_leftright__m(mm__, p, q, 1); } + const HParser* h_middle(const HParser* p, const HParser* x, const HParser* q) { - HIgnoreSeq *seq = g_new(HIgnoreSeq, 1); - seq->parsers = g_new(const HParser*, 3); + return h_middle__m(&system_allocator, p, x, q); +} +const HParser* h_middle__m(HAllocator* mm__, const HParser* p, const HParser* x, const HParser* q) { + HIgnoreSeq *seq = h_new(HIgnoreSeq, 1); + seq->parsers = h_new(const HParser*, 3); seq->parsers[0] = p; seq->parsers[1] = x; seq->parsers[2] = q; seq->count = 3; seq->which = 1; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &ignoreseq_vt; ret->env = (void*)seq; return ret; diff --git a/src/parsers/indirect.c b/src/parsers/indirect.c index 758116d..96e2a65 100644 --- a/src/parsers/indirect.c +++ b/src/parsers/indirect.c @@ -13,7 +13,10 @@ void h_bind_indirect(HParser* indirect, const HParser* inner) { } HParser* h_indirect() { - HParser *res = g_new(HParser, 1); + return h_indirect__m(&system_allocator); +} +HParser* h_indirect__m(HAllocator* mm__) { + HParser *res = h_new(HParser, 1); res->vtable = &indirect_vt; res->env = NULL; return res; diff --git a/src/parsers/int_range.c b/src/parsers/int_range.c index 9fb1c7e..2a1db63 100644 --- a/src/parsers/int_range.c +++ b/src/parsers/int_range.c @@ -33,6 +33,9 @@ static const HParserVtable int_range_vt = { }; const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t upper) { + return h_int_range__m(&system_allocator, p, lower, upper); +} +const HParser* h_int_range__m(HAllocator* mm__, const HParser *p, const int64_t lower, const int64_t upper) { // p must be an integer parser, which means it's using parse_bits // TODO: re-add this check //assert_message(p->vtable == &bits_vt, "int_range requires an integer parser"); @@ -40,11 +43,11 @@ const HParser* h_int_range(const HParser *p, const int64_t lower, const int64_t // and regardless, the bounds need to fit in the parser in question // TODO: check this as well. 
- HRange *r_env = g_new(HRange, 1); + HRange *r_env = h_new(HRange, 1); r_env->p = p; r_env->lower = lower; r_env->upper = upper; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &int_range_vt; ret->env = (void*)r_env; return ret; diff --git a/src/parsers/many.c b/src/parsers/many.c index f18be26..6cb818b 100644 --- a/src/parsers/many.c +++ b/src/parsers/many.c @@ -49,10 +49,13 @@ static const HParserVtable many_vt = { }; const HParser* h_many(const HParser* p) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); + return h_many__m(&system_allocator, p); +} +const HParser* h_many__m(HAllocator* mm__, const HParser* p) { + HParser *res = h_new(HParser, 1); + HRepeat *env = h_new(HRepeat, 1); env->p = p; - env->sep = h_epsilon_p(); + env->sep = h_epsilon_p__m(mm__); env->count = 0; env->min_p = true; res->vtable = &many_vt; @@ -61,10 +64,13 @@ const HParser* h_many(const HParser* p) { } const HParser* h_many1(const HParser* p) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); + return h_many1__m(&system_allocator, p); +} +const HParser* h_many1__m(HAllocator* mm__, const HParser* p) { + HParser *res = h_new(HParser, 1); + HRepeat *env = h_new(HRepeat, 1); env->p = p; - env->sep = h_epsilon_p(); + env->sep = h_epsilon_p__m(mm__); env->count = 1; env->min_p = true; res->vtable = &many_vt; @@ -73,10 +79,13 @@ const HParser* h_many1(const HParser* p) { } const HParser* h_repeat_n(const HParser* p, const size_t n) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); + return h_repeat_n__m(&system_allocator, p, n); +} +const HParser* h_repeat_n__m(HAllocator* mm__, const HParser* p, const size_t n) { + HParser *res = h_new(HParser, 1); + HRepeat *env = h_new(HRepeat, 1); env->p = p; - env->sep = h_epsilon_p(); + env->sep = h_epsilon_p__m(mm__); env->count = n; env->min_p = false; res->vtable = &many_vt; @@ -85,8 +94,11 @@ const HParser* h_repeat_n(const HParser* p, const size_t n) { } const HParser* h_sepBy(const HParser* p, const HParser* sep) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); + return h_sepBy__m(&system_allocator, p, sep); +} +const HParser* h_sepBy__m(HAllocator* mm__, const HParser* p, const HParser* sep) { + HParser *res = h_new(HParser, 1); + HRepeat *env = h_new(HRepeat, 1); env->p = p; env->sep = sep; env->count = 0; @@ -97,8 +109,11 @@ const HParser* h_sepBy(const HParser* p, const HParser* sep) { } const HParser* h_sepBy1(const HParser* p, const HParser* sep) { - HParser *res = g_new(HParser, 1); - HRepeat *env = g_new(HRepeat, 1); + return h_sepBy1__m(&system_allocator, p, sep); +} +const HParser* h_sepBy1__m(HAllocator* mm__, const HParser* p, const HParser* sep) { + HParser *res = h_new(HParser, 1); + HRepeat *env = h_new(HRepeat, 1); env->p = p; env->sep = sep; env->count = 1; @@ -135,9 +150,12 @@ static const HParserVtable length_value_vt = { }; const HParser* h_length_value(const HParser* length, const HParser* value) { - HParser *res = g_new(HParser, 1); + return h_length_value__m(&system_allocator, length, value); +} +const HParser* h_length_value__m(HAllocator* mm__, const HParser* length, const HParser* value) { + HParser *res = h_new(HParser, 1); res->vtable = &length_value_vt; - HLenVal *env = g_new(HLenVal, 1); + HLenVal *env = h_new(HLenVal, 1); env->length = length; env->value = value; res->env = (void*)env; diff --git a/src/parsers/not.c b/src/parsers/not.c index 1c46b6d..73cf129 100644 --- a/src/parsers/not.c +++ 
b/src/parsers/not.c @@ -15,7 +15,10 @@ static const HParserVtable not_vt = { }; const HParser* h_not(const HParser* p) { - HParser *res = g_new(HParser, 1); + return h_not__m(&system_allocator, p); +} +const HParser* h_not__m(HAllocator* mm__, const HParser* p) { + HParser *res = h_new(HParser, 1); res->vtable = &not_vt; res->env = (void*)p; return res; diff --git a/src/parsers/nothing.c b/src/parsers/nothing.c index 9f81c02..120c8ef 100644 --- a/src/parsers/nothing.c +++ b/src/parsers/nothing.c @@ -10,8 +10,11 @@ static const HParserVtable nothing_vt = { .parse = parse_nothing, }; -const HParser* h_nothing_p() { - HParser *ret = g_new(HParser, 1); +const HParser* h_nothing_p() { + return h_nothing_p__m(&system_allocator); +} +const HParser* h_nothing_p__m(HAllocator* mm__) { + HParser *ret = h_new(HParser, 1); ret->vtable = &nothing_vt; ret->env = NULL; return (const HParser*)ret; } diff --git a/src/parsers/optional.c b/src/parsers/optional.c index c084576..9625fa0 100644 --- a/src/parsers/optional.c +++ b/src/parsers/optional.c @@ -16,9 +16,12 @@ static const HParserVtable optional_vt = { }; const HParser* h_optional(const HParser* p) { + return h_optional__m(&system_allocator, p); +} +const HParser* h_optional__m(HAllocator* mm__, const HParser* p) { // TODO: re-add this //assert_message(p->vtable != &ignore_vt, "Thou shalt ignore an option, rather than the other way 'round."); - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &optional_vt; ret->env = (void*)p; return ret; diff --git a/src/parsers/sequence.c b/src/parsers/sequence.c index 54196da..21ae31d 100644 --- a/src/parsers/sequence.c +++ b/src/parsers/sequence.c @@ -1,3 +1,4 @@ +#include <stdarg.h> #include "parser_internal.h" typedef struct { @@ -27,20 +28,40 @@ static const HParserVtable sequence_vt = { .parse = parse_sequence, }; -const HParser* h_sequence(const HParser *p, ...) { +const HParser* h_sequence(const HParser* p, ...) { + va_list ap; + va_start(ap, p); + const HParser* ret = h_sequence__mv(&system_allocator, p, ap); + va_end(ap); + return ret; +} + +const HParser* h_sequence__m(HAllocator* mm__, const HParser* p, ...) { + va_list ap; + va_start(ap, p); + const HParser* ret = h_sequence__mv(mm__, p, ap); + va_end(ap); + return ret; +} + +const HParser* h_sequence__v(const HParser* p, va_list ap) { + return h_sequence__mv(&system_allocator, p, ap); +} + +const HParser* h_sequence__mv(HAllocator* mm__, const HParser *p, va_list ap_) { va_list ap; size_t len = 0; const HParser *arg; - va_start(ap, p); + va_copy(ap, ap_); do { len++; arg = va_arg(ap, const HParser *); } while (arg); va_end(ap); - HSequence *s = g_new(HSequence, 1); - s->p_array = g_new(const HParser *, len); + HSequence *s = h_new(HSequence, 1); + s->p_array = h_new(const HParser *, len); - va_start(ap, p); + va_copy(ap, ap_); s->p_array[0] = p; for (size_t i = 1; i < len; i++) { s->p_array[i] = va_arg(ap, const HParser *); @@ -48,7 +69,7 @@ const HParser* h_sequence(const HParser *p, ...)
{ va_end(ap); s->len = len; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &sequence_vt; ret->env = (void*)s; return ret; } diff --git a/src/parsers/token.c b/src/parsers/token.c index b3be207..13bafbb 100644 --- a/src/parsers/token.c +++ b/src/parsers/token.c @@ -20,14 +20,17 @@ static HParseResult* parse_token(void *env, HParseState *state) { return make_result(state, tok); } -const const HParserVtable token_vt = { +const HParserVtable token_vt = { .parse = parse_token, }; -const HParser* h_token(const uint8_t *str, const size_t len) { - HToken *t = g_new(HToken, 1); +const HParser* h_token(const uint8_t *str, const size_t len) { + return h_token__m(&system_allocator, str, len); +} +const HParser* h_token__m(HAllocator* mm__, const uint8_t *str, const size_t len) { + HToken *t = h_new(HToken, 1); t->str = (uint8_t*)str, t->len = len; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &token_vt; ret->env = t; return (const HParser*)ret; diff --git a/src/parsers/unimplemented.c b/src/parsers/unimplemented.c index 99d153b..35ee391 100644 --- a/src/parsers/unimplemented.c +++ b/src/parsers/unimplemented.c @@ -24,3 +24,6 @@ static HParser unimplemented = { const HParser* h_unimplemented() { return &unimplemented; } +const HParser* h_unimplemented__m(HAllocator* mm__) { + return &unimplemented; +} diff --git a/src/parsers/whitespace.c b/src/parsers/whitespace.c index 4d2ec17..45c7dcb 100644 --- a/src/parsers/whitespace.c +++ b/src/parsers/whitespace.c @@ -8,7 +8,7 @@ static HParseResult* parse_whitespace(void* env, HParseState *state) { bak = state->input_stream; c = h_read_bits(&state->input_stream, 8, false); if (state->input_stream.overrun) - return NULL; + break; } while (isspace(c)); state->input_stream = bak; return h_do_parse((HParser*)env, state); @@ -19,7 +19,10 @@ static const HParserVtable whitespace_vt = { }; const HParser* h_whitespace(const HParser* p) { - HParser *ret = g_new(HParser, 1); + return h_whitespace__m(&system_allocator, p); +} +const HParser* h_whitespace__m(HAllocator* mm__, const HParser* p) { + HParser *ret = h_new(HParser, 1); ret->vtable = &whitespace_vt; ret->env = (void*)p; return ret; diff --git a/src/parsers/xor.c b/src/parsers/xor.c index 9ffd51e..a11ad4a 100644 --- a/src/parsers/xor.c +++ b/src/parsers/xor.c @@ -35,10 +35,13 @@ static const HParserVtable xor_vt = { .parse = parse_xor, }; -const HParser* h_xor(const HParser* p1, const HParser* p2) { - HTwoParsers *env = g_new(HTwoParsers, 1); +const HParser* h_xor(const HParser* p1, const HParser* p2) { + return h_xor__m(&system_allocator, p1, p2); +} +const HParser* h_xor__m(HAllocator* mm__, const HParser* p1, const HParser* p2) { + HTwoParsers *env = h_new(HTwoParsers, 1); env->p1 = p1; env->p2 = p2; - HParser *ret = g_new(HParser, 1); + HParser *ret = h_new(HParser, 1); ret->vtable = &xor_vt; ret->env = (void*)env; return ret; } diff --git a/src/pprint.c b/src/pprint.c index 8dc5852..d8b22e2 100644 --- a/src/pprint.c +++ b/src/pprint.c @@ -17,10 +17,10 @@ #define _GNU_SOURCE #include -#include #include #include "hammer.h" -#include +#include "internal.h" +#include typedef struct pp_state { int delta; @@ -69,20 +69,25 @@ void h_pprint(FILE* stream, const HParsedToken* tok, int indent, int delta) { fprintf(stream, "%*sUSER\n", indent, ""); break; default: - g_assert_not_reached(); + if(tok->token_type > TT_USER) { + fprintf(stream, "%*sUSER %d\n", indent, "", tok->token_type-TT_USER); + } else { + assert_message(0, "Should not reach 
here."); + } } } struct result_buf { char* output; + HAllocator *mm__; size_t len; size_t capacity; }; static inline void ensure_capacity(struct result_buf *buf, int amt) { while (buf->len + amt >= buf->capacity) - buf->output = g_realloc(buf->output, buf->capacity *= 2); + buf->output = buf->mm__->realloc(buf->mm__, buf->output, buf->capacity *= 2); } static inline void append_buf(struct result_buf *buf, const char* input, int len) { @@ -149,15 +154,19 @@ static void unamb_sub(const HParsedToken* tok, struct result_buf *buf) { break; default: fprintf(stderr, "Unexpected token type %d\n", tok->token_type); - g_assert_not_reached(); + assert_message(0, "Should not reach here."); } } char* h_write_result_unamb(const HParsedToken* tok) { + return h_write_result_unamb__m(&system_allocator, tok); +} +char* h_write_result_unamb__m(HAllocator* mm__, const HParsedToken* tok) { struct result_buf buf = { - .output = g_malloc0(16), + .output = mm__->alloc(mm__, 16), .len = 0, + .mm__ = mm__, .capacity = 16 }; unamb_sub(tok, &buf); diff --git a/src/system_allocator.c b/src/system_allocator.c new file mode 100644 index 0000000..7248fd2 --- /dev/null +++ b/src/system_allocator.c @@ -0,0 +1,20 @@ +#include +#include "internal.h" + +static void* system_alloc(HAllocator *allocator, size_t size) { + return malloc(size); +} + +static void* system_realloc(HAllocator *allocator, void* ptr, size_t size) { + return realloc(ptr, size); +} + +static void system_free(HAllocator *allocator, void* ptr) { + free(ptr); +} + +HAllocator system_allocator = { + .alloc = system_alloc, + .realloc = system_realloc, + .free = system_free, +}; diff --git a/src/t_benchmark.c b/src/t_benchmark.c new file mode 100644 index 0000000..60d22c5 --- /dev/null +++ b/src/t_benchmark.c @@ -0,0 +1,22 @@ +#include +#include "hammer.h" +#include "test_suite.h" + +HParserTestcase testcases[] = { + {(unsigned char*)"1,2,3", 5, "(u0x31 u0x32 u0x33)"}, + {(unsigned char*)"1,3,2", 5, "(u0x31 u0x33 u0x32)"}, + {(unsigned char*)"1,3", 3, "(u0x31 u0x33)"}, + {(unsigned char*)"3", 1, "(u0x33)"}, + { NULL, 0, NULL } +}; + +static void test_benchmark_1() { + const HParser *parser = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(',')); + + HBenchmarkResults *res = h_benchmark(parser, testcases); + h_benchmark_report(stderr, res); +} + +void register_benchmark_tests(void) { + g_test_add_func("/core/benchmark/1", test_benchmark_1); +} diff --git a/src/t_bitreader.c b/src/t_bitreader.c new file mode 100644 index 0000000..84e1057 --- /dev/null +++ b/src/t_bitreader.c @@ -0,0 +1,67 @@ +#include +#include "hammer.h" +#include "internal.h" +#include "test_suite.h" + +#define MK_INPUT_STREAM(buf,len,endianness_) \ + { \ + .input = (uint8_t*)buf, \ + .length = len, \ + .index = 0, \ + .bit_offset = (((endianness_) & BIT_BIG_ENDIAN) ? 
8 : 0), \ + .endianness = endianness_ \ + } + + +static void test_bitreader_ints(void) { + HInputStream is = MK_INPUT_STREAM("\xFF\xFF\xFF\xFE\x00\x00\x00\x00", 8, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); + g_check_cmplong(h_read_bits(&is, 64, true), ==, -0x200000000); +} + +static void test_bitreader_be(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); + g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x03); + g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x52); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A); +} +static void test_bitreader_le(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmpint(h_read_bits(&is, 3, false), ==, 0x02); + g_check_cmpint(h_read_bits(&is, 8, false), ==, 0x4D); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B); +} + +static void test_largebits_be(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); + g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x352); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x1A); +} + +static void test_largebits_le(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x26A); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0x0B); +} + +static void test_offset_largebits_be(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xD); + g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x25A); +} + +static void test_offset_largebits_le(void) { + HInputStream is = MK_INPUT_STREAM("\x6A\x5A", 2, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); + g_check_cmpint(h_read_bits(&is, 5, false), ==, 0xA); + g_check_cmpint(h_read_bits(&is, 11, false), ==, 0x2D3); +} + + +void register_bitreader_tests(void) { + g_test_add_func("/core/bitreader/be", test_bitreader_be); + g_test_add_func("/core/bitreader/le", test_bitreader_le); + g_test_add_func("/core/bitreader/largebits-be", test_largebits_be); + g_test_add_func("/core/bitreader/largebits-le", test_largebits_le); + g_test_add_func("/core/bitreader/offset-largebits-be", test_offset_largebits_be); + g_test_add_func("/core/bitreader/offset-largebits-le", test_offset_largebits_le); + g_test_add_func("/core/bitreader/ints", test_bitreader_ints); +} diff --git a/src/t_bitwriter.c b/src/t_bitwriter.c new file mode 100644 index 0000000..d38c53c --- /dev/null +++ b/src/t_bitwriter.c @@ -0,0 +1,108 @@ +#include +#include "hammer.h" +#include "internal.h" +#include "test_suite.h" + +typedef struct { + unsigned long long data; + size_t nbits; +} bitwriter_test_elem; // should end with {0,0} + +void run_bitwriter_test(bitwriter_test_elem data[], char flags) { + size_t len; + const uint8_t *buf; + HBitWriter *w = h_bit_writer_new(&system_allocator); + int i; + w->flags = flags; + for (i = 0; data[i].nbits; i++) { + h_bit_writer_put(w, data[i].data, data[i].nbits); + } + + buf = h_bit_writer_get_buffer(w, &len); + HInputStream input = { + .input = buf, + .index = 0, + .length = len, + .bit_offset = (flags & BIT_BIG_ENDIAN) ? 
8 : 0, + .endianness = flags, + .overrun = 0 + }; + + for (i = 0; data[i].nbits; i++) { + g_check_cmpulonglong ((unsigned long long)h_read_bits(&input, data[i].nbits, FALSE), ==, data[i].data); + } +} + +static void test_bitwriter_ints(void) { + bitwriter_test_elem data[] = { + { -0x200000000, 64 }, + { 0,0 } + }; + run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); +} + +static void test_bitwriter_be(void) { + bitwriter_test_elem data[] = { + { 0x03, 3 }, + { 0x52, 8 }, + { 0x1A, 5 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); +} + +static void test_bitwriter_le(void) { + bitwriter_test_elem data[] = { + { 0x02, 3 }, + { 0x4D, 8 }, + { 0x0B, 5 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); +} + +static void test_largebits_be(void) { + bitwriter_test_elem data[] = { + { 0x352, 11 }, + { 0x1A, 5 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); +} + +static void test_largebits_le(void) { + bitwriter_test_elem data[] = { + { 0x26A, 11 }, + { 0x0B, 5 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); +} + +static void test_offset_largebits_be(void) { + bitwriter_test_elem data[] = { + { 0xD, 5 }, + { 0x25A, 11 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_BIG_ENDIAN | BYTE_BIG_ENDIAN); +} + +static void test_offset_largebits_le(void) { + bitwriter_test_elem data[] = { + { 0xA, 5 }, + { 0x2D3, 11 }, + { 0, 0 } + }; + run_bitwriter_test(data, BIT_LITTLE_ENDIAN | BYTE_LITTLE_ENDIAN); +} + +void register_bitwriter_tests(void) { + g_test_add_func("/core/bitwriter/be", test_bitwriter_be); + g_test_add_func("/core/bitwriter/le", test_bitwriter_le); + g_test_add_func("/core/bitwriter/largebits-be", test_largebits_be); + g_test_add_func("/core/bitwriter/largebits-le", test_largebits_le); + g_test_add_func("/core/bitwriter/offset-largebits-be", test_offset_largebits_be); + g_test_add_func("/core/bitwriter/offset-largebits-le", test_offset_largebits_le); + g_test_add_func("/core/bitwriter/ints", test_bitwriter_ints); +} diff --git a/src/t_misc.c b/src/t_misc.c new file mode 100644 index 0000000..5c08a2e --- /dev/null +++ b/src/t_misc.c @@ -0,0 +1,16 @@ +#include +#include "test_suite.h" +#include "hammer.h" + +static void test_tt_user(void) { + g_check_cmpint(TT_USER, >, TT_NONE); + g_check_cmpint(TT_USER, >, TT_BYTES); + g_check_cmpint(TT_USER, >, TT_SINT); + g_check_cmpint(TT_USER, >, TT_UINT); + g_check_cmpint(TT_USER, >, TT_SEQUENCE); + g_check_cmpint(TT_USER, >, TT_ERR); +} + +void register_misc_tests(void) { + g_test_add_func("/core/misc/tt_user", test_tt_user); +} diff --git a/src/t_parser.c b/src/t_parser.c new file mode 100644 index 0000000..daca1a3 --- /dev/null +++ b/src/t_parser.c @@ -0,0 +1,421 @@ +#include +#include +#include "hammer.h" +#include "internal.h" +#include "test_suite.h" +#include "parsers/parser_internal.h" + +static void test_token(void) { + const HParser *token_ = h_token((const uint8_t*)"95\xa2", 3); + + g_check_parse_ok(token_, "95\xa2", 3, "<39.35.a2>"); + g_check_parse_failed(token_, "95", 2); +} + +static void test_ch(void) { + const HParser *ch_ = h_ch(0xa2); + + g_check_parse_ok(ch_, "\xa2", 1, "u0xa2"); + g_check_parse_failed(ch_, "\xa3", 1); +} + +static void test_ch_range(void) { + const HParser *range_ = h_ch_range('a', 'c'); + + g_check_parse_ok(range_, "b", 1, "u0x62"); + g_check_parse_failed(range_, "d", 1); +} + +//@MARK_START +static void test_int64(void) { + const HParser *int64_ = h_int64(); + + 
g_check_parse_ok(int64_, "\xff\xff\xff\xfe\x00\x00\x00\x00", 8, "s-0x200000000"); + g_check_parse_failed(int64_, "\xff\xff\xff\xfe\x00\x00\x00", 7); +} + +static void test_int32(void) { + const HParser *int32_ = h_int32(); + + g_check_parse_ok(int32_, "\xff\xfe\x00\x00", 4, "s-0x20000"); + g_check_parse_failed(int32_, "\xff\xfe\x00", 3); +} + +static void test_int16(void) { + const HParser *int16_ = h_int16(); + + g_check_parse_ok(int16_, "\xfe\x00", 2, "s-0x200"); + g_check_parse_failed(int16_, "\xfe", 1); +} + +static void test_int8(void) { + const HParser *int8_ = h_int8(); + + g_check_parse_ok(int8_, "\x88", 1, "s-0x78"); + g_check_parse_failed(int8_, "", 0); +} + +static void test_uint64(void) { + const HParser *uint64_ = h_uint64(); + + g_check_parse_ok(uint64_, "\x00\x00\x00\x02\x00\x00\x00\x00", 8, "u0x200000000"); + g_check_parse_failed(uint64_, "\x00\x00\x00\x02\x00\x00\x00", 7); +} + +static void test_uint32(void) { + const HParser *uint32_ = h_uint32(); + + g_check_parse_ok(uint32_, "\x00\x02\x00\x00", 4, "u0x20000"); + g_check_parse_failed(uint32_, "\x00\x02\x00", 3); +} + +static void test_uint16(void) { + const HParser *uint16_ = h_uint16(); + + g_check_parse_ok(uint16_, "\x02\x00", 2, "u0x200"); + g_check_parse_failed(uint16_, "\x02", 1); +} + +static void test_uint8(void) { + const HParser *uint8_ = h_uint8(); + + g_check_parse_ok(uint8_, "\x78", 1, "u0x78"); + g_check_parse_failed(uint8_, "", 0); +} +//@MARK_END + +static void test_int_range(void) { + const HParser *int_range_ = h_int_range(h_uint8(), 3, 10); + + g_check_parse_ok(int_range_, "\x05", 1, "u0x5"); + g_check_parse_failed(int_range_, "\xb", 1); +} + +#if 0 +static void test_float64(void) { + const HParser *float64_ = h_float64(); + + g_check_parse_ok(float64_, "\x3f\xf0\x00\x00\x00\x00\x00\x00", 8, 1.0); + g_check_parse_failed(float64_, "\x3f\xf0\x00\x00\x00\x00\x00", 7); +} + +static void test_float32(void) { + const HParser *float32_ = h_float32(); + + g_check_parse_ok(float32_, "\x3f\x80\x00\x00", 4, 1.0); + g_check_parse_failed(float32_, "\x3f\x80\x00"); +} +#endif + + +static void test_whitespace(void) { + const HParser *whitespace_ = h_whitespace(h_ch('a')); + const HParser *whitespace_end = h_whitespace(h_end_p()); + + g_check_parse_ok(whitespace_, "a", 1, "u0x61"); + g_check_parse_ok(whitespace_, " a", 2, "u0x61"); + g_check_parse_ok(whitespace_, " a", 3, "u0x61"); + g_check_parse_ok(whitespace_, "\ta", 2, "u0x61"); + g_check_parse_failed(whitespace_, "_a", 2); + + g_check_parse_ok(whitespace_end, "", 0, "NULL"); + g_check_parse_ok(whitespace_end, " ", 2, "NULL"); + g_check_parse_failed(whitespace_end, " x", 3); +} + +static void test_left(void) { + const HParser *left_ = h_left(h_ch('a'), h_ch(' ')); + + g_check_parse_ok(left_, "a ", 2, "u0x61"); + g_check_parse_failed(left_, "a", 1); + g_check_parse_failed(left_, " ", 1); + g_check_parse_failed(left_, "ab", 2); +} + +static void test_right(void) { + const HParser *right_ = h_right(h_ch(' '), h_ch('a')); + + g_check_parse_ok(right_, " a", 2, "u0x61"); + g_check_parse_failed(right_, "a", 1); + g_check_parse_failed(right_, " ", 1); + g_check_parse_failed(right_, "ba", 2); +} + +static void test_middle(void) { + const HParser *middle_ = h_middle(h_ch(' '), h_ch('a'), h_ch(' ')); + + g_check_parse_ok(middle_, " a ", 3, "u0x61"); + g_check_parse_failed(middle_, "a", 1); + g_check_parse_failed(middle_, " ", 1); + g_check_parse_failed(middle_, " a", 2); + g_check_parse_failed(middle_, "a ", 2); + g_check_parse_failed(middle_, " b ", 3); + 
g_check_parse_failed(middle_, "ba ", 3); + g_check_parse_failed(middle_, " ab", 3); +} + +#include <ctype.h> + +const HParsedToken* upcase(const HParseResult *p) { + switch(p->ast->token_type) { + case TT_SEQUENCE: + { + HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); + HCountedArray *seq = h_carray_new_sized(p->arena, p->ast->seq->used); + ret->token_type = TT_SEQUENCE; + for (size_t i=0; i<p->ast->seq->used; ++i) { + if (TT_UINT == ((HParsedToken*)p->ast->seq->elements[i])->token_type) { + HParsedToken *tmp = a_new_(p->arena, HParsedToken, 1); + tmp->token_type = TT_UINT; + tmp->uint = toupper(((HParsedToken*)p->ast->seq->elements[i])->uint); + h_carray_append(seq, tmp); + } else { + h_carray_append(seq, p->ast->seq->elements[i]); + } + } + ret->seq = seq; + return (const HParsedToken*)ret; + } + case TT_UINT: + { + HParsedToken *ret = a_new_(p->arena, HParsedToken, 1); + ret->token_type = TT_UINT; + ret->uint = toupper(p->ast->uint); + return (const HParsedToken*)ret; + } + default: + return p->ast; + } +} + +static void test_action(void) { + const HParser *action_ = h_action(h_sequence(h_choice(h_ch('a'), + h_ch('A'), + NULL), + h_choice(h_ch('b'), + h_ch('B'), + NULL), + NULL), + upcase); + + g_check_parse_ok(action_, "ab", 2, "(u0x41 u0x42)"); + g_check_parse_ok(action_, "AB", 2, "(u0x41 u0x42)"); + g_check_parse_failed(action_, "XX", 2); +} + +static void test_in(void) { + uint8_t options[3] = { 'a', 'b', 'c' }; + const HParser *in_ = h_in(options, 3); + g_check_parse_ok(in_, "b", 1, "u0x62"); + g_check_parse_failed(in_, "d", 1); + +} + +static void test_not_in(void) { + uint8_t options[3] = { 'a', 'b', 'c' }; + const HParser *not_in_ = h_not_in(options, 3); + g_check_parse_ok(not_in_, "d", 1, "u0x64"); + g_check_parse_failed(not_in_, "a", 1); + +} + +static void test_end_p(void) { + const HParser *end_p_ = h_sequence(h_ch('a'), h_end_p(), NULL); + g_check_parse_ok(end_p_, "a", 1, "(u0x61)"); + g_check_parse_failed(end_p_, "aa", 2); +} + +static void test_nothing_p(void) { + const HParser *nothing_p_ = h_nothing_p(); + g_check_parse_failed(nothing_p_, "a", 1); +} + +static void test_sequence(void) { + const HParser *sequence_1 = h_sequence(h_ch('a'), h_ch('b'), NULL); + const HParser *sequence_2 = h_sequence(h_ch('a'), h_whitespace(h_ch('b')), NULL); + + g_check_parse_ok(sequence_1, "ab", 2, "(u0x61 u0x62)"); + g_check_parse_failed(sequence_1, "a", 1); + g_check_parse_failed(sequence_1, "b", 1); + g_check_parse_ok(sequence_2, "ab", 2, "(u0x61 u0x62)"); + g_check_parse_ok(sequence_2, "a b", 3, "(u0x61 u0x62)"); + g_check_parse_ok(sequence_2, "a  b", 4, "(u0x61 u0x62)"); +} + +static void test_choice(void) { + const HParser *choice_ = h_choice(h_ch('a'), h_ch('b'), NULL); + + g_check_parse_ok(choice_, "a", 1, "u0x61"); + g_check_parse_ok(choice_, "b", 1, "u0x62"); + g_check_parse_failed(choice_, "c", 1); +} + +static void test_butnot(void) { + const HParser *butnot_1 = h_butnot(h_ch('a'), h_token((const uint8_t*)"ab", 2)); + const HParser *butnot_2 = h_butnot(h_ch_range('0', '9'), h_ch('6')); + + g_check_parse_ok(butnot_1, "a", 1, "u0x61"); + g_check_parse_failed(butnot_1, "ab", 2); + g_check_parse_ok(butnot_1, "aa", 2, "u0x61"); + g_check_parse_failed(butnot_2, "6", 1); +} + +static void test_difference(void) { + const HParser *difference_ = h_difference(h_token((const uint8_t*)"ab", 2), h_ch('a')); + + g_check_parse_ok(difference_, "ab", 2, "<61.62>"); + g_check_parse_failed(difference_, "a", 1); +} + +static void test_xor(void) { + const HParser *xor_ = h_xor(h_ch_range('0', '6'),
h_ch_range('5', '9')); + + g_check_parse_ok(xor_, "0", 1, "u0x30"); + g_check_parse_ok(xor_, "9", 1, "u0x39"); + g_check_parse_failed(xor_, "5", 1); + g_check_parse_failed(xor_, "a", 1); +} + +static void test_many(void) { + const HParser *many_ = h_many(h_choice(h_ch('a'), h_ch('b'), NULL)); + g_check_parse_ok(many_, "adef", 4, "(u0x61)"); + g_check_parse_ok(many_, "bdef", 4, "(u0x62)"); + g_check_parse_ok(many_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)"); + g_check_parse_ok(many_, "daabbabadef", 11, "()"); +} + +static void test_many1(void) { + const HParser *many1_ = h_many1(h_choice(h_ch('a'), h_ch('b'), NULL)); + + g_check_parse_ok(many1_, "adef", 4, "(u0x61)"); + g_check_parse_ok(many1_, "bdef", 4, "(u0x62)"); + g_check_parse_ok(many1_, "aabbabadef", 10, "(u0x61 u0x61 u0x62 u0x62 u0x61 u0x62 u0x61)"); + g_check_parse_failed(many1_, "daabbabadef", 11); +} + +static void test_repeat_n(void) { + const HParser *repeat_n_ = h_repeat_n(h_choice(h_ch('a'), h_ch('b'), NULL), 2); + + g_check_parse_failed(repeat_n_, "adef", 4); + g_check_parse_ok(repeat_n_, "abdef", 5, "(u0x61 u0x62)"); + g_check_parse_failed(repeat_n_, "dabdef", 6); +} + +static void test_optional(void) { + const HParser *optional_ = h_sequence(h_ch('a'), h_optional(h_choice(h_ch('b'), h_ch('c'), NULL)), h_ch('d'), NULL); + + g_check_parse_ok(optional_, "abd", 3, "(u0x61 u0x62 u0x64)"); + g_check_parse_ok(optional_, "acd", 3, "(u0x61 u0x63 u0x64)"); + g_check_parse_ok(optional_, "ad", 2, "(u0x61 null u0x64)"); + g_check_parse_failed(optional_, "aed", 3); + g_check_parse_failed(optional_, "ab", 2); + g_check_parse_failed(optional_, "ac", 2); +} + +static void test_ignore(void) { + const HParser *ignore_ = h_sequence(h_ch('a'), h_ignore(h_ch('b')), h_ch('c'), NULL); + + g_check_parse_ok(ignore_, "abc", 3, "(u0x61 u0x63)"); + g_check_parse_failed(ignore_, "ac", 2); +} + +static void test_sepBy1(void) { + const HParser *sepBy1_ = h_sepBy1(h_choice(h_ch('1'), h_ch('2'), h_ch('3'), NULL), h_ch(',')); + + g_check_parse_ok(sepBy1_, "1,2,3", 5, "(u0x31 u0x32 u0x33)"); + g_check_parse_ok(sepBy1_, "1,3,2", 5, "(u0x31 u0x33 u0x32)"); + g_check_parse_ok(sepBy1_, "1,3", 3, "(u0x31 u0x33)"); + g_check_parse_ok(sepBy1_, "3", 1, "(u0x33)"); +} + +static void test_epsilon_p(void) { + const HParser *epsilon_p_1 = h_sequence(h_ch('a'), h_epsilon_p(), h_ch('b'), NULL); + const HParser *epsilon_p_2 = h_sequence(h_epsilon_p(), h_ch('a'), NULL); + const HParser *epsilon_p_3 = h_sequence(h_ch('a'), h_epsilon_p(), NULL); + + g_check_parse_ok(epsilon_p_1, "ab", 2, "(u0x61 u0x62)"); + g_check_parse_ok(epsilon_p_2, "a", 1, "(u0x61)"); + g_check_parse_ok(epsilon_p_3, "a", 1, "(u0x61)"); +} + +static void test_attr_bool(void) { + +} + +static void test_and(void) { + const HParser *and_1 = h_sequence(h_and(h_ch('0')), h_ch('0'), NULL); + const HParser *and_2 = h_sequence(h_and(h_ch('0')), h_ch('1'), NULL); + const HParser *and_3 = h_sequence(h_ch('1'), h_and(h_ch('2')), NULL); + + g_check_parse_ok(and_1, "0", 1, "(u0x30)"); + g_check_parse_failed(and_2, "0", 1); + g_check_parse_ok(and_3, "12", 2, "(u0x31)"); +} + +static void test_not(void) { + const HParser *not_1 = h_sequence(h_ch('a'), h_choice(h_ch('+'), h_token((const uint8_t*)"++", 2), NULL), h_ch('b'), NULL); + const HParser *not_2 = h_sequence(h_ch('a'), + h_choice(h_sequence(h_ch('+'), h_not(h_ch('+')), NULL), + h_token((const uint8_t*)"++", 2), + NULL), h_ch('b'), NULL); + + g_check_parse_ok(not_1, "a+b", 3, "(u0x61 u0x2b u0x62)"); + g_check_parse_failed(not_1, "a++b", 4); + 
g_check_parse_ok(not_2, "a+b", 3, "(u0x61 (u0x2b) u0x62)"); + g_check_parse_ok(not_2, "a++b", 4, "(u0x61 <2b.2b> u0x62)"); +} + +static void test_leftrec(void) { + const HParser *a_ = h_ch('a'); + + HParser *lr_ = h_indirect(); + h_bind_indirect(lr_, h_choice(h_sequence(lr_, a_, NULL), a_, NULL)); + + g_check_parse_ok(lr_, "a", 1, "u0x61"); + g_check_parse_ok(lr_, "aa", 2, "(u0x61 u0x61)"); + g_check_parse_ok(lr_, "aaa", 3, "((u0x61 u0x61) u0x61)"); +} + +void register_parser_tests(void) { + g_test_add_func("/core/parser/token", test_token); + g_test_add_func("/core/parser/ch", test_ch); + g_test_add_func("/core/parser/ch_range", test_ch_range); + g_test_add_func("/core/parser/int64", test_int64); + g_test_add_func("/core/parser/int32", test_int32); + g_test_add_func("/core/parser/int16", test_int16); + g_test_add_func("/core/parser/int8", test_int8); + g_test_add_func("/core/parser/uint64", test_uint64); + g_test_add_func("/core/parser/uint32", test_uint32); + g_test_add_func("/core/parser/uint16", test_uint16); + g_test_add_func("/core/parser/uint8", test_uint8); + g_test_add_func("/core/parser/int_range", test_int_range); +#if 0 + g_test_add_func("/core/parser/float64", test_float64); + g_test_add_func("/core/parser/float32", test_float32); +#endif + g_test_add_func("/core/parser/whitespace", test_whitespace); + g_test_add_func("/core/parser/left", test_left); + g_test_add_func("/core/parser/right", test_right); + g_test_add_func("/core/parser/middle", test_middle); + g_test_add_func("/core/parser/action", test_action); + g_test_add_func("/core/parser/in", test_in); + g_test_add_func("/core/parser/not_in", test_not_in); + g_test_add_func("/core/parser/end_p", test_end_p); + g_test_add_func("/core/parser/nothing_p", test_nothing_p); + g_test_add_func("/core/parser/sequence", test_sequence); + g_test_add_func("/core/parser/choice", test_choice); + g_test_add_func("/core/parser/butnot", test_butnot); + g_test_add_func("/core/parser/difference", test_difference); + g_test_add_func("/core/parser/xor", test_xor); + g_test_add_func("/core/parser/many", test_many); + g_test_add_func("/core/parser/many1", test_many1); + g_test_add_func("/core/parser/repeat_n", test_repeat_n); + g_test_add_func("/core/parser/optional", test_optional); + g_test_add_func("/core/parser/sepBy1", test_sepBy1); + g_test_add_func("/core/parser/epsilon_p", test_epsilon_p); + g_test_add_func("/core/parser/attr_bool", test_attr_bool); + g_test_add_func("/core/parser/and", test_and); + g_test_add_func("/core/parser/not", test_not); + g_test_add_func("/core/parser/ignore", test_ignore); + g_test_add_func("/core/parser/leftrec", test_leftrec); +} diff --git a/src/test_suite.c b/src/test_suite.c index af22c7e..e01d020 100644 --- a/src/test_suite.c +++ b/src/test_suite.c @@ -15,12 +15,15 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
*/ +#include #include "hammer.h" #include "test_suite.h" extern void register_bitreader_tests(); extern void register_bitwriter_tests(); extern void register_parser_tests(); +extern void register_misc_tests(); +extern void register_benchmark_tests(); int main(int argc, char** argv) { g_test_init(&argc, &argv, NULL); @@ -29,6 +32,8 @@ int main(int argc, char** argv) { register_bitreader_tests(); register_bitwriter_tests(); register_parser_tests(); + register_misc_tests(); + register_benchmark_tests(); g_test_run(); } diff --git a/src/test_suite.h b/src/test_suite.h index 68cf83a..24932bb 100644 --- a/src/test_suite.h +++ b/src/test_suite.h @@ -17,7 +17,7 @@ #ifndef HAMMER_TEST_SUITE__H #define HAMMER_TEST_SUITE__H -#include +#include // Equivalent to g_assert_*, but not using g_assert... #define g_check_inttype(fmt, typ, n1, op, n2) do { \