代码之家  ›  专栏  ›  技术社区  ›  joshu

解析器中的boost spirit segfault

  •  2
  • joshu  · 技术社区  · 6 年前

    我一直在尝试将我在本科编译器中编写的lex和yacc代码转换成spirit代码来学习spirit,我发现了一个segfault,我似乎无法理解。我是这样写的:

    namespace lex = boost::spirit::lex;
    
    enum Tokens
    {
        k_andTok = 1,
        k_def = 2,
        k_elihw = 3,
        k_elseTok = 4,
        k_falseTok = 5,
        k_fed = 6,
        k_fi = 7,
        k_ifTok = 8,
        k_input = 9,
        k_notTok = 10,
        k_orTok = 11,
        k_print = 12,
        k_returnTok = 13,
        k_trueTok = 14,
        k_whileTok = 15,
        k_plues = 16,
        k_minus = 17,
        k_mult = 18,
        k_div = 19,
        k_bang = 20,
        k_equalTo = 21,
        k_greaterEq = 22,
        k_lessEq = 23,
        k_notEq = 24,
        k_less = 25,
        k_greater = 26,
        k_assign = 27,
        k_comma = 28,
        k_colon = 29,
        k_leftParen = 30,
        k_rightParen = 31,
        k_leftBracket = 32,
        k_rightBracket = 33,
        k_nonTerminal = 34,
        k_terminal = 35
    };
    
    template <typename Lexer>
    struct LexerTokens : lex::lexer<Lexer>
    {
        LexerTokens() :
           whiteSpace("[ \\t\\n]"),
           andTok("and"),
           def("def"),
           elihw("elihw"),
           elseTok("else"),
           falseTok("false"),
           fed("fed"),
           fi("fi"),
           ifTok("if"),
           input("input"),
           notTok("not"),
           orTok("or"),
           print("print"),
           returnTok("return"),
           trueTok("true"),
           whileTok("while"),
           plus("\\+"),
           minus("\\-"),
           mult("\\*"),
           div("\\/"),
           bang("\\!"),
           equalTo("=="),
           greaterEq(">="),
           lessEq("<="),
           notEq("!="),
           less("<"),
           greater(">"),
           assign("="),
           comma(","),
           colon(":"),
           leftParen("\\("),
           rightParen("\\)"),
           leftBracket("\\["),
           rightBracket("\\["),
           nonTerminal("[a-z][a-zA-Z0-9]*"),
           terminal("[0-9]")
        {
            this->self("WHITESPACE") = whiteSpace;
    
            this->self.add
                (andTok, k_andTok)
                (def, k_def)
                (elihw, k_elihw)
                (elseTok, k_elseTok)
                (falseTok, k_falseTok)
                (fed, k_fed)
                (fi, k_fi)
                (ifTok, k_ifTok)
                (andTok, k_andTok)
                (input, k_input)
                (notTok, k_notTok)
                (orTok, k_orTok)
                (print, k_print)
                (returnTok, k_returnTok)
                (trueTok, k_trueTok)
                (whileTok, k_whileTok)
                (plus, k_plues)
                (minus, k_minus)
                (mult, k_mult)
                (div, k_div)
                (bang, k_bang)
                (equalTo, k_equalTo)
                (greaterEq, k_greaterEq)
                (lessEq, k_lessEq)
                (notEq, k_notEq)
                (less, k_less)
                (greater, k_greater)
                (assign, k_assign)
                (comma, k_comma)
                (colon, k_colon)
                (leftParen, k_leftParen)
                (rightParen, k_rightParen)
                (leftBracket, k_leftBracket)
                (rightBracket, k_rightBracket)
                (nonTerminal, k_nonTerminal)
                (terminal, k_terminal);
        }
    
        lex::token_def<lex::omit> whiteSpace;
        lex::token_def<std::string> andTok;
        lex::token_def<std::string> def;
        lex::token_def<std::string> elihw;
        lex::token_def<std::string> elseTok;
        lex::token_def<std::string> falseTok;
        lex::token_def<std::string> fed;
        lex::token_def<std::string> fi;
        lex::token_def<std::string> ifTok;
        lex::token_def<std::string> input;
        lex::token_def<std::string> notTok;
        lex::token_def<std::string> orTok;
        lex::token_def<std::string> print;
        lex::token_def<std::string> returnTok;
        lex::token_def<std::string> trueTok;
        lex::token_def<std::string> whileTok;
        lex::token_def<std::string> plus;
        lex::token_def<std::string> minus;
        lex::token_def<std::string> mult;
        lex::token_def<std::string> div;
        lex::token_def<std::string> bang;
        lex::token_def<std::string> equalTo;
        lex::token_def<std::string> greaterEq;
        lex::token_def<std::string> lessEq;
        lex::token_def<std::string> notEq;
        lex::token_def<std::string> less;
        lex::token_def<std::string> greater;
        lex::token_def<std::string> assign;
        lex::token_def<std::string> comma;
        lex::token_def<std::string> colon;
        lex::token_def<std::string> leftParen;
        lex::token_def<std::string> rightParen;
        lex::token_def<std::string> leftBracket;
        lex::token_def<std::string> rightBracket;
        lex::token_def<std::string> nonTerminal;
        lex::token_def<std::string> terminal;
    };
    

    以及解析器

    namespace qi = boost::spirit::qi;
    template <typename Iterator, typename Skipper>
    struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
    {        
    //    using boost::phoenix::ref;
    //    using boost::phoenix::size;
    
        template <typename TokenDef>
        InterpreterGrammar(TokenDef const& tok)
            : InterpreterGrammar::base_type(start),
            connect(0)
        {
            start %= functionList >> endList >> qi::eoi;
    
            // different expressions
            exp %= exp >> qi::token(k_equalTo) >> exp
                  |
                  exp >> qi::token(k_notEq) >> exp
                  |
                  exp >> qi::token(k_less) >> exp
                  |
                  exp >> qi::token(k_lessEq) >> exp
                  |
                  exp >> qi::token(k_greater) >> exp
                  |
                  exp >> qi::token(k_greaterEq) >> exp
                  |
                  exp >> qi::token(k_andTok) >> exp
                  |
                  exp >> qi::token(k_orTok) >> exp
                  |
                  qi::token(k_notTok) >> exp 
                  |
                  exp >> qi::token(k_plues) >> exp
                  |
                  exp >> qi::token(k_minus) >> exp
                  |
                  exp >> qi::token(k_mult) >> exp
                  |
                  qi::token(k_minus) >> exp
                  |
                  qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                  |
                  qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
                  |
                  qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                  |
                  qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                  |
                  qi::token(k_nonTerminal)
                  |
                  qi::token(k_terminal)
                  |
                  qi::token(k_trueTok)
                  |
                  qi::token(k_falseTok);
    
            // parameter list
            paramList %= paramList >> qi::token(k_comma) >> exp
                        |
                        exp;
    
            // return statements
            returnStatement %= returnStatement >> exp
                             |
                             returnStatement;
    
            // function call statements
            callStatement %= qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                            |
                            qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen);
    
            // variable assignment
            assignmentStatement %= qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                                  |
                                  qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
                                      >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp;
    
            // list of integers
            intList %= intList >> qi::token(k_comma) >> qi::token(k_terminal)
                      |
                      qi::token(k_terminal);
    
            // print out a variable
            printStatement %= qi::token(k_print) >> exp;
    
            // take input
            inputStatement %= qi::token(k_nonTerminal) >> qi::token(k_input);
    
            // conditional statement
            conditionStatement %= qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse;
    
            // consitions have optional else
            optionalElse %= qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                           |
                           qi::eps;
    
            // while loop
            whileStatement %= qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw);
    
            // actual program statements
            endList %= endList >> end
                      |
                      end;
    
            // end possibilities of program in global space
            end %= callStatement
                  |
                  printStatement
                  |
                  qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
                  |
                  qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                  |
                  qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                      >> qi::token(k_rightBracket)
                  |
                  qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                      >> qi::token(k_assign) >> exp;
    
            // function parameters
            paramList %= paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
                        |
                        qi::token(k_nonTerminal)
                        |
                        qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket);
    
            // define a statement as assignment print input condition while or call
            statement %= assignmentStatement
                        |
                        printStatement
                        |
                        inputStatement
                        |
                        conditionStatement
                        |
                        whileStatement
                        |
                        callStatement
                        |
                        returnStatement;
    
            // general statement list
            statements %= statements >> statement
                         |
                         statement;
    
            // functions
            functionList %= qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                               >> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
                               >> statements >> qi::token(k_fed)
                           |
                           qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                               >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed);
                           | qi::eps;
    
            BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
            debug(start);
        }
    
        qi::rule<Iterator, Skipper> start;
        qi::rule<Iterator, Skipper> functionList;
        qi::rule<Iterator, Skipper> endList;
        qi::rule<Iterator, Skipper> paramList;
        qi::rule<Iterator, Skipper> statements;
        qi::rule<Iterator, Skipper> statement;
        qi::rule<Iterator, Skipper> assignmentStatement;
        qi::rule<Iterator, Skipper> printStatement;
        qi::rule<Iterator, Skipper> inputStatement;
        qi::rule<Iterator, Skipper> conditionStatement;
        qi::rule<Iterator, Skipper> whileStatement;
        qi::rule<Iterator, Skipper> callStatement;
        qi::rule<Iterator, Skipper> returnStatement;
        qi::rule<Iterator, Skipper> exp;
        qi::rule<Iterator, Skipper> intList;
        qi::rule<Iterator, Skipper> optionalElse;
        qi::rule<Iterator, Skipper> end;
    };
    

    主要部分

    int main(int argc, char** argv)
    {
    namespace lex = boost::spirit::lex;
    namespace qi = boost::spirit::qi;
    
    typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
    typedef lex::lexertl::lexer<token_type> lexer_type;
    typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
    typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
    
    LexerTokens< lexer_type > lexer;
    InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
    
    // read the file
    if (argc != 2)
    {
        std::cout << "File required" << std::endl;
        return 1;
    }
    
    std::ifstream t(argv[1]); 
    
    t.seekg(0, std::ios::end);   
    sourceCode.reserve(t.tellg());
    t.seekg(0, std::ios::beg);
    
    sourceCode.assign(std::istreambuf_iterator<char>(t), 
                      std::istreambuf_iterator<char>());
    
    char const* first = sourceCode.c_str();
    char const* last = &first[sourceCode.size()];
    bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
    
    std::cout << "Remaining " << std::string(first,last) << std::endl;
    std::cout << "R is " << r << std::endl;
    }
    

    语言中的一个例子是:

    def add(x,y) :                                                                                                                              
      if (x <= 0) : return y fi
       return 1 + add(x-1,y) 
    fed
    y = add(5,4)
    print y
    

    我遇到的错误是调用语法时语法分析器segfaults。

    我看到了如果我

    • 注释掉相关规则(功能列表)的部分 语法需要调用另一个规则的地方(如paramlist)
    • 并删除发送到lexer/parser的部分源代码 只包括标记部分,

    语法不能正确地分段和解析表达式。

    当我在调试器中运行代码时,在 代码段错误,将打印一个大表达式,其中所有成员都具有 弦说,

    读取变量时出错:无法访问地址0x7fffff7fe0处的内存

    我查了其他类似的帖子,里面的错误是精神上的错误, 然而,

    • 我不认为这个错误是由于语法递归造成的,或者 规则中有临时语法,因为我编写的ll解析器 以前在lex和yacc中成功地解析了语句 相信在整个程序运行过程中所有的规则都会存在。

    在正确的方向上的任何一点,或者对当前代码的批评将是 非常感谢。

    1 回复  |  直到 6 年前
        1
  •  1
  •   sehe    6 年前

    如果你使用地址消毒剂,它会告诉你:

    <start>...
      <try>[]</try>...
    ASAN:DEADLYSIGNAL...
    =================================================================...
    ==8985==ERROR: AddressSanitizer: stack-overflow on address 0x7ffeb280dfc8 (pc 0x0000004c9cf6 bp 0x7f...
        #0 0x4c9cf5 in __asan_memcpy (/home/sehe/Projects/stackoverflow/sotest+0x4c9cf5)...
        #1 0x68eb77 in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::spi...
        #2 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
        #3 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion::...
        #4 0x68e190 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
        #5 0x68de4a in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
        #6 0x68d8b5 in bool boost::spirit::qi::detail::alternative_function<boost::spirit::lex::lexertl:...
        #7 0x6e085c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusion...
        #8 0x6e053f in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::sequence<b...
        #9 0x6e0218 in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::sequence<boost::fu...
        #10 0x6dffc5 in bool boost::spirit::qi::alternative<boost::fusion::cons<boost::spirit::qi::seque...
        #11 0x6dfbf7 in bool boost::spirit::qi::detail::parser_binder<boost::spirit::qi::alternative<boo...
        #12 0x6de330 in boost::detail::function::function_obj_invoker4<boost::spirit::qi::detail::parser...
        #13 0x5d633a in boost::function4<bool, boost::spirit::lex::lexertl::iterator<boost::spirit::lex:...
        #14 0x5d58e8 in bool boost::spirit::qi::rule<boost::spirit::lex::lexertl::iterator<boost::spirit...
        #15 0x5d54e9 in bool boost::spirit::qi::reference<boost::spirit::qi::rule<boost::spirit::lex::le...
        #16 0x5d49bf in bool boost::spirit::qi::detail::fail_function<boost::spirit::lex::lexertl::itera...
        #17 0x68f56c in bool boost::fusion::detail::linear_any<boost::fusion::cons_iterator<boost::fusio...
        #18 0x68f267 in bool boost::fusion::detail::any<boost::fusion::cons<boost::spirit::qi::reference...
        #19 0x68ef6e in bool boost::fusion::any<boost::fusion::cons<boost::spirit::qi::reference<boost::...
        #20 0x68ebae in bool boost::spirit::any_if<boost::spirit::traits::attribute_not_unused<boost::sp...
        #21 0x68e844 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion:...
        [ snip repeated frames ]
        #250 0x68e487 in bool boost::spirit::qi::sequence_base<boost::spirit::qi::sequence<boost::fusion...
    
    
    SUMMARY: AddressSanitizer: stack-overflow (/home/sehe/Projects/stackoverflow/sotest+0x4c9cf5) in __a...
    ==8985==ABORTING...
    

    所以,这显然是左递归导致堆栈溢出。

    其他解析器生成器处理这个问题的事实意义很小:spirit是一个peg解析器生成器,左递归是不可能的。

    你需要重写

        exp %= exp >> qi::token(k_equalTo) >> exp
    

    使左撇子更具体。

    注意:我不得不修复一些随机问题,你的方式提出你的代码。这就是我曾经责备过的:

    Live On Coliru

    #include <boost/spirit/include/lex.hpp>
    #include <boost/spirit/include/lex_lexertl.hpp>
    #include <boost/spirit/include/qi.hpp>
    namespace lex = boost::spirit::lex;
    
    namespace interpreter {
        enum Tokens
        {
            k_andTok = 1,
            k_def = 2,
            k_elihw = 3,
            k_elseTok = 4,
            k_falseTok = 5,
            k_fed = 6,
            k_fi = 7,
            k_ifTok = 8,
            k_input = 9,
            k_notTok = 10,
            k_orTok = 11,
            k_print = 12,
            k_returnTok = 13,
            k_trueTok = 14,
            k_whileTok = 15,
            k_plues = 16,
            k_minus = 17,
            k_mult = 18,
            k_div = 19,
            k_bang = 20,
            k_equalTo = 21,
            k_greaterEq = 22,
            k_lessEq = 23,
            k_notEq = 24,
            k_less = 25,
            k_greater = 26,
            k_assign = 27,
            k_comma = 28,
            k_colon = 29,
            k_leftParen = 30,
            k_rightParen = 31,
            k_leftBracket = 32,
            k_rightBracket = 33,
            k_nonTerminal = 34,
            k_terminal = 35
        };
    
        template <typename Lexer>
        struct LexerTokens : lex::lexer<Lexer>
        {
            LexerTokens() :
               whiteSpace("[ \\t\\n]"),
               andTok("and"),
               def("def"),
               elihw("elihw"),
               elseTok("else"),
               falseTok("false"),
               fed("fed"),
               fi("fi"),
               ifTok("if"),
               input("input"),
               notTok("not"),
               orTok("or"),
               print("print"),
               returnTok("return"),
               trueTok("true"),
               whileTok("while"),
               plus("\\+"),
               minus("\\-"),
               mult("\\*"),
               div("\\/"),
               bang("\\!"),
               equalTo("=="),
               greaterEq(">="),
               lessEq("<="),
               notEq("!="),
               less("<"),
               greater(">"),
               assign("="),
               comma(","),
               colon(":"),
               leftParen("\\("),
               rightParen("\\)"),
               leftBracket("\\["),
               rightBracket("\\["),
               nonTerminal("[a-z][a-zA-Z0-9]*"),
               terminal("[0-9]")
            {
                this->self("WHITESPACE") = whiteSpace;
    
                this->self.add
                    (andTok, k_andTok)
                    (def, k_def)
                    (elihw, k_elihw)
                    (elseTok, k_elseTok)
                    (falseTok, k_falseTok)
                    (fed, k_fed)
                    (fi, k_fi)
                    (ifTok, k_ifTok)
                    (andTok, k_andTok)
                    (input, k_input)
                    (notTok, k_notTok)
                    (orTok, k_orTok)
                    (print, k_print)
                    (returnTok, k_returnTok)
                    (trueTok, k_trueTok)
                    (whileTok, k_whileTok)
                    (plus, k_plues)
                    (minus, k_minus)
                    (mult, k_mult)
                    (div, k_div)
                    (bang, k_bang)
                    (equalTo, k_equalTo)
                    (greaterEq, k_greaterEq)
                    (lessEq, k_lessEq)
                    (notEq, k_notEq)
                    (less, k_less)
                    (greater, k_greater)
                    (assign, k_assign)
                    (comma, k_comma)
                    (colon, k_colon)
                    (leftParen, k_leftParen)
                    (rightParen, k_rightParen)
                    (leftBracket, k_leftBracket)
                    (rightBracket, k_rightBracket)
                    (nonTerminal, k_nonTerminal)
                    (terminal, k_terminal);
            }
    
            lex::token_def<lex::omit> whiteSpace;
            lex::token_def<std::string> andTok;
            lex::token_def<std::string> def;
            lex::token_def<std::string> elihw;
            lex::token_def<std::string> elseTok;
            lex::token_def<std::string> falseTok;
            lex::token_def<std::string> fed;
            lex::token_def<std::string> fi;
            lex::token_def<std::string> ifTok;
            lex::token_def<std::string> input;
            lex::token_def<std::string> notTok;
            lex::token_def<std::string> orTok;
            lex::token_def<std::string> print;
            lex::token_def<std::string> returnTok;
            lex::token_def<std::string> trueTok;
            lex::token_def<std::string> whileTok;
            lex::token_def<std::string> plus;
            lex::token_def<std::string> minus;
            lex::token_def<std::string> mult;
            lex::token_def<std::string> div;
            lex::token_def<std::string> bang;
            lex::token_def<std::string> equalTo;
            lex::token_def<std::string> greaterEq;
            lex::token_def<std::string> lessEq;
            lex::token_def<std::string> notEq;
            lex::token_def<std::string> less;
            lex::token_def<std::string> greater;
            lex::token_def<std::string> assign;
            lex::token_def<std::string> comma;
            lex::token_def<std::string> colon;
            lex::token_def<std::string> leftParen;
            lex::token_def<std::string> rightParen;
            lex::token_def<std::string> leftBracket;
            lex::token_def<std::string> rightBracket;
            lex::token_def<std::string> nonTerminal;
            lex::token_def<std::string> terminal;
        };
    
        namespace qi = boost::spirit::qi;
        template <typename Iterator, typename Skipper>
        struct InterpreterGrammar : qi::grammar<Iterator, Skipper>
        {        
        //    using boost::phoenix::ref;
        //    using boost::phoenix::size;
    
            template <typename TokenDef>
            InterpreterGrammar(TokenDef const& )
                : InterpreterGrammar::base_type(start)
                  //, connect(0)
            {
                start 
                    = functionList >> endList >> qi::eoi
                    ;
    
                // different expressions
                exp = exp >> qi::token(k_equalTo) >> exp
                    | exp >> qi::token(k_notEq) >> exp
                    | exp >> qi::token(k_less) >> exp
                    | exp >> qi::token(k_lessEq) >> exp
                    | exp >> qi::token(k_greater) >> exp
                    | exp >> qi::token(k_greaterEq) >> exp
                    | exp >> qi::token(k_andTok) >> exp
                    | exp >> qi::token(k_orTok) >> exp
                    | qi::token(k_notTok) >> exp 
                    | exp >> qi::token(k_plues) >> exp
                    | exp >> qi::token(k_minus) >> exp
                    | exp >> qi::token(k_mult) >> exp
                    | qi::token(k_minus) >> exp
                    | qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                    | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket) 
                    | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                    | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> exp >> qi::token(k_rightParen)
                    | qi::token(k_nonTerminal)
                    | qi::token(k_terminal)
                    | qi::token(k_trueTok)
                    | qi::token(k_falseTok)
                    ;
    
                // parameter list
                paramList 
                    = paramList >> qi::token(k_comma) >> exp
                    | exp
                    ;
    
                // return statements
                returnStatement 
                    = returnStatement >> exp
                    | returnStatement
                    ;
    
                // function call statements
                callStatement 
                    = qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> qi::token(k_rightParen)
                    | qi::token(k_nonTerminal) >> qi::token(k_leftParen) >> paramList >> qi::token(k_rightParen)
                    ;
    
                // variable assignment
                assignmentStatement 
                    = qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                    | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp
                        >> qi::token(k_rightBracket) >> qi::token(k_assign) >> exp
                    ;
    
                // list of integers
                intList 
                    = intList >> qi::token(k_comma) >> qi::token(k_terminal)
                    | qi::token(k_terminal)
                    ;
    
                // print out a variable
                printStatement 
                    = qi::token(k_print) >> exp
                    ;
    
                // take input
                inputStatement 
                    = qi::token(k_nonTerminal) >> qi::token(k_input)
                    ;
    
                // conditional statement
                conditionStatement 
                    = qi::token(k_ifTok) >> exp >> qi::token(k_colon) >> statements >> optionalElse
                    ;
    
                // consitions have optional else
                optionalElse 
                    = qi::token(k_elseTok) >> qi::token(k_colon) >> statements
                    | qi::eps
                    ;
    
                // while loop
                whileStatement 
                    = qi::token(k_whileTok) >> exp >> qi::token(k_colon) >> statements >> qi::token(k_elihw)
                    ;
    
                // actual program statements
                endList 
                    = endList >> end
                    | end
                    ;
    
                // end possibilities of program in global space
                end = callStatement
                    | printStatement
                    | qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_input)
                    | qi::token(k_nonTerminal) >> qi::token(k_assign) >> exp
                    | qi::token(k_nonTerminal) >> qi::token(k_assign) >> qi::token(k_leftBracket) >> intList
                        >> qi::token(k_rightBracket)
                    | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> exp >> qi::token(k_rightBracket)
                        >> qi::token(k_assign) >> exp
                    ;
    
                // function parameters
                paramList 
                    = paramList >> qi::token(k_comma) >> qi::token(k_nonTerminal)
                    | qi::token(k_nonTerminal)
                    | qi::token(k_nonTerminal) >> qi::token(k_leftBracket) >> qi::token(k_rightBracket)
                    ;
    
                // define a statement as assignment print input condition while or call
                statement 
                    = assignmentStatement
                    | printStatement
                    | inputStatement
                    | conditionStatement
                    | whileStatement
                    | callStatement
                    | returnStatement
                    ;
    
                // general statement list
                statements 
                    = statements >> statement
                    | statement
                    ;
    
                // functions
                functionList 
                    = qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                                   >> paramList >> qi::token(k_rightParen) >> qi::token(k_colon)
                                   >> statements >> qi::token(k_fed)
                    | qi::token(k_def) >> qi::token(k_nonTerminal) >> qi::token(k_leftParen)
                                   >> qi::token(k_rightParen) >> qi::token(k_colon) >> statements >> qi::token(k_fed)
                    | qi::eps
                    ;
    
                BOOST_SPIRIT_DEBUG_NODES((start)(functionList));
            }
    
            qi::rule<Iterator, Skipper> start;
            qi::rule<Iterator, Skipper> functionList;
            qi::rule<Iterator, Skipper> endList;
            qi::rule<Iterator, Skipper> paramList;
            qi::rule<Iterator, Skipper> statements;
            qi::rule<Iterator, Skipper> statement;
            qi::rule<Iterator, Skipper> assignmentStatement;
            qi::rule<Iterator, Skipper> printStatement;
            qi::rule<Iterator, Skipper> inputStatement;
            qi::rule<Iterator, Skipper> conditionStatement;
            qi::rule<Iterator, Skipper> whileStatement;
            qi::rule<Iterator, Skipper> callStatement;
            qi::rule<Iterator, Skipper> returnStatement;
            qi::rule<Iterator, Skipper> exp;
            qi::rule<Iterator, Skipper> intList;
            qi::rule<Iterator, Skipper> optionalElse;
            qi::rule<Iterator, Skipper> end;
        };
    }
    
    #include <fstream>
    #include <iterator>
    
    int main(int argc, char** argv) {
        namespace lex = boost::spirit::lex;
        namespace qi = boost::spirit::qi;
    
        typedef lex::lexertl::token< char const*, lex::omit, boost::mpl::true_ > token_type;
        typedef lex::lexertl::lexer<token_type> lexer_type;
        typedef interpreter::LexerTokens<lexer_type>::iterator_type iterator_type;
        typedef qi::in_state_skipper<interpreter::LexerTokens<lexer_type>::lexer_def> skipper_type;
    
        interpreter::LexerTokens< lexer_type > lexer;
        interpreter::InterpreterGrammar< iterator_type, skipper_type > parser(lexer);
    
        // read the file
        if (argc != 2)
        {
            std::cout << "File required" << std::endl;
            return 1;
        }
    
        std::ifstream t(argv[1]); 
        std::string const sourceCode { std::istreambuf_iterator<char>(t), {} };
    
        char const* first = sourceCode.data();
        char const* last = first + sourceCode.size();
        bool r = lex::tokenize_and_phrase_parse(first, last, lexer, parser, qi::in_state("WHITESPACE")[lexer.self]);
    
        std::cout << "Remaining " << std::string(first,last) << std::endl;
        std::cout << "R is " << r << std::endl;
    }