Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
174 views
in Technique[技术] by (71.8m points)

c++ - Parsing a command language using Boost Spirit

I am building a parser for a command language that I've pieced together from various samples. I've read the Boost Spirit Qi and Lex docs, and I think I understand the basics, but from what I've read, I should avoid attributes and use utree. What docs I've found on utree basically suck. Given the code below, I have the following questions:

  1. How do I annotate the parser to create an AST using utree?
  2. How do I walk the utree after it is built, to discover what was parsed? e.g. for token-only commands, such as SET DEBUG ON, as well as commands with values, such as LOAD "file.ext" or SET DRIVE C:
  3. I want to add a comment character, "!". So, how can I ignore everything after that - except when it occurs in a quoted string?
  4. Why doesn't my error handler get called when I give it invalid input?
  5. How can I make the command tokens case insensitive, but not change the contents of a quoted string?

    #include <Windows.h>
    #include <conio.h>
    #include <string>
    #include <vector>
    #include <iostream>
    
    #define BOOST_SPIRIT_DEBUG
    
    #include <boostspiritincludeqi.hpp>
    #include <boostspiritincludephoenix.hpp>
    #include <boostspiritincludelex.hpp>
    #include <boostspiritincludelex_lexertl.hpp>
    
    using namespace std;
    using namespace boost::spirit;
    using boost::spirit::utree;
    
    //
    // Tokens used by the command grammar
    //
    
    template <typename Lexer>
    struct command_tokens : lex::lexer <Lexer>
        {
        command_tokens () :
    
            //
            // Verbs, with abbreviation (just enough characters to make each unique)
            //
    
            boot        ("B(O(O(T)?)?)?"),
            exit        ("E(X(I(T)?)?)?"),
            help        ("H(E(L(P)?)?)?"),
            dash_help   ("-H(E(L(P)?)?)?"),
            slash_help  ("\/H(E(L(P)?)?)?"),
            load        ("L(O(A(D)?)?)?"),
            quit        ("Q(U(I(T)?)?)?"),
            set         ("SE(T)?"),
            show        ("SH(O(W)?)?"),
    
            //
            // Nouns, with abbreviation (the minimum number of characters is usually 3, but may be more to ensure uniqueness)
            //
    
            debug       ("DEB(U(G)?)?"),
            drive       ("DRI(V(E)?)?"),
            trace       ("TRA(C(E)?)?"),
    
            //
            // Qualifiers
            //
    
            on          ("ON"),
            off         ("OFF"),
    
            //
            // Tokens to pass back to the grammar
            //
    
            quoted_string   ("...")
    
            {
            using namespace boost::spirit::lex;
    
            //
            // Associate the tokens with the lexer
            //
    
            this->self 
                = boot
                | exit
                | help
                | dash_help
                | slash_help
                | load
                | quit
                | set
                | show
                | debug
                | drive
                | trace
                | off
                | on
                | quoted_string
                ;
    
            //
            // Define whitespace to ignore: space, tab, newline
            //
    
            this->self ("WS")
                = lex::token_def <> ("[ \t\n]+")
                ;
            }
    
        lex::token_def <>   boot;
        lex::token_def <>   dash_help;
        lex::token_def <>   debug;
        lex::token_def <string> drive;
        lex::token_def <>   exit;
        lex::token_def <>   help;
        lex::token_def <>   load;
        lex::token_def <>   off;
        lex::token_def <>   on;
        lex::token_def <>   quit;
        lex::token_def <string> quoted_string;
        lex::token_def <>   set;
        lex::token_def <>   show;
        lex::token_def <>   slash_help;
        lex::token_def <>   trace;
        };
    
    //
    // Display parse error
    //
    
    struct error_handler_
        {
        template <typename, typename, typename>
        struct result
            {
            typedef void type;
            };
    
        template <typename Iterator>
        void operator ()
            (
            qi::info const& What,
            Iterator        Err_pos,
            Iterator        Last
            ) const
    
            {
            cout << "Error! Expecting "
                << What
                << " here: ""
                << string (Err_pos, Last)
                << """
                << endl;
            }
        };
    
    boost::phoenix::function <error_handler_> const error_handler = error_handler_ ();
    
    //
    // Grammar describing the valid commands
    //
    
    template <typename Iterator, typename Lexer>
    struct command_grammar : qi::grammar <Iterator>
        {
        template <typename Lexer>
        command_grammar (command_tokens <Lexer> const& Tok) :
            command_grammar::base_type (start)
            {
            using qi::on_error;
            using qi::fail;
            using qi::char_;
    
            start
                = +commands;
    
            commands
                = (
                  boot_command
                | exit_command
                | help_command
                | load_command
                | set_command
                | show_command
                );
    
            boot_command
                = Tok.boot;
    
            exit_command
                = Tok.exit
                | Tok.quit;
    
            help_command
                = Tok.help
                | Tok.dash_help
                | Tok.slash_help;
    
            load_command
                = Tok.load >> Tok.quoted_string;
    
            set_command
                = Tok.set;
    
            show_command
                = Tok.show;
    
            set_property
                = debug_property
                | drive_property
                | trace_property;
    
            debug_property
                = Tok.debug >> on_off;
    
           drive_property
                = Tok.drive >> char_ ("A-Z") >> char_ (":");
    
            trace_property
                = Tok.trace >> on_off;
    
            on_off
                = Tok.on
                | Tok.off;
    
            BOOST_SPIRIT_DEBUG_NODE (start);
            BOOST_SPIRIT_DEBUG_NODE (commands);
            BOOST_SPIRIT_DEBUG_NODE (boot_command);
            BOOST_SPIRIT_DEBUG_NODE (exit_command);
            BOOST_SPIRIT_DEBUG_NODE (help_command);
            BOOST_SPIRIT_DEBUG_NODE (load_command);
            BOOST_SPIRIT_DEBUG_NODE (quit_command);
            BOOST_SPIRIT_DEBUG_NODE (set_command);
            BOOST_SPIRIT_DEBUG_NODE (show_command);
            BOOST_SPIRIT_DEBUG_NODE (set_property);
            BOOST_SPIRIT_DEBUG_NODE (debug_property);
            BOOST_SPIRIT_DEBUG_NODE (drive_property);
            BOOST_SPIRIT_DEBUG_NODE (trace_property);
            BOOST_SPIRIT_DEBUG_NODE (target_property);
            on_error <fail> (start, error_handler (_4, _3, _2));
            }
    
        qi::rule <Iterator> start;
        qi::rule <Iterator> commands;
        qi::rule <Iterator> boot_command;
        qi::rule <Iterator> exit_command;
        qi::rule <Iterator> help_command;
        qi::rule <Iterator> load_command;
        qi::rule <Iterator> quit_command;
        qi::rule <Iterator> set_command;
        qi::rule <Iterator> show_command;
        qi::rule <Iterator> set_property;
        qi::rule <Iterator> debug_property;
        qi::rule <Iterator, string ()>  drive_property;
        qi::rule <Iterator> target_property;
        qi::rule <Iterator> trace_property;
        qi::rule <Iterator> on_off;
        };
    
    int
    main
        (
        int     Argc,
        PCHAR   Argv
        )
    {
        typedef std::string::iterator                       base_iterator_type;
        typedef lex::lexertl::token <base_iterator_type>    token_type;
        typedef lex::lexertl::lexer <token_type>            lexer_type;
        typedef command_tokens <lexer_type>                 command_tokens;
        typedef command_tokens::iterator_type               iterator_type;
        typedef command_grammar <iterator_type, command_tokens::lexer_def>  command_grammar;
    
        command_tokens      tokens;
        command_grammar     commands (tokens);
        string              input = "SET DRIVE C:";
        string::iterator    it = input.begin ();
        iterator_type       iter = tokens.begin (it, input.end ());
        iterator_type       end = tokens.end ();
        string              ws ("WS");
    
        bool                result = lex::tokenize_and_phrase_parse (it, input.end (), tokens, commands, qi::in_state (ws) [tokens.self]);
    
        if (result)
            {
            cout << "Parse succeeded" << endl;
            }
        else
            {
            string  rest (it, input.end ());
            cout << "Parse failed" << endl;
            cout << "Stopped at " << rest << endl;
            }
    
        return 0;
    }                           // End of main
    
See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

I'm going to side-step the majority of your code, for the simple reasons that experience tells me that Lex and utree are generally not what you want to use.

What you do want is define an AST to represent your command language and then come up with a grammar to build it.

AST

namespace Ast {
    struct NoValue {
        bool operator==(NoValue const &) const { return true; }
    };
    template <typename Tag> struct GenericCommand {};

    namespace tag {
        struct boot;
        struct help;
        struct load;
        struct exit;
        struct set;
        struct show;
    };

    template <> struct GenericCommand<tag::load> { std::string name; };

    template <> struct GenericCommand<tag::set> {
        std::string property;
        boost::variant<NoValue, std::string, bool> value; // optional
    };

    using BootCmd = GenericCommand<tag::boot>;
    using HelpCmd = GenericCommand<tag::help>;
    using ExitCmd = GenericCommand<tag::exit>;
    using ShowCmd = GenericCommand<tag::show>;
    using LoadCmd = GenericCommand<tag::load>;
    using SetCmd  = GenericCommand<tag::set>;

    using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
    using Commands = std::list<Command>;
}

The full code only adds debug output helpers. And here's the full Fusion Adaption:

BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)

Grammar

Here I make some choices:

  • let's make things white-space and case insensitive, allowing line-separated commands: (see also Boost spirit skipper issues)

    start = skip(blank) [lazy_command % eol];
    
  • let's use Nabialek Trick to associate commands with prefixes. I used a very simple snippet of code to generate the unique prefixes:

    std::set<std::string> const verbs { "boot", "exit", "help", "-help", "/help", "load", "quit", "set", "show", };
    for (auto const full : verbs)
        for (auto partial=full; partial.length(); partial.resize(partial.size()-1)) {
            auto n = std::distance(verbs.lower_bound(partial), verbs.upper_bound(full));
            if (n < 2) std::cout << "("" << partial << "", &" << full << "_command)
    ";
        }
    
  • you could do the same for properties, but I thought the current setup is simpler:

template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
    command_grammar() : command_grammar::base_type(start) {
        using namespace qi;

        start = skip(blank) [lazy_command % eol];

        // nabialek trick
        lazy_command = no_case [ commands [ _a = _1 ] > lazy(*_a) [ _val = _1 ] ];

        on_off.add("on", true)("off", false);

        commands.add
            ("-help", &help_command) ("-hel", &help_command) ("-he", &help_command) ("-h", &help_command)
            ("/help", &help_command) ("/hel", &help_command) ("/he", &help_command) ("/h", &help_command)
            ("help", &help_command) ("hel", &help_command) ("he", &help_command) ("h", &help_command)
            ("boot", &boot_command) ("boo", &boot_command) ("bo", &boot_command) ("b", &boot_command)
            ("exit", &exit_command) ("exi", &exit_command) ("ex", &exit_command) ("e", &exit_command)
            ("quit", &exit_command) ("qui", &exit_command) ("qu", &exit_command) ("q", &exit_command)
            ("load", &load_command) ("loa", &load_command) ("lo", &load_command) ("l", &load_command)
            ("set", &set_command) ("se", &set_command)
            ("show", &show_command) ("sho", &show_command) ("sh", &show_command);

        quoted_string = '"' >> +~char_('"') >> '"';

        // nullary commands
        boot_command_ = eps;
        exit_command_ = eps;
        help_command_ = eps;
        show_command_ = eps;

        // non-nullary commands
        load_command_ = quoted_string;
        drive_        = char_("A-Z") >> ':';
        set_command_  = no_case[lit("drive")|"driv"|"dri"|"dr"] >> attr("DRIVE") >> drive_
                | no_case[ (lit("debug")|"debu"|"deb"|"de")     >> attr("DEBUG") >> on_off ]
                | no_case[ (lit("trace")|"trac"|"tra"|"tr"|"t") >> attr("TRACE") >> on_off ]
                ;

        BOOST_SPIRIT_DEBUG_NODES(
                (start)(lazy_command)
                (boot_command) (exit_command) (help_command) (show_command) (set_command) (load_command)
                (boot_command_)(exit_command_)(help_command_)(show_command_)(set_command_)(load_command_)
                (quoted_string)(drive_)
            )

        on_error<fail>(start, error_handler_(_4, _3, _2));
        on_error<fail>(lazy_command, error_handler_(_4, _3, _2));
        boot_command = boot_command_;
        exit_command = exit_command_;
        help_command = help_command_;
        load_command = load_command_;
        exit_command = exit_command_;
        set_command  = set_command_;
        show_command = show_command_;
    }

  private:
    struct error_handler_t {
        template <typename...> struct result { typedef void type; };

        void operator()(qi::info const &What, Iterator Err_pos, Iterator Last) const {
            std::cout << "Error! Expecting " << What << " here: "" << std::string(Err_pos, Last) << """ << std::endl;
        }
    };

    boost::phoenix::function<error_handler_t> const error_handler_ = error_handler_t {};

    qi::rule<Iterator, Ast::Commands()> start;

    using Skipper = qi::blank_type;
    using CommandRule  = qi::rule<Iterator, Ast::Command(), Skipper>;

    qi::symbols<char, bool> on_off;
    qi::symbols<char, CommandRule const*> commands;

    qi::rule<Iterator, std::string()> drive_property, quoted_string, drive_;

    qi::rule<Iterator, Ast::Command(), Skipper, qi::locals<CommandRule const*> > lazy_command;
    CommandRule boot_command, exit_command, help_command, load_command, set_command, show_command;

    qi::rule<Iterator, Ast::BootCmd(), Skipper> boot_command_;
    qi::rule<Iterator, Ast::ExitCmd(), Skipper> exit_command_;
    qi::rule<Iterator, Ast::HelpCmd(), Skipper> help_command_;
    qi::rule<Iterator, Ast::LoadCmd(), Skipper> load_command_;
    qi::rule<Iterator, Ast::SetCmd(),  Skipper> set_command_;
    qi::rule<Iterator, Ast::ShowCmd(), Skipper> show_command_;
};

Test Cases

Live On Coliru

int main() {
    typedef std::string::const_iterator It;
    command_grammar<It> const commands;

    for (std::string const input : {
            "help",
            "set drive C:",
            "SET DRIVE C:",
            "loAD "XYZ"",
            "load "anything 
at all"",
            // multiline
            "load "ABC"
help
-he
/H
sh
se t off
se debug ON
b
q"
            })
    {
        std::cout << "----- '" << input << "' -----
";
        It f = input.begin(), l = input.end();

        Ast::Commands parsed;
        bool result = parse(f, l, commands, parsed);

        if (result) {
            for (auto& cmd : parsed) {
                std::cout << "Parsed " << cmd << "
";
            }
        } else {
            std::cout << "Parse failed
";
        }

        if (f != l) {
            std::cout << "Remaining unparsed '" << std::string(f, l) << "'
";
        }
    }
}

Prints:

----- 'help' -----
Parsed HELP ()
----- 'set drive C:' -----
Parsed SET (DRIVE C)
----- 'SET DRIVE C:' -----
Parsed SET (DRIVE C)
----- 'loAD "XYZ"' -----
Parsed LOAD (XYZ)
----- 'load "anything 
at all"' -----
Parsed LOAD (anything 
at all)
----- 'load "ABC"
help
-he
/H
sh
se t off
se debug ON
b
q' -----
Parsed LOAD (ABC)
Parsed HELP ()
Parsed HELP ()
Parsed HELP ()
Parsed SHOW ()
Parsed SET (TRACE 0)
Parsed SET (DEBUG 1)
Parsed BOOT ()
Parsed EXIT ()

Full Listing

Live On Coliru

//#define BOOST_SPIRIT_DEBUG
#include <boost/fusion/include/io.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/spirit/include/phoenix.hpp>
#include <boost/spirit/include/qi.hpp>

namespace qi = boost::spirit::qi;

namespace Ast {
    struct NoValue {
        bool operator==(NoValue const &) const { return true; }
        friend std::ostream& operator<<(std::ostream& os, NoValue) { return os; }
    };
    template <typename Tag> struct GenericCommand {};

    namespace tag {
        struct boot {};
        struct help {};
        struct load {};
        struct exit {};
        struct set {};
        struct show {};

        static std::ostream& operator<<(std::ostream& os, boot) { return os << "BOOT"; }
        static std::ostream& operator<<(std::ostream& os, help) { return os << "HELP"; }
        static std::ostream& operator<<(std::ostream& os, load) { return os << "LOAD"; }
        static std::ostream& operator<<(std::ostream& os, exit) { return os << "EXIT"; }
        static std::ostream& operator<<(std::ostream& os, set ) { return os << "SET"; }
        static std::ostream& operator<<(std::ostream& os, show) { return os << "SHOW"; }
    };

    template <> struct GenericCommand<tag::load> { std::string name; };

    template <> struct GenericCommand<tag::set> {
        std::string property;
        boost::variant<NoValue, std::string, bool> value; // optional
    };

    using BootCmd = GenericCommand<tag::boot>;
    using HelpCmd = GenericCommand<tag::help>;
    using ExitCmd = GenericCommand<tag::exit>;
    using ShowCmd = GenericCommand<tag::show>;
    using LoadCmd = GenericCommand<tag::load>;
    using SetCmd  = GenericCommand<tag::set>;

    using Command = boost::variant<BootCmd, HelpCmd, ExitCmd, ShowCmd, LoadCmd, SetCmd>;
    using Commands = std::list<Command>;

    template <typename Tag>
    static inline std::ostream& operator<<(std::ostream& os, Ast::GenericCommand<Tag> const& command) { 
        return os << Tag{} << " " << boost::fusion::as_vector(command);
    }
}

BOOST_FUSION_ADAPT_TPL_STRUCT((Tag), (Ast::GenericCommand) (Tag), )
BOOST_FUSION_ADAPT_STRUCT(Ast::LoadCmd, name)
BOOST_FUSION_ADAPT_STRUCT(Ast::SetCmd, property, value)

template <typename Iterator>
struct command_grammar : qi::grammar<Iterator, Ast::Commands()> {
    command_grammar() : comma

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...