c++ - boost-sprit-lex unifying multiple tokens into a single token in lex differentiated by the id

Question

Welcome To Ask or Share your Answers For Others

c++ - boost-sprit-lex unifying multiple tokens into a single token in lex differentiated by the id

posted Oct 24, 2021 in Technique[技术] by 深蓝 (71.8m points)

c++ - boost-sprit-lex unifying multiple tokens into a single token in lex differentiated by the id

edit : I have ripped out the lexer as it does not cleanly integrate with Qi and just obfuscates grammars (see answer below).

My lexer looks as follows :

template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
tokens()
    : left_curly(""{""),
    right_curly(""}""),
    left_paren(""(""),
    right_paren("")""),
    colon(":"),
    scolon(";"),
    namespace_("(?i:namespace)"),
    event("(?i:event)"),
    optional("(?i:optional)"),
    required("(?i:required)"),
    repeated("(?i:repeated)"),
    t_int_4("(?i:int4)"),
    t_int_8("(?i:int8)"),
    t_string("(?i:string)"),
    ordinal("\d+"),
    identifier("\w+")

{
    using boost::spirit::lex::_val;

    this->self
        = 
        left_curly    
        | right_curly 
        | left_paren
        | right_paren
        | colon         
        | scolon
        | namespace_      
        | event             
        | optional           
        | required          
        | repeated
        | t_int_4
        | t_int_8
        | t_string
        | ordinal             
        | identifier         
        | lex::token_def<>("[ \t\n]+")   [lex::_pass = lex::pass_flags::pass_ignore];
}


lex::token_def<lex::omit> left_curly, right_curly, colon, scolon,repeated, left_paren, right_paren;
lex::token_def<lex::omit> namespace_, event, optional, required,t_int_4, t_int_8, t_string;
lex::token_def<boost::uint32_t> ordinal;
lex::token_def<std::string> identifier;

};

I want t_int_4,t_int_8, and t_string to represented by a single token type attributed by an integral type. At the moment my QI grammar has to do the lifting for this and then set the token in a qi::rule semantic action :

 atomic_type = tok.t_int_4     [ _val = RBL_INT4]
                | tok.t_int_8             [ _val = RBL_INT8]
                | tok.t_string            [ _val = RBL_STRING];

See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙；凝视深渊过久,深渊将回以凝视…

1 Reply

深蓝 · Answer 1 · 2021-10-23T17:57:46+0000

From your questions relating to integrating lex into qi grammar, from the last few days. It seems you've identified multiple integration issues. At this point you should ask yourself why you are even trying to integrate a lexer into a PEG grammar. PEG grammars can neatly capture tokenization in situ, and so you don't really gain much from introducing lexer especially considering the lex->qi case where introducing a lexer has shown you that not only do you need hacks to do what is neat in qi in terms of expressing your grammar but also hacks for getting error handling and annotation working properly. Therefore I suggest removing Lex and sticking to Qi.

Here is your grammar with the lexer removed. The ast is in a file of it's own.

#include "ast.hpp"
#define BOOST_SPIRIT_USE_PHOENIX_V3
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_core.hpp>
#include <boost/range/iterator_range.hpp>
#include <vector>

namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;
namespace px = boost::phoenix;

template <typename Iterator>
struct skipper : qi::grammar<Iterator>
{
    skipper() : skipper::base_type(start)
    {
        using boost::spirit::ascii::char_;

        start = ascii::space | qi::lit("//") >> *(ascii::char_ - qi::eol) >> qi::eol;
    }

    qi::rule<Iterator> start;
};

struct error_handler_
{
    typedef void result_type;
    template<typename First, typename Last, typename ErrorPos, typename What>
    void operator()(First f, Last l, ErrorPos e, What w) const
    {
        std::cout << "Expected : " << w << std::endl;
        std::cout << std::string(f,l) << std::endl;
        int i = std::distance(f,e);
        std::cout << std::string(i+1,' ') <<  "^---- here"  << std::endl;
    }
};

px::function<error_handler_> error_handler;

template<typename Iterator>
struct annotation_state
{
  typedef boost::iterator_range<Iterator> annotation_iterator;
  typedef std::vector<annotation_iterator> annotation_iterators;

  annotation_iterators annotations;
};

template<typename Iterator>
struct annotate_
{
    typedef void result_type;

    annotation_state<Iterator> & as;
    annotate_(annotation_state<Iterator> & as) : as(as) {}

    template<typename Val, typename First, typename Last>
    void operator()(Val v, First f, Last l) const
    {
      v.id = as.annotations.size();
      as.annotations.push_back(boost::make_iterator_range(f,l));
      std::cout << std::string(f,l) << std::endl;
    }
};



template <typename Iterator, typename Skipper>
struct grammar : qi::grammar<Iterator,namespace_descriptor(),Skipper>
{
    grammar(annotation_state<Iterator> & as) 
        : grammar::base_type(namespace_descriptor_),
          annotation_state_(as),
          annotate(as)

    {
        using namespace qi;

        atomic_type.add
            ("int4", RBL_INT4)
            ("int8", RBL_INT8)
            ("string", RBL_STRING);

        event_entry_qualifier.add
            ("optional", ENTRY_OPTIONAL)
            ("required", ENTRY_REQUIRED)
            ("repeated", ENTRY_REPEATED);

        oid_ = ordinal  > ':' > identifier;
        ordinal = uint_parser<boost::uint32_t>();
        identifier = +(char_("a","z") | char_("A","Z") | char_('_'));
        type_descriptor_ = atomic_type_ | compound_type_;
        atomic_type_ = no_case[atomic_type] > attr("");

        compound_type_ = 
            no_case[lit("event")] 
            > attr(RBL_EVENT) 
            > '(' 
            > identifier  
            > ')';

        event_entry_ = 
            no_case[event_entry_qualifier] 
            > oid_ 
            > type_descriptor_ 
            > ';';

        event_descriptor_ = 
            no_case[lit("event")] 
            > oid_ 
            > '{' 
            > *(event_entry_) 
            > '}'; 

        namespace_descriptor_ = 
            no_case[lit("namespace")] 
            > identifier 
            > '{' 
            > * (event_descriptor_) 
            > '}'; 

        identifier.name("identifier");
        oid_.name("ordinal-identifier pair");
        ordinal.name("ordinal");

        on_error<fail>(namespace_descriptor_, ::error_handler(_1,_2,_3,_4));
        on_success(oid_, annotate(_val,_1,_3));
        on_success(type_descriptor_, annotate(_val,_1,_3));
        on_success(event_entry_, annotate(_val,_1,_3));
        on_success(event_descriptor_, annotate(_val,_1,_3));
    }

    annotation_state<Iterator> & annotation_state_;
    px::function<annotate_<Iterator> > annotate;

    qi::rule< Iterator, oid()> oid_;
    qi::rule< Iterator, boost::uint32_t()> ordinal;
    qi::rule< Iterator, std::string()> identifier;
    qi::rule< Iterator, type_descriptor()> type_descriptor_;
    qi::rule< Iterator, type_descriptor()> atomic_type_;
    qi::rule< Iterator, type_descriptor()> compound_type_; 

    qi::rule< Iterator, event_entry(), Skipper> event_entry_;
    qi::rule< Iterator, event_descriptor(), Skipper> event_descriptor_;
    qi::rule< Iterator, namespace_descriptor(), Skipper> namespace_descriptor_;

    qi::symbols<char, int> atomic_type;
    qi::symbols<char, int> event_entry_qualifier;
};

int main()
{
    std::string test = "namespace ns { event 1:sihan { OpTIONAL 1:hassan event(haSsan);} }";
    typedef std::string::iterator it;

    it beg = test.begin();
    it end = test.end();

    annotation_state<it> as;
    skipper<it> skip;
    grammar<it, skipper<it> > g(as);


    bool r = qi::phrase_parse(beg,end,g,skip);
    if(r)
        ;
    else
    {
        std::cout << "parsing failed" << std::endl;
    }
}

Categories

c++ - boost-sprit-lex unifying multiple tokens into a single token in lex differentiated by the id

c++ - boost-sprit-lex unifying multiple tokens into a single token in lex differentiated by the id

Please log in or register to add a comment.

Please log in or register to reply this article.

1 Reply

Please log in or register to add a comment.

Just Browsing Browsing

Most popular tags