Add commit 43aab01 of BenHanson/lexertl14 from github
Committed by Luis Ángel San Martín
Parent: c4f792bd40
Commit: d3de52ca82
926  YACReaderLibrary/lexertl/parser/parser.hpp  Normal file
@@ -0,0 +1,926 @@
// parser.hpp
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef LEXERTL_PARSER_HPP
#define LEXERTL_PARSER_HPP

#include <assert.h>
#include <algorithm>
#include "tree/end_node.hpp"
#include "tree/iteration_node.hpp"
#include "tree/leaf_node.hpp"
#include <map>
#include "tokeniser/re_tokeniser.hpp"
#include "../runtime_error.hpp"
#include "tree/selection_node.hpp"
#include "tree/sequence_node.hpp"
#include <type_traits>
#include <vector>

namespace lexertl
{
namespace detail
{
/*
    General principles of regex parsing:
    - Every regex is a sequence of sub-regexes.
    - Regexes consist of operands and operators
    - All operators decompose to sequence, selection ('|') and iteration ('*')
    - Regex tokens are stored on a stack.
    - When a complete sequence of regex tokens is on the stack it is processed.

    Grammar:

    <REGEX>      -> <OREXP>
    <OREXP>      -> <SEQUENCE> | <OREXP>'|'<SEQUENCE>
    <SEQUENCE>   -> <SUB>
    <SUB>        -> <EXPRESSION> | <SUB><EXPRESSION>
    <EXPRESSION> -> <REPEAT>
    <REPEAT>     -> charset | macro | '('<REGEX>')' | <REPEAT><DUPLICATE>
    <DUPLICATE>  -> '?' | '??' | '*' | '*?' | '+' | '+?' | '{n[,[m]]}' |
                    '{n[,[m]]}?'
*/
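/*
    Illustrative example: under this grammar the regex "ab|c*" reduces to an
    <OREXP> whose left <SEQUENCE> is the charsets 'a' and 'b' joined through
    <SUB><EXPRESSION>, and whose right <SEQUENCE> is the charset 'c' wrapped
    in the '*' <DUPLICATE>. parse() below drives the reduction with the
    operator precedence table in re_token.hpp: '<' and '=' shift the next
    token onto _token_stack, '>' pops a handle and calls reduce().
*/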

template<typename rules_char_type, typename sm_traits>
class basic_parser
{
public:
    enum {char_24_bit = sm_traits::char_24_bit};
    using char_type = typename sm_traits::char_type;
    using id_type = typename sm_traits::id_type;
    using end_node = basic_end_node<id_type>;
    using input_char_type = typename sm_traits::input_char_type;
    using input_string_token = basic_string_token<input_char_type>;
    using iteration_node = basic_iteration_node<id_type>;
    using leaf_node = basic_leaf_node<id_type>;
    using tokeniser =
        basic_re_tokeniser<rules_char_type, input_char_type, id_type>;
    using node = basic_node<id_type>;
    using node_ptr_vector = typename node::node_ptr_vector;
    using string = std::basic_string<rules_char_type>;
    using string_token = basic_string_token<char_type>;
    using selection_node = basic_selection_node<id_type>;
    using sequence_node = basic_sequence_node<id_type>;
    using charset_map = std::map<string_token, id_type>;
    using charset_pair = std::pair<string_token, id_type>;
    using compressed = std::integral_constant<bool, sm_traits::compressed>;
    using token = basic_re_token<rules_char_type, input_char_type>;
    static_assert(std::is_move_assignable<token>::value &&
        std::is_move_constructible<token>::value,
        "token is not movable.");
    using token_vector = std::vector<token>;

    basic_parser(const std::locale &locale_,
        node_ptr_vector &node_ptr_vector_,
        charset_map &charset_map_, const id_type eoi_) :
        _locale(locale_),
        _node_ptr_vector(node_ptr_vector_),
        _charset_map(charset_map_),
        _eoi(eoi_),
        _token_stack(),
        _tree_node_stack()
    {
    }

    observer_ptr<node> parse(const token_vector &regex_, const id_type id_,
        const id_type user_id_, const id_type next_dfa_,
        const id_type push_dfa_, const bool pop_dfa_,
        const std::size_t flags_, id_type &nl_id_, const bool seen_bol_)
    {
        auto iter_ = regex_.cbegin();
        auto end_ = regex_.cend();
        observer_ptr<node> root_ = nullptr;
        observer_ptr<token> lhs_token_ = nullptr;
        // There cannot be less than 2 tokens
        auto rhs_token_ = std::make_unique<token>(*iter_++);
        char action_ = 0;

        _token_stack.emplace(std::move(rhs_token_));
        rhs_token_ = std::make_unique<token>(*iter_);

        if (iter_ + 1 != end_) ++iter_;

        do
        {
            lhs_token_ = _token_stack.top().get();
            action_ = lhs_token_->precedence(rhs_token_->_type);

            switch (action_)
            {
            case '<':
            case '=':
                _token_stack.emplace(std::move(rhs_token_));
                rhs_token_ = std::make_unique<token>(*iter_);

                if (iter_ + 1 != end_) ++iter_;

                break;
            case '>':
                reduce(nl_id_);
                break;
            default:
            {
                std::ostringstream ss_;

                ss_ << "A syntax error occurred: '" <<
                    lhs_token_->precedence_string() <<
                    "' against '" << rhs_token_->precedence_string() <<
                    " in rule id " << id_ << '.';
                throw runtime_error(ss_.str());
                break;
            }
            }
        } while (!_token_stack.empty());

        if (_tree_node_stack.empty())
        {
            std::ostringstream ss_;

            ss_ << "Empty rules are not allowed in rule id " <<
                id_ << '.';
            throw runtime_error(ss_.str());
        }

        assert(_tree_node_stack.size() == 1);

        observer_ptr<node> lhs_node_ = _tree_node_stack.top();

        _tree_node_stack.pop();
        _node_ptr_vector.emplace_back(std::make_unique<end_node>
            (id_, user_id_, next_dfa_, push_dfa_, pop_dfa_));

        observer_ptr<node> rhs_node_ = _node_ptr_vector.back().get();

        _node_ptr_vector.emplace_back(std::make_unique<sequence_node>
            (lhs_node_, rhs_node_));
        root_ = _node_ptr_vector.back().get();

        if (seen_bol_)
        {
            fixup_bol(root_);
        }

        if ((flags_ & match_zero_len) == 0)
        {
            const auto &firstpos_ = root_->firstpos();

            for (observer_ptr<const node> node_ : firstpos_)
            {
                if (node_->end_state())
                {
                    std::ostringstream ss_;

                    ss_ << "Rules that match zero characters are not allowed "
                        "as this can cause an infinite loop in user code. The "
                        "match_zero_len flag overrides this check. Rule id " <<
                        id_ << '.';
                    throw runtime_error(ss_.str());
                }
            }
        }

        return root_;
    }

    static id_type bol_token()
    {
        return static_cast<id_type>(~1);
    }

    static id_type eol_token()
    {
        return static_cast<id_type>(~2);
    }

private:
    using input_range = typename input_string_token::range;
    using range = typename string_token::range;
    using string_token_vector = std::vector<std::unique_ptr<string_token>>;
    using token_stack = std::stack<std::unique_ptr<token>>;
    using tree_node_stack = typename node::node_stack;

    const std::locale &_locale;
    node_ptr_vector &_node_ptr_vector;
    charset_map &_charset_map;
    id_type _eoi;
    token_stack _token_stack;
    tree_node_stack _tree_node_stack;

    void reduce(id_type &nl_id_)
    {
        observer_ptr<token> lhs_ = nullptr;
        observer_ptr<token> rhs_ = nullptr;
        token_stack handle_;
        char action_ = 0;

        do
        {
            handle_.emplace();
            rhs_ = _token_stack.top().release();
            handle_.top().reset(rhs_);
            _token_stack.pop();

            if (!_token_stack.empty())
            {
                lhs_ = _token_stack.top().get();
                action_ = lhs_->precedence(rhs_->_type);
            }
        } while (!_token_stack.empty() && action_ == '=');

        assert(_token_stack.empty() || action_ == '<');

        switch (rhs_->_type)
        {
        case BEGIN:
            // finished processing so exit
            break;
        case REGEX:
            // finished parsing, nothing to do
            break;
        case OREXP:
            orexp(handle_);
            break;
        case SEQUENCE:
            _token_stack.emplace(std::make_unique<token>(OREXP));
            break;
        case SUB:
            sub(handle_);
            break;
        case EXPRESSION:
            _token_stack.emplace(std::make_unique<token>(SUB));
            break;
        case REPEAT:
            repeat(handle_);
            break;
        case BOL:
            bol(handle_);
            break;
        case EOL:
            eol(handle_, nl_id_);
            break;
        case CHARSET:
            charset(handle_, compressed());
            break;
        case OPENPAREN:
            openparen(handle_);
            break;
        case OPT:
        case AOPT:
            optional(rhs_->_type == OPT);
            _token_stack.emplace(std::make_unique<token>(DUP));
            break;
        case ZEROORMORE:
        case AZEROORMORE:
            zero_or_more(rhs_->_type == ZEROORMORE);
            _token_stack.emplace(std::make_unique<token>(DUP));
            break;
        case ONEORMORE:
        case AONEORMORE:
            one_or_more(rhs_->_type == ONEORMORE);
            _token_stack.emplace(std::make_unique<token>(DUP));
            break;
        case REPEATN:
        case AREPEATN:
            repeatn(rhs_->_type == REPEATN, handle_.top().get());
            _token_stack.emplace(std::make_unique<token>(DUP));
            break;
        default:
            throw runtime_error
                ("Internal error in regex_parser::reduce.");
            break;
        }
    }

    void orexp(token_stack &handle_)
    {
        assert(handle_.top()->_type == OREXP &&
            (handle_.size() == 1 || handle_.size() == 3));

        if (handle_.size() == 1)
        {
            _token_stack.emplace(std::make_unique<token>(REGEX));
        }
        else
        {
            handle_.pop();
            assert(handle_.top()->_type == OR);
            handle_.pop();
            assert(handle_.top()->_type == SEQUENCE);
            perform_or();
            _token_stack.emplace(std::make_unique<token>(OREXP));
        }
    }

    void perform_or()
    {
        // perform or
        observer_ptr<node> rhs_ = _tree_node_stack.top();

        _tree_node_stack.pop();

        observer_ptr<node> lhs_ = _tree_node_stack.top();

        _node_ptr_vector.emplace_back
            (std::make_unique<selection_node>(lhs_, rhs_));
        _tree_node_stack.top() = _node_ptr_vector.back().get();
    }

    void sub(token_stack &handle_)
    {
        assert((handle_.top()->_type == SUB &&
            handle_.size() == 1) || handle_.size() == 2);

        if (handle_.size() == 1)
        {
            _token_stack.emplace(std::make_unique<token>(SEQUENCE));
        }
        else
        {
            handle_.pop();
            assert(handle_.top()->_type == EXPRESSION);
            // perform join
            sequence();
            _token_stack.emplace(std::make_unique<token>(SUB));
        }
    }

    void repeat(token_stack &handle_)
    {
        assert(handle_.top()->_type == REPEAT &&
            handle_.size() >= 1 && handle_.size() <= 3);

        if (handle_.size() == 1)
        {
            _token_stack.emplace(std::make_unique<token>(EXPRESSION));
        }
        else
        {
            handle_.pop();
            assert(handle_.top()->_type == DUP);
            _token_stack.emplace(std::make_unique<token>(REPEAT));
        }
    }

#ifndef NDEBUG
    void bol(token_stack &handle_)
#else
    void bol(token_stack &)
#endif
    {
        assert(handle_.top()->_type == BOL &&
            handle_.size() == 1);

        // store charset
        _node_ptr_vector.emplace_back
            (std::make_unique<leaf_node>(bol_token(), true));
        _tree_node_stack.push(_node_ptr_vector.back().get());
        _token_stack.emplace(std::make_unique<token>(REPEAT));
    }

#ifndef NDEBUG
    void eol(token_stack &handle_, id_type &nl_id_)
#else
    void eol(token_stack &, id_type &nl_id_)
#endif
    {
        const string_token nl_('\n');
        const id_type temp_nl_id_ = lookup(nl_);

        assert(handle_.top()->_type == EOL &&
            handle_.size() == 1);

        if (temp_nl_id_ != ~static_cast<id_type>(0))
        {
            nl_id_ = temp_nl_id_;
        }

        // store charset
        _node_ptr_vector.emplace_back
            (std::make_unique<leaf_node>(eol_token(), true));
        _tree_node_stack.push(_node_ptr_vector.back().get());
        _token_stack.emplace(std::make_unique<token>(REPEAT));
    }

    // Uncompressed
    void charset(token_stack &handle_, const std::false_type &)
    {
        assert(handle_.top()->_type == CHARSET &&
            handle_.size() == 1);

        const id_type id_ = lookup(handle_.top()->_str);

        // store charset
        _node_ptr_vector.emplace_back(std::make_unique<leaf_node>(id_, true));
        _tree_node_stack.push(_node_ptr_vector.back().get());
        _token_stack.emplace(std::make_unique<token>(REPEAT));
    }

    // Compressed
    void charset(token_stack &handle_, const std::true_type &)
    {
        assert(handle_.top()->_type == CHARSET &&
            handle_.size() == 1);

        std::unique_ptr<token> token_(handle_.top().release());

        handle_.pop();
        create_sequence(token_);
    }

    // Slice wchar_t into sequence of char.
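    // Illustrative example: when the compressed trait is set (input chars are
    // wider than char_type), a range such as [0x0100-0x0203] cannot live in a
    // single char charset, so slice_range() splits it per byte into
    // {0x01} x {0x00-0xff} and {0x02} x {0x00-0x03}; push_ranges() then
    // rebuilds it as a selection of MSB-leaf/LSB-leaf sequences.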
    void create_sequence(std::unique_ptr<token> &token_)
    {
        string_token_vector data_[char_24_bit ? 3 : 2];

        for (const input_range &range_ : token_->_str._ranges)
        {
            slice_range(range_, data_,
                std::integral_constant<bool, char_24_bit>());
        }

        push_ranges(data_, std::integral_constant<bool, char_24_bit>());

        _token_stack.emplace(std::make_unique<token>(OPENPAREN));
        _token_stack.emplace(std::make_unique<token>(REGEX));
        _token_stack.emplace(std::make_unique<token>(CLOSEPAREN));
    }

    // 16 bit unicode
    void slice_range(const input_range &range_, string_token_vector data_[2],
        const std::false_type &)
    {
        const unsigned char first_msb_ = static_cast<unsigned char>
            ((range_.first >> 8) & 0xff);
        const unsigned char first_lsb_ = static_cast<unsigned char>
            (range_.first & 0xff);
        const unsigned char second_msb_ = static_cast<unsigned char>
            ((range_.second >> 8) & 0xff);
        const unsigned char second_lsb_ = static_cast<unsigned char>
            (range_.second & 0xff);

        if (first_msb_ == second_msb_)
        {
            insert_range(first_msb_, first_msb_, first_lsb_,
                second_lsb_, data_);
        }
        else
        {
            insert_range(first_msb_, first_msb_, first_lsb_, 0xff, data_);

            if (second_msb_ > first_msb_ + 1)
            {
                insert_range(first_msb_ + 1, second_msb_ - 1, 0, 0xff, data_);
            }

            insert_range(second_msb_, second_msb_, 0, second_lsb_, data_);
        }
    }

    // 24 bit unicode
    void slice_range(const input_range &range_, string_token_vector data_[3],
        const std::true_type &)
    {
        const unsigned char first_msb_ = static_cast<unsigned char>
            ((range_.first >> 16) & 0xff);
        const unsigned char first_mid_ = static_cast<unsigned char>
            ((range_.first >> 8) & 0xff);
        const unsigned char first_lsb_ = static_cast<unsigned char>
            (range_.first & 0xff);
        const unsigned char second_msb_ = static_cast<unsigned char>
            ((range_.second >> 16) & 0xff);
        const unsigned char second_mid_ = static_cast<unsigned char>
            ((range_.second >> 8) & 0xff);
        const unsigned char second_lsb_ = static_cast<unsigned char>
            (range_.second & 0xff);

        if (first_msb_ == second_msb_)
        {
            string_token_vector data2_[2];

            // Re-use 16 bit slice function
            slice_range(range_, data2_, std::false_type());

            for (std::size_t i_ = 0, size_ = data2_[0].size();
                i_ < size_; ++i_)
            {
                insert_range(string_token(first_msb_, first_msb_),
                    *data2_[0][i_], *data2_[1][i_], data_);
            }
        }
        else
        {
            insert_range(first_msb_, first_msb_,
                first_mid_, first_mid_,
                first_lsb_, 0xff, data_);

            if (first_mid_ != 0xff)
            {
                insert_range(first_msb_, first_msb_,
                    first_mid_ + 1, 0xff,
                    0, 0xff, data_);
            }

            if (second_msb_ > first_msb_ + 1)
            {
                insert_range(first_mid_ + 1, second_mid_ - 1,
                    0, 0xff,
                    0, 0xff, data_);
            }

            if (second_mid_ != 0)
            {
                insert_range(second_msb_, second_msb_,
                    0, second_mid_ - 1,
                    0, 0xff, data_);
                insert_range(second_msb_, second_msb_,
                    second_mid_, second_mid_,
                    0, second_lsb_, data_);
            }
            else
            {
                insert_range(second_msb_, second_msb_,
                    0, second_mid_,
                    0, second_lsb_, data_);
            }
        }
    }

    // 16 bit unicode
    void insert_range(const unsigned char first_, const unsigned char second_,
        const unsigned char first2_, const unsigned char second2_,
        string_token_vector data_[2])
    {
        const string_token token_(first_ > second_ ? second_ : first_,
            first_ > second_ ? first_ : second_);
        const string_token token2_(first2_ > second2_ ? second2_ : first2_,
            first2_ > second2_ ? first2_ : second2_);

        insert_range(token_, token2_, data_);
    }

    void insert_range(const string_token &token_, const string_token &token2_,
        string_token_vector data_[2])
    {
        typename string_token_vector::const_iterator iter_ =
            std::find_if(data_[0].begin(), data_[0].end(),
                [&token_](const std::unique_ptr<string_token> &rhs_)
                {
                    return token_ == *rhs_.get();
                });

        if (iter_ == data_[0].end())
        {
            data_[0].emplace_back(std::make_unique<string_token>(token_));
            data_[1].emplace_back(std::make_unique<string_token>(token2_));
        }
        else
        {
            const std::size_t index_ = iter_ - data_[0].begin();

            data_[1][index_]->insert(token2_);
        }
    }

    // 24 bit unicode
    void insert_range(const unsigned char first_, const unsigned char second_,
        const unsigned char first2_, const unsigned char second2_,
        const unsigned char first3_, const unsigned char second3_,
        string_token_vector data_[3])
    {
        const string_token token_(first_ > second_ ? second_ : first_,
            first_ > second_ ? first_ : second_);
        const string_token token2_(first2_ > second2_ ? second2_ : first2_,
            first2_ > second2_ ? first2_ : second2_);
        const string_token token3_(first3_ > second3_ ? second3_ : first3_,
            first3_ > second3_ ? first3_ : second3_);

        insert_range(token_, token2_, token3_, data_);
    }

    void insert_range(const string_token &token_, const string_token &token2_,
        const string_token &token3_, string_token_vector data_[3])
    {
        auto iter_ = data_[0].cbegin();
        auto end_ = data_[0].cend();
        bool finished_ = false;

        do
        {
            iter_ = std::find_if(iter_, end_,
                [&token_](const std::unique_ptr<string_token> &rhs_)
                {
                    return token_ == *rhs_.get();
                });

            if (iter_ == end_)
            {
                data_[0].emplace_back(std::make_unique<string_token>(token_));
                data_[1].emplace_back(std::make_unique<string_token>(token2_));
                data_[2].emplace_back(std::make_unique<string_token>(token3_));
                finished_ = true;
            }
            else
            {
                const std::size_t index_ = iter_ - data_[0].begin();

                if (*data_[1][index_] == token2_)
                {
                    data_[2][index_]->insert(token3_);
                    finished_ = true;
                }
                else
                {
                    ++iter_;
                }
            }
        } while (!finished_);
    }

    // 16 bit unicode
    void push_ranges(string_token_vector data_[2], const std::false_type &)
    {
        auto viter_ = data_[0].cbegin();
        auto vend_ = data_[0].cend();
        auto viter2_ = data_[1].cbegin();

        push_range(viter_++->get());
        push_range(viter2_++->get());
        sequence();

        while (viter_ != vend_)
        {
            push_range(viter_++->get());
            push_range(viter2_++->get());
            sequence();
            perform_or();
        }
    }

    // 24 bit unicode
    void push_ranges(string_token_vector data_[3], const std::true_type &)
    {
        auto viter_ = data_[0].cbegin();
        auto vend_ = data_[0].cend();
        auto viter2_ = data_[1].cbegin();
        auto viter3_ = data_[2].cbegin();

        push_range(viter_++->get());
        push_range(viter2_++->get());
        sequence();
        push_range(viter3_++->get());
        sequence();

        while (viter_ != vend_)
        {
            push_range(viter_++->get());
            push_range(viter2_++->get());
            sequence();
            push_range(viter3_++->get());
            sequence();
            perform_or();
        }
    }

    void push_range(observer_ptr<const string_token> token_)
    {
        const id_type id_ = lookup(*token_);

        _node_ptr_vector.emplace_back(std::make_unique<leaf_node>(id_, true));
        _tree_node_stack.push(_node_ptr_vector.back().get());
    }

    id_type lookup(const string_token &charset_)
    {
        // Converted to id_type below.
        std::size_t id_ = sm_traits::npos();

        if (static_cast<id_type>(id_) < id_)
        {
            throw runtime_error("id_type is not large enough "
                "to hold all ids.");
        }

        typename charset_map::const_iterator iter_ =
            _charset_map.find(charset_);

        if (iter_ == _charset_map.end())
        {
            id_ = _charset_map.size();
            _charset_map.insert(charset_pair(charset_,
                static_cast<id_type>(id_)));
        }
        else
        {
            id_ = iter_->second;
        }

        return static_cast<id_type>(id_);
    }

    void openparen(token_stack &handle_)
    {
        assert(handle_.top()->_type == OPENPAREN &&
            handle_.size() == 3);

        handle_.pop();
        assert(handle_.top()->_type == REGEX);
        handle_.pop();
        assert(handle_.top()->_type == CLOSEPAREN);
        _token_stack.emplace(std::make_unique<token>(REPEAT));
    }

    void sequence()
    {
        observer_ptr<node> rhs_ = _tree_node_stack.top();

        _tree_node_stack.pop();

        observer_ptr<node> lhs_ = _tree_node_stack.top();

        _node_ptr_vector.emplace_back
            (std::make_unique<sequence_node>(lhs_, rhs_));
        _tree_node_stack.top() = _node_ptr_vector.back().get();
    }

    void optional(const bool greedy_)
    {
        // perform ?
        observer_ptr<node> lhs_ = _tree_node_stack.top();
        // Don't know if lhs_ is a leaf_node, so get firstpos.
        auto &firstpos_ = lhs_->firstpos();

        for (observer_ptr<node> node_ : firstpos_)
        {
            // These are leaf_nodes!
            node_->greedy(greedy_);
        }

        _node_ptr_vector.emplace_back(std::make_unique<leaf_node>
            (node::null_token(), greedy_));

        observer_ptr<node> rhs_ = _node_ptr_vector.back().get();

        _node_ptr_vector.emplace_back
            (std::make_unique<selection_node>(lhs_, rhs_));
        _tree_node_stack.top() = _node_ptr_vector.back().get();
    }

    void zero_or_more(const bool greedy_)
    {
        // perform *
        observer_ptr<node> ptr_ = _tree_node_stack.top();

        _node_ptr_vector.emplace_back
            (std::make_unique<iteration_node>(ptr_, greedy_));
        _tree_node_stack.top() = _node_ptr_vector.back().get();
    }

    void one_or_more(const bool greedy_)
    {
        // perform +
        observer_ptr<node> lhs_ = _tree_node_stack.top();
        observer_ptr<node> copy_ = lhs_->copy(_node_ptr_vector);

        _node_ptr_vector.emplace_back(std::make_unique<iteration_node>
            (copy_, greedy_));

        observer_ptr<node> rhs_ = _node_ptr_vector.back().get();

        _node_ptr_vector.emplace_back
            (std::make_unique<sequence_node>(lhs_, rhs_));
        _tree_node_stack.top() = _node_ptr_vector.back().get();
    }

    // perform {n[,[m]]}
    // Semantic checks have already been performed.
    // {0,} = *
    // {0,1} = ?
    // {1,} = +
    // therefore we do not check for these cases.
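    // Illustrative example: for a{2,4} this function receives _extra == "2,4"
    // and expands the tree to the equivalent of a a a? a? (min_ mandatory
    // copies followed by max_ - min_ optional copies), while a{2,} expands
    // to the equivalent of a a a*.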
    void repeatn(const bool greedy_, observer_ptr<const token> token_)
    {
        const rules_char_type *str_ = token_->_extra.c_str();
        std::size_t min_ = 0;
        bool comma_ = false;
        std::size_t max_ = 0;

        while (*str_ >= '0' && *str_ <= '9')
        {
            min_ *= 10;
            min_ += *str_ - '0';
            ++str_;
        }

        comma_ = *str_ == ',';

        if (comma_) ++str_;

        while (*str_ >= '0' && *str_ <= '9')
        {
            max_ *= 10;
            max_ += *str_ - '0';
            ++str_;
        }

        if (!(min_ == 1 && !comma_))
        {
            const std::size_t top_ = min_ > 0 ? min_ : max_;

            if (min_ == 0)
            {
                optional(greedy_);
            }

            observer_ptr<node> prev_ = _tree_node_stack.top()->
                copy(_node_ptr_vector);
            observer_ptr<node> curr_ = nullptr;

            for (std::size_t i_ = 2; i_ < top_; ++i_)
            {
                curr_ = prev_->copy(_node_ptr_vector);
                _tree_node_stack.push(prev_);
                sequence();
                prev_ = curr_;
            }

            if (comma_ && min_ > 0)
            {
                if (min_ > 1)
                {
                    curr_ = prev_->copy(_node_ptr_vector);
                    _tree_node_stack.push(prev_);
                    sequence();
                    prev_ = curr_;
                }

                if (comma_ && max_)
                {
                    _tree_node_stack.push(prev_);
                    optional(greedy_);
                    prev_ = _tree_node_stack.top();
                    _tree_node_stack.pop();

                    const std::size_t count_ = max_ - min_;

                    for (std::size_t i_ = 1; i_ < count_; ++i_)
                    {
                        curr_ = prev_->copy(_node_ptr_vector);
                        _tree_node_stack.push(prev_);
                        sequence();
                        prev_ = curr_;
                    }
                }
                else
                {
                    _tree_node_stack.push(prev_);
                    zero_or_more(greedy_);
                    prev_ = _tree_node_stack.top();
                    _tree_node_stack.pop();
                }
            }

            _tree_node_stack.push(prev_);
            sequence();
        }
    }

    void fixup_bol(observer_ptr<node> &root_) const
    {
        const auto &first_ = root_->firstpos();
        bool found_ = false;

        for (observer_ptr<const node> node_ : first_)
        {
            found_ = !node_->end_state() && node_->token() == bol_token();

            if (found_) break;
        }

        if (!found_)
        {
            _node_ptr_vector.emplace_back
                (std::make_unique<leaf_node>(bol_token(), true));

            observer_ptr<node> lhs_ = _node_ptr_vector.back().get();

            _node_ptr_vector.emplace_back
                (std::make_unique<leaf_node>(node::null_token(), true));

            observer_ptr<node> rhs_ = _node_ptr_vector.back().get();

            _node_ptr_vector.emplace_back
                (std::make_unique<selection_node>(lhs_, rhs_));
            lhs_ = _node_ptr_vector.back().get();

            _node_ptr_vector.emplace_back
                (std::make_unique<sequence_node>(lhs_, root_));
            root_ = _node_ptr_vector.back().get();
        }
    }
};
}
}

#endif
100  YACReaderLibrary/lexertl/parser/tokeniser/re_token.hpp  Normal file
@@ -0,0 +1,100 @@
|
||||
// re_token.hpp
|
||||
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef LEXERTL_RE_TOKEN_HPP
|
||||
#define LEXERTL_RE_TOKEN_HPP
|
||||
|
||||
#include "../../string_token.hpp"
|
||||
|
||||
namespace lexertl
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
// Note that tokens following END are never seen by parser.hpp.
|
||||
enum token_type {BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT,
|
||||
DUP, OR, CHARSET, BOL, EOL, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT,
|
||||
ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN,
|
||||
END, DIFF};
|
||||
|
||||
template<typename input_char_type, typename char_type>
|
||||
struct basic_re_token
|
||||
{
|
||||
using string_token = basic_string_token<char_type>;
|
||||
using string = std::basic_string<input_char_type>;
|
||||
|
||||
token_type _type;
|
||||
string _extra;
|
||||
string_token _str;
|
||||
|
||||
basic_re_token(const token_type type_ = BEGIN) :
|
||||
_type(type_),
|
||||
_extra(),
|
||||
_str()
|
||||
{
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
_type = BEGIN;
|
||||
_extra.clear();
|
||||
_str.clear();
|
||||
}
|
||||
|
||||
void swap(basic_re_token &rhs_)
|
||||
{
|
||||
std::swap(_type, rhs_._type);
|
||||
_extra.swap(rhs_._extra);
|
||||
_str.swap(rhs_._str);
|
||||
}
|
||||
|
||||
char precedence(const token_type type_) const
|
||||
{
|
||||
// Moved in here for Solaris compiler.
|
||||
static const char precedence_table_[END + 1][END + 1] = {
|
||||
// BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP, | , CHR, BOL, EOL, MCR, ( , ) , ? , ?? , * , *? , + , +?, {n}?, {n}, END
|
||||
/*BEGIN*/{ ' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/*REGEX*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/*OREXP*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/* SEQ */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/* SUB */{ ' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/*EXPRE*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/* RPT */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>' },
|
||||
/*DUPLI*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/* | */{ ' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' },
|
||||
/*CHARA*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
|
||||
/* BOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
|
||||
/* EOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
|
||||
/*MACRO*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
|
||||
/* ( */{ ' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' },
|
||||
/* ) */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
|
||||
/* ? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/* ?? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/* * */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/* *? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/* + */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/* +? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/*{n,m}*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/*{nm}?*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
|
||||
/* END */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }
|
||||
};
|
||||
|
||||
return precedence_table_[_type][type_];
|
||||
}
|
||||
|
||||
const char *precedence_string() const
|
||||
{
|
||||
// Moved in here for Solaris compiler.
|
||||
static const char *precedence_strings_[END + 1] =
|
||||
{"BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION",
|
||||
"REPEAT", "DUPLICATE", "|", "CHARSET", "^", "$", "MACRO", "(", ")",
|
||||
"?", "??", "*", "*?", "+", "+?", "{n[,[m]]}", "{n[,[m]]}?", "END"};
|
||||
|
||||
return precedence_strings_[_type];
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif
778  YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser.hpp  Normal file
@@ -0,0 +1,778 @@
|
||||
// tokeniser.hpp
|
||||
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef LEXERTL_RE_TOKENISER_HPP
|
||||
#define LEXERTL_RE_TOKENISER_HPP
|
||||
|
||||
#include <cstring>
|
||||
#include "re_token.hpp"
|
||||
#include "../../runtime_error.hpp"
|
||||
#include <sstream>
|
||||
#include "../../string_token.hpp"
|
||||
#include "re_tokeniser_helper.hpp"
|
||||
|
||||
namespace lexertl
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
template<typename rules_char_type, typename char_type, typename id_type>
|
||||
class basic_re_tokeniser
|
||||
{
|
||||
public:
|
||||
using re_token = basic_re_token<rules_char_type, char_type>;
|
||||
using tokeniser_helper =
|
||||
basic_re_tokeniser_helper<rules_char_type, char_type, id_type>;
|
||||
using char_state = typename tokeniser_helper::char_state;
|
||||
using state = typename tokeniser_helper::state;
|
||||
using string_token = basic_string_token<char_type>;
|
||||
|
||||
static void next(re_token &lhs_, state &state_, re_token &token_)
|
||||
{
|
||||
rules_char_type ch_ = 0;
|
||||
bool eos_ = state_.next(ch_);
|
||||
bool skipped_ = false;
|
||||
|
||||
token_.clear();
|
||||
|
||||
do
|
||||
{
|
||||
// string begin/end
|
||||
while (!eos_ && ch_ == '"')
|
||||
{
|
||||
state_._in_string ^= 1;
|
||||
eos_ = state_.next(ch_);
|
||||
}
|
||||
|
||||
if (eos_) break;
|
||||
|
||||
// (?# ...)
|
||||
skipped_ = comment(eos_, ch_, state_);
|
||||
|
||||
if (eos_) break;
|
||||
|
||||
// skip_ws set
|
||||
skipped_ |= skip(eos_, ch_, state_);
|
||||
} while (!eos_ && skipped_);
|
||||
|
||||
if (eos_)
|
||||
{
|
||||
if (state_._in_string)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
// Pointless returning index if at end of string
|
||||
state_.unexpected_end(ss_);
|
||||
ss_ << " (missing '\"')";
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
if (state_._paren_count)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
// Pointless returning index if at end of string
|
||||
state_.unexpected_end(ss_);
|
||||
ss_ << " (missing ')')";
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
token_._type = END;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ch_ == '\\')
|
||||
{
|
||||
// Even if we are in a string, respect escape sequences...
|
||||
token_._type = CHARSET;
|
||||
escape(state_, token_._str);
|
||||
}
|
||||
else if (state_._in_string)
|
||||
{
|
||||
// All other meta characters lose their special meaning
|
||||
// inside a string.
|
||||
token_._type = CHARSET;
|
||||
add_char(ch_, state_, token_._str);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Not an escape sequence and not inside a string, so
|
||||
// check for meta characters.
|
||||
switch (ch_)
|
||||
{
|
||||
case '(':
|
||||
token_._type = OPENPAREN;
|
||||
++state_._paren_count;
|
||||
read_options(state_);
|
||||
break;
|
||||
case ')':
|
||||
--state_._paren_count;
|
||||
|
||||
if (state_._paren_count < 0)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
ss_ << "Number of open parenthesis < 0 "
|
||||
"at index " << state_.index() - 1;
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
token_._type = CLOSEPAREN;
|
||||
|
||||
if (!state_._flags_stack.empty())
|
||||
{
|
||||
state_._flags = state_._flags_stack.top();
|
||||
state_._flags_stack.pop();
|
||||
}
|
||||
|
||||
break;
|
||||
case '?':
|
||||
if (!state_.eos() && *state_._curr == '?')
|
||||
{
|
||||
token_._type = AOPT;
|
||||
state_.increment();
|
||||
}
|
||||
else
|
||||
{
|
||||
token_._type = OPT;
|
||||
}
|
||||
|
||||
break;
|
||||
case '*':
|
||||
if (!state_.eos() && *state_._curr == '?')
|
||||
{
|
||||
token_._type = AZEROORMORE;
|
||||
state_.increment();
|
||||
}
|
||||
else
|
||||
{
|
||||
token_._type = ZEROORMORE;
|
||||
}
|
||||
|
||||
break;
|
||||
case '+':
|
||||
if (!state_.eos() && *state_._curr == '?')
|
||||
{
|
||||
token_._type = AONEORMORE;
|
||||
state_.increment();
|
||||
}
|
||||
else
|
||||
{
|
||||
token_._type = ONEORMORE;
|
||||
}
|
||||
|
||||
break;
|
||||
case '{':
|
||||
open_curly(lhs_, state_, token_);
|
||||
break;
|
||||
case '|':
|
||||
token_._type = OR;
|
||||
break;
|
||||
case '^':
|
||||
if (!state_._macro_name &&
|
||||
state_._curr - 1 == state_._start)
|
||||
{
|
||||
token_._type = BOL;
|
||||
}
|
||||
else
|
||||
{
|
||||
token_._type = CHARSET;
|
||||
token_._str.insert(range(ch_, ch_));
|
||||
}
|
||||
|
||||
break;
|
||||
case '$':
|
||||
if (!state_._macro_name && state_._curr == state_._end)
|
||||
{
|
||||
token_._type = EOL;
|
||||
}
|
||||
else
|
||||
{
|
||||
token_._type = CHARSET;
|
||||
token_._str.insert(range(ch_, ch_));
|
||||
}
|
||||
|
||||
break;
|
||||
case '.':
|
||||
{
|
||||
token_._type = CHARSET;
|
||||
|
||||
if (state_._flags & dot_not_newline)
|
||||
{
|
||||
token_._str.insert(range('\n', '\n'));
|
||||
}
|
||||
else if (state_._flags & dot_not_cr_lf)
|
||||
{
|
||||
token_._str.insert(range('\n', '\n'));
|
||||
token_._str.insert(range('\r', '\r'));
|
||||
}
|
||||
|
||||
token_._str.negate();
|
||||
break;
|
||||
}
|
||||
case '[':
|
||||
{
|
||||
token_._type = CHARSET;
|
||||
tokeniser_helper::charset(state_, token_._str);
|
||||
break;
|
||||
}
|
||||
case '/':
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
ss_ << "Lookahead ('/') is not supported yet";
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
break;
|
||||
}
|
||||
default:
|
||||
token_._type = CHARSET;
|
||||
add_char(ch_, state_, token_._str);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
using range = typename string_token::range;
|
||||
|
||||
static bool comment(bool &eos_, rules_char_type &ch_, state &state_)
|
||||
{
|
||||
bool skipped_ = false;
|
||||
|
||||
if (!state_._in_string && ch_ == '(' && !state_.eos() &&
|
||||
*state_._curr == '?' && state_._curr + 1 < state_._end &&
|
||||
*(state_._curr + 1) == '#')
|
||||
{
|
||||
std::size_t paren_count_ = 1;
|
||||
|
||||
state_.increment();
|
||||
state_.increment();
|
||||
|
||||
do
|
||||
{
|
||||
eos_ = state_.next(ch_);
|
||||
|
||||
if (ch_ == '(')
|
||||
{
|
||||
++paren_count_;
|
||||
}
|
||||
else if (ch_ == ')')
|
||||
{
|
||||
--paren_count_;
|
||||
}
|
||||
} while (!eos_ && !(ch_ == ')' && paren_count_ == 0));
|
||||
|
||||
if (eos_)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
// Pointless returning index if at end of string
|
||||
state_.unexpected_end(ss_);
|
||||
ss_ << " (unterminated comment)";
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
else
|
||||
{
|
||||
eos_ = state_.next(ch_);
|
||||
}
|
||||
|
||||
skipped_ = true;
|
||||
}
|
||||
|
||||
return skipped_;
|
||||
}
|
||||
|
||||
static bool skip(bool &eos_, rules_char_type &ch_, state &state_)
|
||||
{
|
||||
bool skipped_ = false;
|
||||
|
||||
if ((state_._flags & skip_ws) && !state_._in_string)
|
||||
{
|
||||
bool c_comment_ = false;
|
||||
bool skip_ws_ = false;
|
||||
|
||||
do
|
||||
{
|
||||
c_comment_ = ch_ == '/' && !state_.eos() &&
|
||||
*state_._curr == '*';
|
||||
skip_ws_ = !c_comment_ && (ch_ == ' ' || ch_ == '\t' ||
|
||||
ch_ == '\n' || ch_ == '\r' || ch_ == '\f' || ch_ == '\v');
|
||||
|
||||
if (c_comment_)
|
||||
{
|
||||
state_.increment();
|
||||
eos_ = state_.next(ch_);
|
||||
|
||||
while (!eos_ && !(ch_ == '*' && !state_.eos() &&
|
||||
*state_._curr == '/'))
|
||||
{
|
||||
eos_ = state_.next(ch_);
|
||||
}
|
||||
|
||||
if (eos_)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
// Pointless returning index if at end of string
|
||||
state_.unexpected_end(ss_);
|
||||
ss_ << " (unterminated C style comment)";
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
else
|
||||
{
|
||||
state_.increment();
|
||||
eos_ = state_.next(ch_);
|
||||
}
|
||||
|
||||
skipped_ = true;
|
||||
}
|
||||
else if (skip_ws_)
|
||||
{
|
||||
eos_ = state_.next(ch_);
|
||||
skipped_ = true;
|
||||
}
|
||||
} while (!eos_ && (c_comment_ || skip_ws_));
|
||||
}
|
||||
|
||||
return skipped_;
|
||||
}
|
||||
|
||||
static void read_options(state &state_)
|
||||
{
|
||||
if (!state_.eos() && *state_._curr == '?')
|
||||
{
|
||||
rules_char_type ch_ = 0;
|
||||
bool eos_ = false;
|
||||
bool negate_ = false;
|
||||
|
||||
state_.increment();
|
||||
eos_ = state_.next(ch_);
|
||||
state_._flags_stack.push(state_._flags);
|
||||
|
||||
while (!eos_ && ch_ != ':')
|
||||
{
|
||||
switch (ch_)
|
||||
{
|
||||
case '-':
|
||||
negate_ ^= 1;
|
||||
break;
|
||||
case 'i':
|
||||
if (negate_)
|
||||
{
|
||||
state_._flags = state_._flags & ~icase;
|
||||
}
|
||||
else
|
||||
{
|
||||
state_._flags = state_._flags | icase;
|
||||
}
|
||||
|
||||
negate_ = false;
|
||||
break;
|
||||
case 's':
|
||||
if (negate_)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
state_._flags = state_._flags | dot_not_cr_lf;
|
||||
#else
|
||||
state_._flags = state_._flags | dot_not_newline;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef _WIN32
|
||||
state_._flags = state_._flags & ~dot_not_cr_lf;
|
||||
#else
|
||||
state_._flags = state_._flags & ~dot_not_newline;
|
||||
#endif
|
||||
}
|
||||
|
||||
negate_ = false;
|
||||
break;
|
||||
case 'x':
|
||||
if (negate_)
|
||||
{
|
||||
state_._flags = state_._flags & ~skip_ws;
|
||||
}
|
||||
else
|
||||
{
|
||||
state_._flags = state_._flags | skip_ws;
|
||||
}
|
||||
|
||||
negate_ = false;
|
||||
break;
|
||||
default:
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
ss_ << "Unknown option at index " <<
|
||||
state_.index() - 1;
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
}
|
||||
|
||||
eos_ = state_.next(ch_);
|
||||
}
|
||||
|
||||
// End of string handler will handle early termination
|
||||
}
|
||||
else if (!state_._flags_stack.empty())
|
||||
{
|
||||
state_._flags_stack.push(state_._flags);
|
||||
}
|
||||
}
|
||||
|
||||
static void escape(state &state_, string_token &token_)
|
||||
{
|
||||
char_type ch_ = 0;
|
||||
std::size_t str_len_ = 0;
|
||||
const char *str_ = tokeniser_helper::escape_sequence(state_,
|
||||
ch_, str_len_);
|
||||
|
||||
if (str_)
|
||||
{
|
||||
char_state state2_(str_ + 1, str_ + str_len_, state_._id,
|
||||
state_._flags, state_._locale, 0);
|
||||
|
||||
tokeniser_helper::charset(state2_, token_);
|
||||
}
|
||||
else
|
||||
{
|
||||
add_char(ch_, state_, token_);
|
||||
}
|
||||
}
|
||||
|
||||
static void add_char(const char_type ch_, const state &state_,
|
||||
string_token &token_)
|
||||
{
|
||||
range range_(ch_, ch_);
|
||||
|
||||
token_.insert(range_);
|
||||
|
||||
if (state_._flags & icase)
|
||||
{
|
||||
string_token folded_;
|
||||
|
||||
tokeniser_helper::fold(range_, state_._locale,
|
||||
folded_, typename tokeniser_helper::template
|
||||
size<sizeof(char_type)>());
|
||||
|
||||
if (!folded_.empty())
|
||||
{
|
||||
token_.insert(folded_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void open_curly(re_token &lhs_, state &state_,
|
||||
re_token &token_)
|
||||
{
|
||||
if (state_.eos())
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
// Pointless returning index if at end of string
|
||||
state_.unexpected_end(ss_);
|
||||
ss_ << " (missing '}')";
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
else if (*state_._curr == '-' || *state_._curr == '+')
|
||||
{
|
||||
rules_char_type ch_ = 0;
|
||||
|
||||
if (lhs_._type != CHARSET)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
ss_ << "CHARSET must precede {" <<
|
||||
state_._curr << "} at index " <<
|
||||
state_.index() - 1;
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
state_.next(ch_);
|
||||
token_._type = DIFF;
|
||||
token_._extra = ch_;
|
||||
|
||||
if (state_.next(ch_))
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
// Pointless returning index if at end of string
|
||||
state_.unexpected_end(ss_);
|
||||
ss_ << " (missing '}')";
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
if (ch_ != '}')
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
ss_ << "Missing '}' at index " << state_.index() - 1;
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
}
|
||||
else if (*state_._curr >= '0' && *state_._curr <= '9')
|
||||
{
|
||||
repeat_n(state_, token_);
|
||||
}
|
||||
else
|
||||
{
|
||||
macro(state_, token_);
|
||||
}
|
||||
}
|
||||
|
||||
// SYNTAX:
//   {n[,[n]]}
// SEMANTIC RULES:
//   {0} - INVALID (throw exception)
//   {0,} = *
//   {0,0} - INVALID (throw exception)
//   {0,1} = ?
//   {1,} = +
//   {min,max} where min == max - {min}
//   {min,max} where max < min - INVALID (throw exception)
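// Illustrative example: "{0,1}" collapses to the OPT token, "{3,}" stays a
// REPEATN whose _extra is "3,", and "{4,4}" is reduced to "{4}" (the comma
// is erased when min == max); a trailing '?' switches the token to its
// non-greedy counterpart (AOPT, AZEROORMORE, AONEORMORE or AREPEATN).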
|
||||
static void repeat_n(state &state_, re_token &token_)
|
||||
{
|
||||
rules_char_type ch_ = 0;
|
||||
bool eos_ = state_.next(ch_);
|
||||
std::size_t min_ = 0;
|
||||
std::size_t max_ = 0;
|
||||
|
||||
while (!eos_ && ch_ >= '0' && ch_ <= '9')
|
||||
{
|
||||
min_ *= 10;
|
||||
min_ += ch_ - '0';
|
||||
token_._extra += ch_;
|
||||
eos_ = state_.next(ch_);
|
||||
}
|
||||
|
||||
if (eos_)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
// Pointless returning index if at end of string
|
||||
state_.unexpected_end(ss_);
|
||||
ss_ << " (missing repeat terminator '}')";
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
bool min_max_ = false;
|
||||
bool repeatn_ = true;
|
||||
|
||||
if (ch_ == ',')
|
||||
{
|
||||
token_._extra += ch_;
|
||||
eos_ = state_.next(ch_);
|
||||
|
||||
if (eos_)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
// Pointless returning index if at end of string
|
||||
state_.unexpected_end(ss_);
|
||||
ss_ << " (missing repeat terminator '}')";
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
if (ch_ == '}')
|
||||
{
|
||||
// Small optimisation: Check for '*' equivalency.
|
||||
if (min_ == 0)
|
||||
{
|
||||
token_._type = ZEROORMORE;
|
||||
repeatn_ = false;
|
||||
}
|
||||
// Small optimisation: Check for '+' equivalency.
|
||||
else if (min_ == 1)
|
||||
{
|
||||
token_._type = ONEORMORE;
|
||||
repeatn_ = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ch_ < '0' || ch_ > '9')
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
ss_ << "Missing repeat terminator '}' at index " <<
|
||||
state_.index() - 1;
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
min_max_ = true;
|
||||
|
||||
do
|
||||
{
|
||||
max_ *= 10;
|
||||
max_ += ch_ - '0';
|
||||
token_._extra += ch_;
|
||||
eos_ = state_.next(ch_);
|
||||
} while (!eos_ && ch_ >= '0' && ch_ <= '9');
|
||||
|
||||
if (eos_)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
// Pointless returning index if at end of string
|
||||
state_.unexpected_end(ss_);
|
||||
ss_ << " (missing repeat terminator '}')";
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
// Small optimisation: Check for '?' equivalency.
|
||||
if (min_ == 0 && max_ == 1)
|
||||
{
|
||||
token_._type = OPT;
|
||||
repeatn_ = false;
|
||||
}
|
||||
// Small optimisation: if min == max, then min.
|
||||
else if (min_ == max_)
|
||||
{
|
||||
token_._extra.erase(token_._extra.find(','));
|
||||
min_max_ = false;
|
||||
max_ = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ch_ != '}')
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
ss_ << "Missing repeat terminator '}' at index " <<
|
||||
state_.index() - 1;
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
if (repeatn_)
|
||||
{
|
||||
// SEMANTIC VALIDATION follows:
|
||||
// NOTE: {0,} has already become *
|
||||
// therefore we don't check for a comma.
|
||||
if (min_ == 0 && max_ == 0)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
ss_ << "Cannot have exactly zero repeats preceding index " <<
|
||||
state_.index();
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
if (min_max_ && max_ < min_)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
ss_ << "Max less than min preceding index " <<
|
||||
state_.index();
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
if (!state_.eos() && *state_._curr == '?')
|
||||
{
|
||||
token_._type = AREPEATN;
|
||||
state_.increment();
|
||||
}
|
||||
else
|
||||
{
|
||||
token_._type = REPEATN;
|
||||
}
|
||||
}
|
||||
else if (token_._type == ZEROORMORE)
|
||||
{
|
||||
if (!state_.eos() && *state_._curr == '?')
|
||||
{
|
||||
token_._type = AZEROORMORE;
|
||||
state_.increment();
|
||||
}
|
||||
}
|
||||
else if (token_._type == ONEORMORE)
|
||||
{
|
||||
if (!state_.eos() && *state_._curr == '?')
|
||||
{
|
||||
token_._type = AONEORMORE;
|
||||
state_.increment();
|
||||
}
|
||||
}
|
||||
else if (token_._type == OPT)
|
||||
{
|
||||
if (!state_.eos() && *state_._curr == '?')
|
||||
{
|
||||
token_._type = AOPT;
|
||||
state_.increment();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void macro(state &state_, re_token &token_)
|
||||
{
|
||||
rules_char_type ch_ = 0;
|
||||
bool eos_ = false;
|
||||
|
||||
state_.next(ch_);
|
||||
|
||||
if (ch_ != '_' && !(ch_ >= 'A' && ch_ <= 'Z') &&
|
||||
!(ch_ >= 'a' && ch_ <= 'z'))
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
ss_ << "Invalid MACRO name at index " << state_.index() - 1;
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
token_._extra += ch_;
|
||||
eos_ = state_.next(ch_);
|
||||
|
||||
if (eos_)
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
// Pointless returning index if at end of string
|
||||
state_.unexpected_end(ss_);
|
||||
ss_ << " (missing MACRO name terminator '}')";
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
} while (ch_ == '_' || ch_ == '-' || (ch_ >= 'A' && ch_ <= 'Z') ||
|
||||
(ch_ >= 'a' && ch_ <= 'z') || (ch_ >= '0' && ch_ <= '9'));
|
||||
|
||||
if (ch_ != '}')
|
||||
{
|
||||
std::ostringstream ss_;
|
||||
|
||||
ss_ << "Missing MACRO name terminator '}' at index " <<
|
||||
state_.index() - 1;
|
||||
state_.error(ss_);
|
||||
throw runtime_error(ss_.str());
|
||||
}
|
||||
|
||||
token_._type = MACRO;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif
3157  YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_helper.hpp  Normal file
File diff suppressed because it is too large
136  YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_state.hpp  Normal file
@@ -0,0 +1,136 @@
|
||||
// tokeniser_state.hpp
|
||||
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
|
||||
//
|
||||
// Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||
#ifndef LEXERTL_RE_TOKENISER_STATE_HPP
|
||||
#define LEXERTL_RE_TOKENISER_STATE_HPP
|
||||
|
||||
#include "../../char_traits.hpp"
|
||||
#include "../../enums.hpp"
|
||||
#include <locale>
|
||||
#include "../../narrow.hpp"
|
||||
#include <stack>
|
||||
|
||||
namespace lexertl
|
||||
{
|
||||
namespace detail
|
||||
{
|
||||
template<typename ch_type, typename id_type>
|
||||
struct basic_re_tokeniser_state
|
||||
{
|
||||
using char_type = ch_type;
|
||||
using index_type = typename basic_char_traits<char_type>::index_type;
|
||||
|
||||
const char_type * const _start;
|
||||
const char_type * const _end;
|
||||
const char_type *_curr;
|
||||
id_type _id;
|
||||
std::size_t _flags;
|
||||
std::stack<std::size_t> _flags_stack;
|
||||
std::locale _locale;
|
||||
const char_type *_macro_name;
|
||||
long _paren_count;
|
||||
bool _in_string;
|
||||
id_type _nl_id;
|
||||
|
||||
basic_re_tokeniser_state(const char_type *start_,
|
||||
const char_type * const end_, id_type id_, const std::size_t flags_,
|
||||
const std::locale locale_, const char_type *macro_name_) :
|
||||
_start(start_),
|
||||
_end(end_),
|
||||
_curr(start_),
|
||||
_id(id_),
|
||||
_flags(flags_),
|
||||
_flags_stack(),
|
||||
_locale(locale_),
|
||||
_macro_name(macro_name_),
|
||||
_paren_count(0),
|
||||
_in_string(false),
|
||||
_nl_id(static_cast<id_type>(~0))
|
||||
{
|
||||
}
|
||||
|
||||
basic_re_tokeniser_state(const basic_re_tokeniser_state &rhs_)
|
||||
{
|
||||
assign(rhs_);
|
||||
}
|
||||
|
||||
// prevent VC++ 7.1 warning:
|
||||
const basic_re_tokeniser_state &operator =
|
||||
(const basic_re_tokeniser_state &rhs_)
|
||||
{
|
||||
return assign(rhs_);
|
||||
}
|
||||
|
||||
basic_re_tokeniser_state &assign(const basic_re_tokeniser_state &rhs_)
|
||||
{
|
||||
_start = rhs_._start;
|
||||
_end = rhs_._end;
|
||||
_curr = rhs_._curr;
|
||||
_id = rhs_._id;
|
||||
_flags = rhs_._flags;
|
||||
_flags_stack = rhs_._flags_stack;
|
||||
_locale = rhs_._locale;
|
||||
_macro_name = rhs_._macro_name;
|
||||
_paren_count = rhs_._paren_count;
|
||||
_in_string = rhs_._in_string;
|
||||
_nl_id = rhs_._nl_id;
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline bool next(char_type &ch_)
|
||||
{
|
||||
if (_curr >= _end)
|
||||
{
|
||||
ch_ = 0;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
ch_ = *_curr;
|
||||
increment();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
inline void increment()
|
||||
{
|
||||
++_curr;
|
||||
}
|
||||
|
||||
inline std::size_t index()
|
||||
{
|
||||
return _curr - _start;
|
||||
}
|
||||
|
||||
inline bool eos()
|
||||
{
|
||||
return _curr >= _end;
|
||||
}
|
||||
|
||||
inline void unexpected_end(std::ostringstream &ss_)
|
||||
{
|
||||
ss_ << "Unexpected end of regex";
|
||||
}
|
||||
|
||||
inline void error(std::ostringstream &ss_)
|
||||
{
|
||||
ss_ << " in ";
|
||||
|
||||
if (_macro_name)
|
||||
{
|
||||
ss_ << "MACRO '";
|
||||
narrow(_macro_name, ss_);
|
||||
ss_ << "'.";
|
||||
}
|
||||
else
|
||||
{
|
||||
ss_ << "rule id " << _id << '.';
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
#endif
111  YACReaderLibrary/lexertl/parser/tree/end_node.hpp  Normal file
@@ -0,0 +1,111 @@
|
||||
// end_node.hpp
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef LEXERTL_END_NODE_HPP
#define LEXERTL_END_NODE_HPP

#include "node.hpp"

namespace lexertl
{
namespace detail
{
template<typename id_type>
class basic_end_node : public basic_node<id_type>
{
public:
using node = basic_node<id_type>;
using bool_stack = typename node::bool_stack;
using const_node_stack = typename node::const_node_stack;
using node_ptr_vector = typename node::node_ptr_vector;
using node_stack = typename node::node_stack;
using node_type = typename node::node_type;
using node_vector = typename node::node_vector;

basic_end_node(const id_type id_, const id_type user_id_,
const id_type next_dfa_, const id_type push_dfa_,
const bool pop_dfa_) :
node(false),
_id(id_),
_user_id(user_id_),
_next_dfa(next_dfa_),
_push_dfa(push_dfa_),
_pop_dfa(pop_dfa_),
_followpos()
{
node::_firstpos.push_back(this);
node::_lastpos.push_back(this);
}

virtual ~basic_end_node() override
{
}

virtual node_type what_type() const override
{
return node::END;
}

virtual bool traverse(const_node_stack &/*node_stack_*/,
bool_stack &/*perform_op_stack_*/) const override
{
return false;
}

virtual const node_vector &followpos() const override
{
// _followpos is always empty..!
return _followpos;
}

virtual bool end_state() const override
{
return true;
}

virtual id_type id() const override
{
return _id;
}

virtual id_type user_id() const override
{
return _user_id;
}

virtual id_type next_dfa() const override
{
return _next_dfa;
}

virtual id_type push_dfa() const override
{
return _push_dfa;
}

virtual bool pop_dfa() const override
{
return _pop_dfa;
}

private:
id_type _id;
id_type _user_id;
id_type _next_dfa;
id_type _push_dfa;
bool _pop_dfa;
node_vector _followpos;

virtual void copy_node(node_ptr_vector &/*node_ptr_vector_*/,
node_stack &/*new_node_stack_*/, bool_stack &/*perform_op_stack_*/,
bool &/*down_*/) const override
{
// Nothing to do, as end_nodes are not copied.
}
};
}
}

#endif
96
YACReaderLibrary/lexertl/parser/tree/iteration_node.hpp
Normal file
@ -0,0 +1,96 @@
// iteration_node.hpp
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef LEXERTL_ITERATION_NODE_HPP
#define LEXERTL_ITERATION_NODE_HPP

#include "node.hpp"

namespace lexertl
{
namespace detail
{
template<typename id_type>
class basic_iteration_node : public basic_node<id_type>
{
public:
using node = basic_node<id_type>;
using bool_stack = typename node::bool_stack;
using const_node_stack = typename node::const_node_stack;
using node_ptr_vector = typename node::node_ptr_vector;
using node_stack = typename node::node_stack;
using node_type = typename node::node_type;
using node_vector = typename node::node_vector;

basic_iteration_node(observer_ptr<node> next_, const bool greedy_) :
node(true),
_next(next_),
_greedy(greedy_)
{
_next->append_firstpos(node::_firstpos);
_next->append_lastpos(node::_lastpos);

for (observer_ptr<node> node_ : node::_lastpos)
{
node_->append_followpos(node::_firstpos);
}

for (observer_ptr<node> node_ : node::_firstpos)
{
node_->greedy(greedy_);
}
}

virtual ~basic_iteration_node() override
{
}

virtual node_type what_type() const override
{
return node::ITERATION;
}

virtual bool traverse(const_node_stack &node_stack_,
bool_stack &perform_op_stack_) const override
{
perform_op_stack_.push(true);
node_stack_.push(_next);
return true;
}

private:
observer_ptr<node> _next;
bool _greedy;

virtual void copy_node(node_ptr_vector &node_ptr_vector_,
node_stack &new_node_stack_, bool_stack &perform_op_stack_,
bool &down_) const override
{
if (perform_op_stack_.top())
{
observer_ptr<node> ptr_ = new_node_stack_.top();

node_ptr_vector_.emplace_back
(std::make_unique<basic_iteration_node>(ptr_, _greedy));
new_node_stack_.top() = node_ptr_vector_.back().get();
}
else
{
down_ = true;
}

perform_op_stack_.pop();
}

// No copy construction.
basic_iteration_node(const basic_iteration_node &) = delete;
// No assignment.
const basic_iteration_node &operator =
(const basic_iteration_node &) = delete;
};
}
}

#endif
110
YACReaderLibrary/lexertl/parser/tree/leaf_node.hpp
Normal file
@ -0,0 +1,110 @@
// leaf_node.hpp
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef LEXERTL_LEAF_NODE_HPP
#define LEXERTL_LEAF_NODE_HPP

#include "../../enums.hpp" // null_token
#include "node.hpp"

namespace lexertl
{
namespace detail
{
template<typename id_type>
class basic_leaf_node : public basic_node<id_type>
{
public:
using node = basic_node<id_type>;
using bool_stack = typename node::bool_stack;
using const_node_stack = typename node::const_node_stack;
using node_ptr_vector = typename node::node_ptr_vector;
using node_stack = typename node::node_stack;
using node_type = typename node::node_type;
using node_vector = typename node::node_vector;

basic_leaf_node(const id_type token_, const bool greedy_) :
node(token_ == node::null_token()),
_token(token_),
_set_greedy(!greedy_),
_greedy(greedy_),
_followpos()
{
if (!node::_nullable)
{
node::_firstpos.push_back(this);
node::_lastpos.push_back(this);
}
}

virtual ~basic_leaf_node() override
{
}

virtual void append_followpos(const node_vector &followpos_) override
{
_followpos.insert(_followpos.end(),
followpos_.begin(), followpos_.end());
}

virtual node_type what_type() const override
{
return node::LEAF;
}

virtual bool traverse(const_node_stack &/*node_stack_*/,
bool_stack &/*perform_op_stack_*/) const override
{
return false;
}

virtual id_type token() const override
{
return _token;
}

virtual void greedy(const bool greedy_) override
{
if (!_set_greedy)
{
_greedy = greedy_;
_set_greedy = true;
}
}

virtual bool greedy() const override
{
return _greedy;
}

virtual const node_vector &followpos() const override
{
return _followpos;
}

virtual node_vector &followpos() override
{
return _followpos;
}

private:
id_type _token;
bool _set_greedy;
bool _greedy;
node_vector _followpos;

virtual void copy_node(node_ptr_vector &node_ptr_vector_,
node_stack &new_node_stack_, bool_stack &/*perform_op_stack_*/,
bool &/*down_*/) const override
{
node_ptr_vector_.emplace_back(std::make_unique<basic_leaf_node>
(_token, _greedy));
new_node_stack_.push(node_ptr_vector_.back().get());
}
};
}
}

#endif
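basic_leaf_node latches its greediness: _set_greedy starts as the negation of the constructor argument, so a leaf constructed non-greedy ignores later greedy() calls, while a leaf constructed greedy accepts exactly one override from an enclosing quantifier. A minimal sketch (not part of this commit; the charset id 0 is a made-up value and the include path assumes the vendored tree):

// Sketch only: the one-shot greediness latch of basic_leaf_node.
#include <cassert>
#include <cstddef>
#include "leaf_node.hpp"

int main()
{
    // Constructed non-greedy: the latch is already set, later calls are ignored.
    lexertl::detail::basic_leaf_node<std::size_t> lazy_(0, false);
    lazy_.greedy(true);
    assert(!lazy_.greedy());

    // Constructed greedy: exactly one later call may change the setting.
    lexertl::detail::basic_leaf_node<std::size_t> eager_(0, true);
    eager_.greedy(false); // first call wins...
    eager_.greedy(true);  // ...subsequent calls are ignored
    assert(!eager_.greedy());
    return 0;
}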
242
YACReaderLibrary/lexertl/parser/tree/node.hpp
Normal file
@ -0,0 +1,242 @@
// node.hpp
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef LEXERTL_NODE_HPP
#define LEXERTL_NODE_HPP

#include <assert.h>
#include <memory>
#include "../../observer_ptr.hpp"
#include "../../runtime_error.hpp"
#include <stack>
#include <vector>

namespace lexertl
{
namespace detail
{
template<typename id_type>
class basic_node
{
public:
enum node_type {LEAF, SEQUENCE, SELECTION, ITERATION, END};

using bool_stack = std::stack<bool>;
using node_stack = std::stack<observer_ptr<basic_node>>;
using const_node_stack = std::stack<observer_ptr<const basic_node>>;
using node_vector = std::vector<observer_ptr<basic_node>>;
using node_ptr_vector = std::vector<std::unique_ptr<basic_node>>;

basic_node() :
_nullable(false),
_firstpos(),
_lastpos()
{
}

basic_node(const bool nullable_) :
_nullable(nullable_),
_firstpos(),
_lastpos()
{
}

virtual ~basic_node()
{
}

static id_type null_token()
{
return static_cast<id_type>(~0);
}

bool nullable() const
{
return _nullable;
}

void append_firstpos(node_vector &firstpos_) const
{
firstpos_.insert(firstpos_.end(),
_firstpos.begin(), _firstpos.end());
}

void append_lastpos(node_vector &lastpos_) const
{
lastpos_.insert(lastpos_.end(),
_lastpos.begin(), _lastpos.end());
}

virtual void append_followpos(const node_vector &/*followpos_*/)
{
throw runtime_error("Internal error node::append_followpos().");
}

observer_ptr<basic_node> copy(node_ptr_vector &node_ptr_vector_) const
{
observer_ptr<basic_node> new_root_ = nullptr;
const_node_stack node_stack_;
bool_stack perform_op_stack_;
bool down_ = true;
node_stack new_node_stack_;

node_stack_.push(this);

while (!node_stack_.empty())
{
while (down_)
{
down_ = node_stack_.top()->traverse(node_stack_,
perform_op_stack_);
}

while (!down_ && !node_stack_.empty())
{
observer_ptr<const basic_node> top_ = node_stack_.top();

top_->copy_node(node_ptr_vector_, new_node_stack_,
perform_op_stack_, down_);

if (!down_) node_stack_.pop();
}
}

assert(new_node_stack_.size() == 1);
new_root_ = new_node_stack_.top();
new_node_stack_.pop();
return new_root_;
}

virtual node_type what_type() const = 0;

virtual bool traverse(const_node_stack &node_stack_,
bool_stack &perform_op_stack_) const = 0;

node_vector &firstpos()
{
return _firstpos;
}

const node_vector &firstpos() const
{
return _firstpos;
}

// _lastpos modified externally, so not const &
node_vector &lastpos()
{
return _lastpos;
}

virtual bool end_state() const
{
return false;
}

virtual id_type id() const
{
throw runtime_error("Internal error node::id().");
#ifdef __SUNPRO_CC
// Stop bogus Solaris compiler warning
return id_type();
#endif
}

virtual id_type user_id() const
{
throw runtime_error("Internal error node::user_id().");
#ifdef __SUNPRO_CC
// Stop bogus Solaris compiler warning
return id_type();
#endif
}

virtual id_type next_dfa() const
{
throw runtime_error("Internal error node::next_dfa().");
#ifdef __SUNPRO_CC
// Stop bogus Solaris compiler warning
return id_type();
#endif
}

virtual id_type push_dfa() const
{
throw runtime_error("Internal error node::push_dfa().");
#ifdef __SUNPRO_CC
// Stop bogus Solaris compiler warning
return id_type();
#endif
}

virtual bool pop_dfa() const
{
throw runtime_error("Internal error node::pop_dfa().");
#ifdef __SUNPRO_CC
// Stop bogus Solaris compiler warning
return false;
#endif
}

virtual id_type token() const
{
throw runtime_error("Internal error node::token().");
#ifdef __SUNPRO_CC
// Stop bogus Solaris compiler warning
return id_type();
#endif
}

virtual void greedy(const bool /*greedy_*/)
{
throw runtime_error("Internal error node::greedy(bool).");
}

virtual bool greedy() const
{
throw runtime_error("Internal error node::greedy().");
#ifdef __SUNPRO_CC
// Stop bogus Solaris compiler warning
return false;
#endif
}

virtual const node_vector &followpos() const
{
throw runtime_error("Internal error node::followpos().");
#ifdef __SUNPRO_CC
// Stop bogus Solaris compiler warning
return _firstpos;
#endif
}

virtual node_vector &followpos()
{
throw runtime_error("Internal error node::followpos().");
#ifdef __SUNPRO_CC
// Stop bogus Solaris compiler warning
return _firstpos;
#endif
}

protected:
const bool _nullable;
node_vector _firstpos;
node_vector _lastpos;

virtual void copy_node(node_ptr_vector &node_ptr_vector_,
node_stack &new_node_stack_, bool_stack &perform_op_stack_,
bool &down_) const = 0;

private:
// No copy construction.
basic_node(const basic_node &) = delete;
// No assignment.
const basic_node &operator =(const basic_node &) = delete;
};
}
}

#endif
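copy() above performs an iterative deep copy: traverse() pushes children and operator markers onto explicit stacks, and copy_node() rebuilds each node bottom-up into the owning node_ptr_vector, so no recursion is needed. A minimal sketch of duplicating a small subtree (not part of this commit; it uses a made-up charset id, assumes the tree headers are on the include path, and assumes observer_ptr is the library's raw-pointer alias):

// Sketch only: duplicate the tree for "b*" via node::copy().
#include <cassert>
#include <cstddef>
#include <memory>
#include "leaf_node.hpp"
#include "iteration_node.hpp"

int main()
{
    using node = lexertl::detail::basic_node<std::size_t>;
    node::node_ptr_vector nodes_;

    // Leaf (charset id 0) under an iteration node, i.e. "b*".
    nodes_.emplace_back(std::make_unique
        <lexertl::detail::basic_leaf_node<std::size_t>>(0, true));
    node *leaf_ = nodes_.back().get();
    nodes_.emplace_back(std::make_unique
        <lexertl::detail::basic_iteration_node<std::size_t>>(leaf_, true));
    node *root_ = nodes_.back().get();

    // The duplicate nodes are appended to the same owning vector;
    // the return value is the root of the new subtree.
    node *dup_ = root_->copy(nodes_);

    assert(dup_ != root_ && dup_->what_type() == node::ITERATION);
    assert(nodes_.size() == 4); // original leaf + iteration, plus their copies
    return 0;
}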
104
YACReaderLibrary/lexertl/parser/tree/selection_node.hpp
Normal file
@ -0,0 +1,104 @@
// selection_node.hpp
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef LEXERTL_SELECTION_NODE_HPP
#define LEXERTL_SELECTION_NODE_HPP

#include "node.hpp"

namespace lexertl
{
namespace detail
{
template<typename id_type>
class basic_selection_node : public basic_node<id_type>
{
public:
using node = basic_node<id_type>;
using bool_stack = typename node::bool_stack;
using const_node_stack = typename node::const_node_stack;
using node_ptr_vector = typename node::node_ptr_vector;
using node_stack = typename node::node_stack;
using node_type = typename node::node_type;

basic_selection_node(observer_ptr<node> left_, observer_ptr<node> right_) :
node(left_->nullable() || right_->nullable()),
_left(left_),
_right(right_)
{
_left->append_firstpos(node::_firstpos);
_right->append_firstpos(node::_firstpos);
_left->append_lastpos(node::_lastpos);
_right->append_lastpos(node::_lastpos);
}

virtual ~basic_selection_node() override
{
}

virtual node_type what_type() const override
{
return node::SELECTION;
}

virtual bool traverse(const_node_stack &node_stack_,
bool_stack &perform_op_stack_) const override
{
perform_op_stack_.push(true);

switch (_right->what_type())
{
case node::SEQUENCE:
case node::SELECTION:
case node::ITERATION:
perform_op_stack_.push(false);
break;
default:
break;
}

node_stack_.push(_right);
node_stack_.push(_left);
return true;
}

private:
observer_ptr<node> _left;
observer_ptr<node> _right;

virtual void copy_node(node_ptr_vector &node_ptr_vector_,
node_stack &new_node_stack_, bool_stack &perform_op_stack_,
bool &down_) const override
{
if (perform_op_stack_.top())
{
observer_ptr<node> rhs_ = new_node_stack_.top();

new_node_stack_.pop();

observer_ptr<node> lhs_ = new_node_stack_.top();

node_ptr_vector_.emplace_back
(std::make_unique<basic_selection_node>(lhs_, rhs_));
new_node_stack_.top() = node_ptr_vector_.back().get();
}
else
{
down_ = true;
}

perform_op_stack_.pop();
}

// No copy construction.
basic_selection_node(const basic_selection_node &) = delete;
// No assignment.
const basic_selection_node &operator =
(const basic_selection_node &) = delete;
};
}
}

#endif
121
YACReaderLibrary/lexertl/parser/tree/sequence_node.hpp
Normal file
@ -0,0 +1,121 @@
// sequence_node.hpp
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef LEXERTL_SEQUENCE_NODE_HPP
#define LEXERTL_SEQUENCE_NODE_HPP

#include "node.hpp"

namespace lexertl
{
namespace detail
{
template<typename id_type>
class basic_sequence_node : public basic_node<id_type>
{
public:
using node = basic_node<id_type>;
using bool_stack = typename node::bool_stack;
using const_node_stack = typename node::const_node_stack;
using node_ptr_vector = typename node::node_ptr_vector;
using node_stack = typename node::node_stack;
using node_type = typename node::node_type;
using node_vector = typename node::node_vector;

basic_sequence_node(observer_ptr<node> left_, observer_ptr<node> right_) :
node(left_->nullable() && right_->nullable()),
_left(left_),
_right(right_)
{
_left->append_firstpos(node::_firstpos);

if (_left->nullable())
{
_right->append_firstpos(node::_firstpos);
}

if (_right->nullable())
{
_left->append_lastpos(node::_lastpos);
}

_right->append_lastpos(node::_lastpos);

node_vector &lastpos_ = _left->lastpos();
const node_vector &firstpos_ = _right->firstpos();

for (observer_ptr<node> node_ : lastpos_)
{
node_->append_followpos(firstpos_);
}
}

virtual ~basic_sequence_node() override
{
}

virtual node_type what_type() const override
{
return node::SEQUENCE;
}

virtual bool traverse(const_node_stack &node_stack_,
bool_stack &perform_op_stack_) const override
{
perform_op_stack_.push(true);

switch (_right->what_type())
{
case node::SEQUENCE:
case node::SELECTION:
case node::ITERATION:
perform_op_stack_.push(false);
break;
default:
break;
}

node_stack_.push(_right);
node_stack_.push(_left);
return true;
}

private:
observer_ptr<node> _left;
observer_ptr<node> _right;

virtual void copy_node(node_ptr_vector &node_ptr_vector_,
node_stack &new_node_stack_, bool_stack &perform_op_stack_,
bool &down_) const override
{
if (perform_op_stack_.top())
{
observer_ptr<node> rhs_ = new_node_stack_.top();

new_node_stack_.pop();

observer_ptr<node> lhs_ = new_node_stack_.top();

node_ptr_vector_.emplace_back
(std::make_unique<basic_sequence_node>(lhs_, rhs_));
new_node_stack_.top() = node_ptr_vector_.back().get();
}
else
{
down_ = true;
}

perform_op_stack_.pop();
}

// No copy construction.
basic_sequence_node(const basic_sequence_node &) = delete;
// No assignment.
const basic_sequence_node &operator =(const basic_sequence_node &) = delete;
};
}
}

#endif
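Taken together, the leaf, iteration and sequence nodes implement the classic firstpos/lastpos/followpos construction: iteration wires every node in its lastpos back onto its firstpos, and sequence appends firstpos(right) to the followpos of every node in lastpos(left). A minimal sketch for the regex "ab*" (not part of this commit; charset ids 0 and 1 are made-up values, include paths assume the vendored tree, and observer_ptr is assumed to be the library's raw-pointer alias):

// Sketch only: followpos wiring for the tree of "ab*".
#include <cassert>
#include <cstddef>
#include <memory>
#include "leaf_node.hpp"
#include "iteration_node.hpp"
#include "sequence_node.hpp"

int main()
{
    using node = lexertl::detail::basic_node<std::size_t>;
    node::node_ptr_vector nodes_;

    // Leaves for the charsets 'a' (id 0) and 'b' (id 1).
    nodes_.emplace_back(std::make_unique
        <lexertl::detail::basic_leaf_node<std::size_t>>(0, true));
    node *a_ = nodes_.back().get();
    nodes_.emplace_back(std::make_unique
        <lexertl::detail::basic_leaf_node<std::size_t>>(1, true));
    node *b_ = nodes_.back().get();

    // "b*": the iteration node appends firstpos(b) = {b} to followpos(b).
    nodes_.emplace_back(std::make_unique
        <lexertl::detail::basic_iteration_node<std::size_t>>(b_, true));
    node *star_ = nodes_.back().get();

    // "ab*": the sequence node appends firstpos(b*) = {b} to the followpos
    // of every node in lastpos(a) = {a}.
    nodes_.emplace_back(std::make_unique
        <lexertl::detail::basic_sequence_node<std::size_t>>(a_, star_));
    node *root_ = nodes_.back().get();

    assert(a_->followpos().size() == 1);   // {b}
    assert(b_->followpos().size() == 1);   // {b}, the self-loop from '*'
    assert(!root_->nullable());            // 'a' is not nullable
    assert(root_->firstpos().size() == 1); // {a}
    assert(root_->lastpos().size() == 2);  // {a, b}, since 'b*' is nullable
    return 0;
}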