Add commit 43aab01 of BenHanson/lexertl14 from github

This commit is contained in:
Iain Benson
2018-11-16 22:34:49 +00:00
committed by Luis Ángel San Martín
parent c4f792bd40
commit d3de52ca82
37 changed files with 12723 additions and 1 deletions

View File

@ -0,0 +1,100 @@
// re_token.hpp
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef LEXERTL_RE_TOKEN_HPP
#define LEXERTL_RE_TOKEN_HPP
#include "../../string_token.hpp"
namespace lexertl
{
namespace detail
{
// Note that tokens following END are never seen by parser.hpp.
enum token_type {BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT,
DUP, OR, CHARSET, BOL, EOL, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT,
ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN,
END, DIFF};
template<typename input_char_type, typename char_type>
struct basic_re_token
{
using string_token = basic_string_token<char_type>;
using string = std::basic_string<input_char_type>;
token_type _type;
string _extra;
string_token _str;
basic_re_token(const token_type type_ = BEGIN) :
_type(type_),
_extra(),
_str()
{
}
void clear()
{
_type = BEGIN;
_extra.clear();
_str.clear();
}
void swap(basic_re_token &rhs_)
{
std::swap(_type, rhs_._type);
_extra.swap(rhs_._extra);
_str.swap(rhs_._str);
}
char precedence(const token_type type_) const
{
// Moved in here for Solaris compiler.
static const char precedence_table_[END + 1][END + 1] = {
// BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP, | , CHR, BOL, EOL, MCR, ( , ) , ? , ?? , * , *? , + , +?, {n}?, {n}, END
/*BEGIN*/{ ' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/*REGEX*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/*OREXP*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/* SEQ */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/* SUB */{ ' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/*EXPRE*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/* RPT */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>' },
/*DUPLI*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/* | */{ ' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' },
/*CHARA*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
/* BOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
/* EOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
/*MACRO*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
/* ( */{ ' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' },
/* ) */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' },
/* ? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/* ?? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/* * */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/* *? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/* + */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/* +? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/*{n,m}*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/*{nm}?*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' },
/* END */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }
};
return precedence_table_[_type][type_];
}
const char *precedence_string() const
{
// Moved in here for Solaris compiler.
static const char *precedence_strings_[END + 1] =
{"BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION",
"REPEAT", "DUPLICATE", "|", "CHARSET", "^", "$", "MACRO", "(", ")",
"?", "??", "*", "*?", "+", "+?", "{n[,[m]]}", "{n[,[m]]}?", "END"};
return precedence_strings_[_type];
}
};
}
}
#endif

View File

@ -0,0 +1,778 @@
// tokeniser.hpp
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef LEXERTL_RE_TOKENISER_HPP
#define LEXERTL_RE_TOKENISER_HPP
#include <cstring>
#include "re_token.hpp"
#include "../../runtime_error.hpp"
#include <sstream>
#include "../../string_token.hpp"
#include "re_tokeniser_helper.hpp"
namespace lexertl
{
namespace detail
{
template<typename rules_char_type, typename char_type, typename id_type>
class basic_re_tokeniser
{
public:
using re_token = basic_re_token<rules_char_type, char_type>;
using tokeniser_helper =
basic_re_tokeniser_helper<rules_char_type, char_type, id_type>;
using char_state = typename tokeniser_helper::char_state;
using state = typename tokeniser_helper::state;
using string_token = basic_string_token<char_type>;
static void next(re_token &lhs_, state &state_, re_token &token_)
{
rules_char_type ch_ = 0;
bool eos_ = state_.next(ch_);
bool skipped_ = false;
token_.clear();
do
{
// string begin/end
while (!eos_ && ch_ == '"')
{
state_._in_string ^= 1;
eos_ = state_.next(ch_);
}
if (eos_) break;
// (?# ...)
skipped_ = comment(eos_, ch_, state_);
if (eos_) break;
// skip_ws set
skipped_ |= skip(eos_, ch_, state_);
} while (!eos_ && skipped_);
if (eos_)
{
if (state_._in_string)
{
std::ostringstream ss_;
// Pointless returning index if at end of string
state_.unexpected_end(ss_);
ss_ << " (missing '\"')";
state_.error(ss_);
throw runtime_error(ss_.str());
}
if (state_._paren_count)
{
std::ostringstream ss_;
// Pointless returning index if at end of string
state_.unexpected_end(ss_);
ss_ << " (missing ')')";
state_.error(ss_);
throw runtime_error(ss_.str());
}
token_._type = END;
}
else
{
if (ch_ == '\\')
{
// Even if we are in a string, respect escape sequences...
token_._type = CHARSET;
escape(state_, token_._str);
}
else if (state_._in_string)
{
// All other meta characters lose their special meaning
// inside a string.
token_._type = CHARSET;
add_char(ch_, state_, token_._str);
}
else
{
// Not an escape sequence and not inside a string, so
// check for meta characters.
switch (ch_)
{
case '(':
token_._type = OPENPAREN;
++state_._paren_count;
read_options(state_);
break;
case ')':
--state_._paren_count;
if (state_._paren_count < 0)
{
std::ostringstream ss_;
ss_ << "Number of open parenthesis < 0 "
"at index " << state_.index() - 1;
state_.error(ss_);
throw runtime_error(ss_.str());
}
token_._type = CLOSEPAREN;
if (!state_._flags_stack.empty())
{
state_._flags = state_._flags_stack.top();
state_._flags_stack.pop();
}
break;
case '?':
if (!state_.eos() && *state_._curr == '?')
{
token_._type = AOPT;
state_.increment();
}
else
{
token_._type = OPT;
}
break;
case '*':
if (!state_.eos() && *state_._curr == '?')
{
token_._type = AZEROORMORE;
state_.increment();
}
else
{
token_._type = ZEROORMORE;
}
break;
case '+':
if (!state_.eos() && *state_._curr == '?')
{
token_._type = AONEORMORE;
state_.increment();
}
else
{
token_._type = ONEORMORE;
}
break;
case '{':
open_curly(lhs_, state_, token_);
break;
case '|':
token_._type = OR;
break;
case '^':
if (!state_._macro_name &&
state_._curr - 1 == state_._start)
{
token_._type = BOL;
}
else
{
token_._type = CHARSET;
token_._str.insert(range(ch_, ch_));
}
break;
case '$':
if (!state_._macro_name && state_._curr == state_._end)
{
token_._type = EOL;
}
else
{
token_._type = CHARSET;
token_._str.insert(range(ch_, ch_));
}
break;
case '.':
{
token_._type = CHARSET;
if (state_._flags & dot_not_newline)
{
token_._str.insert(range('\n', '\n'));
}
else if (state_._flags & dot_not_cr_lf)
{
token_._str.insert(range('\n', '\n'));
token_._str.insert(range('\r', '\r'));
}
token_._str.negate();
break;
}
case '[':
{
token_._type = CHARSET;
tokeniser_helper::charset(state_, token_._str);
break;
}
case '/':
{
std::ostringstream ss_;
ss_ << "Lookahead ('/') is not supported yet";
state_.error(ss_);
throw runtime_error(ss_.str());
break;
}
default:
token_._type = CHARSET;
add_char(ch_, state_, token_._str);
break;
}
}
}
}
private:
using range = typename string_token::range;
static bool comment(bool &eos_, rules_char_type &ch_, state &state_)
{
bool skipped_ = false;
if (!state_._in_string && ch_ == '(' && !state_.eos() &&
*state_._curr == '?' && state_._curr + 1 < state_._end &&
*(state_._curr + 1) == '#')
{
std::size_t paren_count_ = 1;
state_.increment();
state_.increment();
do
{
eos_ = state_.next(ch_);
if (ch_ == '(')
{
++paren_count_;
}
else if (ch_ == ')')
{
--paren_count_;
}
} while (!eos_ && !(ch_ == ')' && paren_count_ == 0));
if (eos_)
{
std::ostringstream ss_;
// Pointless returning index if at end of string
state_.unexpected_end(ss_);
ss_ << " (unterminated comment)";
state_.error(ss_);
throw runtime_error(ss_.str());
}
else
{
eos_ = state_.next(ch_);
}
skipped_ = true;
}
return skipped_;
}
static bool skip(bool &eos_, rules_char_type &ch_, state &state_)
{
bool skipped_ = false;
if ((state_._flags & skip_ws) && !state_._in_string)
{
bool c_comment_ = false;
bool skip_ws_ = false;
do
{
c_comment_ = ch_ == '/' && !state_.eos() &&
*state_._curr == '*';
skip_ws_ = !c_comment_ && (ch_ == ' ' || ch_ == '\t' ||
ch_ == '\n' || ch_ == '\r' || ch_ == '\f' || ch_ == '\v');
if (c_comment_)
{
state_.increment();
eos_ = state_.next(ch_);
while (!eos_ && !(ch_ == '*' && !state_.eos() &&
*state_._curr == '/'))
{
eos_ = state_.next(ch_);
}
if (eos_)
{
std::ostringstream ss_;
// Pointless returning index if at end of string
state_.unexpected_end(ss_);
ss_ << " (unterminated C style comment)";
state_.error(ss_);
throw runtime_error(ss_.str());
}
else
{
state_.increment();
eos_ = state_.next(ch_);
}
skipped_ = true;
}
else if (skip_ws_)
{
eos_ = state_.next(ch_);
skipped_ = true;
}
} while (!eos_ && (c_comment_ || skip_ws_));
}
return skipped_;
}
static void read_options(state &state_)
{
if (!state_.eos() && *state_._curr == '?')
{
rules_char_type ch_ = 0;
bool eos_ = false;
bool negate_ = false;
state_.increment();
eos_ = state_.next(ch_);
state_._flags_stack.push(state_._flags);
while (!eos_ && ch_ != ':')
{
switch (ch_)
{
case '-':
negate_ ^= 1;
break;
case 'i':
if (negate_)
{
state_._flags = state_._flags & ~icase;
}
else
{
state_._flags = state_._flags | icase;
}
negate_ = false;
break;
case 's':
if (negate_)
{
#ifdef _WIN32
state_._flags = state_._flags | dot_not_cr_lf;
#else
state_._flags = state_._flags | dot_not_newline;
#endif
}
else
{
#ifdef _WIN32
state_._flags = state_._flags & ~dot_not_cr_lf;
#else
state_._flags = state_._flags & ~dot_not_newline;
#endif
}
negate_ = false;
break;
case 'x':
if (negate_)
{
state_._flags = state_._flags & ~skip_ws;
}
else
{
state_._flags = state_._flags | skip_ws;
}
negate_ = false;
break;
default:
{
std::ostringstream ss_;
ss_ << "Unknown option at index " <<
state_.index() - 1;
state_.error(ss_);
throw runtime_error(ss_.str());
}
}
eos_ = state_.next(ch_);
}
// End of string handler will handle early termination
}
else if (!state_._flags_stack.empty())
{
state_._flags_stack.push(state_._flags);
}
}
static void escape(state &state_, string_token &token_)
{
char_type ch_ = 0;
std::size_t str_len_ = 0;
const char *str_ = tokeniser_helper::escape_sequence(state_,
ch_, str_len_);
if (str_)
{
char_state state2_(str_ + 1, str_ + str_len_, state_._id,
state_._flags, state_._locale, 0);
tokeniser_helper::charset(state2_, token_);
}
else
{
add_char(ch_, state_, token_);
}
}
static void add_char(const char_type ch_, const state &state_,
string_token &token_)
{
range range_(ch_, ch_);
token_.insert(range_);
if (state_._flags & icase)
{
string_token folded_;
tokeniser_helper::fold(range_, state_._locale,
folded_, typename tokeniser_helper::template
size<sizeof(char_type)>());
if (!folded_.empty())
{
token_.insert(folded_);
}
}
}
static void open_curly(re_token &lhs_, state &state_,
re_token &token_)
{
if (state_.eos())
{
std::ostringstream ss_;
// Pointless returning index if at end of string
state_.unexpected_end(ss_);
ss_ << " (missing '}')";
state_.error(ss_);
throw runtime_error(ss_.str());
}
else if (*state_._curr == '-' || *state_._curr == '+')
{
rules_char_type ch_ = 0;
if (lhs_._type != CHARSET)
{
std::ostringstream ss_;
ss_ << "CHARSET must precede {" <<
state_._curr << "} at index " <<
state_.index() - 1;
state_.error(ss_);
throw runtime_error(ss_.str());
}
state_.next(ch_);
token_._type = DIFF;
token_._extra = ch_;
if (state_.next(ch_))
{
std::ostringstream ss_;
// Pointless returning index if at end of string
state_.unexpected_end(ss_);
ss_ << " (missing '}')";
state_.error(ss_);
throw runtime_error(ss_.str());
}
if (ch_ != '}')
{
std::ostringstream ss_;
ss_ << "Missing '}' at index " << state_.index() - 1;
state_.error(ss_);
throw runtime_error(ss_.str());
}
}
else if (*state_._curr >= '0' && *state_._curr <= '9')
{
repeat_n(state_, token_);
}
else
{
macro(state_, token_);
}
}
// SYNTAX:
// {n[,[n]]}
// SEMANTIC RULES:
// {0} - INVALID (throw exception)
// {0,} = *
// {0,0} - INVALID (throw exception)
// {0,1} = ?
// {1,} = +
// {min,max} where min == max - {min}
// {min,max} where max < min - INVALID (throw exception)
static void repeat_n(state &state_, re_token &token_)
{
rules_char_type ch_ = 0;
bool eos_ = state_.next(ch_);
std::size_t min_ = 0;
std::size_t max_ = 0;
while (!eos_ && ch_ >= '0' && ch_ <= '9')
{
min_ *= 10;
min_ += ch_ - '0';
token_._extra += ch_;
eos_ = state_.next(ch_);
}
if (eos_)
{
std::ostringstream ss_;
// Pointless returning index if at end of string
state_.unexpected_end(ss_);
ss_ << " (missing repeat terminator '}')";
state_.error(ss_);
throw runtime_error(ss_.str());
}
bool min_max_ = false;
bool repeatn_ = true;
if (ch_ == ',')
{
token_._extra += ch_;
eos_ = state_.next(ch_);
if (eos_)
{
std::ostringstream ss_;
// Pointless returning index if at end of string
state_.unexpected_end(ss_);
ss_ << " (missing repeat terminator '}')";
state_.error(ss_);
throw runtime_error(ss_.str());
}
if (ch_ == '}')
{
// Small optimisation: Check for '*' equivalency.
if (min_ == 0)
{
token_._type = ZEROORMORE;
repeatn_ = false;
}
// Small optimisation: Check for '+' equivalency.
else if (min_ == 1)
{
token_._type = ONEORMORE;
repeatn_ = false;
}
}
else
{
if (ch_ < '0' || ch_ > '9')
{
std::ostringstream ss_;
ss_ << "Missing repeat terminator '}' at index " <<
state_.index() - 1;
state_.error(ss_);
throw runtime_error(ss_.str());
}
min_max_ = true;
do
{
max_ *= 10;
max_ += ch_ - '0';
token_._extra += ch_;
eos_ = state_.next(ch_);
} while (!eos_ && ch_ >= '0' && ch_ <= '9');
if (eos_)
{
std::ostringstream ss_;
// Pointless returning index if at end of string
state_.unexpected_end(ss_);
ss_ << " (missing repeat terminator '}')";
state_.error(ss_);
throw runtime_error(ss_.str());
}
// Small optimisation: Check for '?' equivalency.
if (min_ == 0 && max_ == 1)
{
token_._type = OPT;
repeatn_ = false;
}
// Small optimisation: if min == max, then min.
else if (min_ == max_)
{
token_._extra.erase(token_._extra.find(','));
min_max_ = false;
max_ = 0;
}
}
}
if (ch_ != '}')
{
std::ostringstream ss_;
ss_ << "Missing repeat terminator '}' at index " <<
state_.index() - 1;
state_.error(ss_);
throw runtime_error(ss_.str());
}
if (repeatn_)
{
// SEMANTIC VALIDATION follows:
// NOTE: {0,} has already become *
// therefore we don't check for a comma.
if (min_ == 0 && max_ == 0)
{
std::ostringstream ss_;
ss_ << "Cannot have exactly zero repeats preceding index " <<
state_.index();
state_.error(ss_);
throw runtime_error(ss_.str());
}
if (min_max_ && max_ < min_)
{
std::ostringstream ss_;
ss_ << "Max less than min preceding index " <<
state_.index();
state_.error(ss_);
throw runtime_error(ss_.str());
}
if (!state_.eos() && *state_._curr == '?')
{
token_._type = AREPEATN;
state_.increment();
}
else
{
token_._type = REPEATN;
}
}
else if (token_._type == ZEROORMORE)
{
if (!state_.eos() && *state_._curr == '?')
{
token_._type = AZEROORMORE;
state_.increment();
}
}
else if (token_._type == ONEORMORE)
{
if (!state_.eos() && *state_._curr == '?')
{
token_._type = AONEORMORE;
state_.increment();
}
}
else if (token_._type == OPT)
{
if (!state_.eos() && *state_._curr == '?')
{
token_._type = AOPT;
state_.increment();
}
}
}
static void macro(state &state_, re_token &token_)
{
rules_char_type ch_ = 0;
bool eos_ = false;
state_.next(ch_);
if (ch_ != '_' && !(ch_ >= 'A' && ch_ <= 'Z') &&
!(ch_ >= 'a' && ch_ <= 'z'))
{
std::ostringstream ss_;
ss_ << "Invalid MACRO name at index " << state_.index() - 1;
state_.error(ss_);
throw runtime_error(ss_.str());
}
do
{
token_._extra += ch_;
eos_ = state_.next(ch_);
if (eos_)
{
std::ostringstream ss_;
// Pointless returning index if at end of string
state_.unexpected_end(ss_);
ss_ << " (missing MACRO name terminator '}')";
state_.error(ss_);
throw runtime_error(ss_.str());
}
} while (ch_ == '_' || ch_ == '-' || (ch_ >= 'A' && ch_ <= 'Z') ||
(ch_ >= 'a' && ch_ <= 'z') || (ch_ >= '0' && ch_ <= '9'));
if (ch_ != '}')
{
std::ostringstream ss_;
ss_ << "Missing MACRO name terminator '}' at index " <<
state_.index() - 1;
state_.error(ss_);
throw runtime_error(ss_.str());
}
token_._type = MACRO;
}
};
}
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,136 @@
// tokeniser_state.hpp
// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/)
//
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
#ifndef LEXERTL_RE_TOKENISER_STATE_HPP
#define LEXERTL_RE_TOKENISER_STATE_HPP
#include "../../char_traits.hpp"
#include "../../enums.hpp"
#include <locale>
#include "../../narrow.hpp"
#include <stack>
namespace lexertl
{
namespace detail
{
template<typename ch_type, typename id_type>
struct basic_re_tokeniser_state
{
using char_type = ch_type;
using index_type = typename basic_char_traits<char_type>::index_type;
const char_type * const _start;
const char_type * const _end;
const char_type *_curr;
id_type _id;
std::size_t _flags;
std::stack<std::size_t> _flags_stack;
std::locale _locale;
const char_type *_macro_name;
long _paren_count;
bool _in_string;
id_type _nl_id;
basic_re_tokeniser_state(const char_type *start_,
const char_type * const end_, id_type id_, const std::size_t flags_,
const std::locale locale_, const char_type *macro_name_) :
_start(start_),
_end(end_),
_curr(start_),
_id(id_),
_flags(flags_),
_flags_stack(),
_locale(locale_),
_macro_name(macro_name_),
_paren_count(0),
_in_string(false),
_nl_id(static_cast<id_type>(~0))
{
}
basic_re_tokeniser_state(const basic_re_tokeniser_state &rhs_)
{
assign(rhs_);
}
// prevent VC++ 7.1 warning:
const basic_re_tokeniser_state &operator =
(const basic_re_tokeniser_state &rhs_)
{
return assign(rhs_);
}
basic_re_tokeniser_state &assign(const basic_re_tokeniser_state &rhs_)
{
_start = rhs_._start;
_end = rhs_._end;
_curr = rhs_._curr;
_id = rhs_._id;
_flags = rhs_._flags;
_flags_stack = rhs_._flags_stack;
_locale = rhs_._locale;
_macro_name = rhs_._macro_name;
_paren_count = rhs_._paren_count;
_in_string = rhs_._in_string;
_nl_id = rhs_._nl_id;
return *this;
}
inline bool next(char_type &ch_)
{
if (_curr >= _end)
{
ch_ = 0;
return true;
}
else
{
ch_ = *_curr;
increment();
return false;
}
}
inline void increment()
{
++_curr;
}
inline std::size_t index()
{
return _curr - _start;
}
inline bool eos()
{
return _curr >= _end;
}
inline void unexpected_end(std::ostringstream &ss_)
{
ss_ << "Unexpected end of regex";
}
inline void error(std::ostringstream &ss_)
{
ss_ << " in ";
if (_macro_name)
{
ss_ << "MACRO '";
narrow(_macro_name, ss_);
ss_ << "'.";
}
else
{
ss_ << "rule id " << _id << '.';
}
}
};
}
}
#endif