From d3de52ca82189421502c2fb53e53673480177cbc Mon Sep 17 00:00:00 2001 From: Iain Benson Date: Fri, 16 Nov 2018 22:34:49 +0000 Subject: [PATCH 01/32] Add commit 43aab01 of BenHanson/lexertl14 from github --- YACReaderLibrary/YACReaderLibrary.pro | 40 +- YACReaderLibrary/lexertl/char_traits.hpp | 45 + YACReaderLibrary/lexertl/debug.hpp | 311 ++ YACReaderLibrary/lexertl/dot.hpp | 293 ++ YACReaderLibrary/lexertl/enums.hpp | 25 + YACReaderLibrary/lexertl/generate_cpp.hpp | 1123 ++++++ YACReaderLibrary/lexertl/generator.hpp | 738 ++++ YACReaderLibrary/lexertl/internals.hpp | 75 + YACReaderLibrary/lexertl/iterator.hpp | 135 + YACReaderLibrary/lexertl/licence_1_0.txt | 24 + YACReaderLibrary/lexertl/lookup.hpp | 491 +++ YACReaderLibrary/lexertl/match_results.hpp | 171 + YACReaderLibrary/lexertl/memory_file.hpp | 138 + YACReaderLibrary/lexertl/narrow.hpp | 25 + YACReaderLibrary/lexertl/observer_ptr.hpp | 16 + YACReaderLibrary/lexertl/parser/parser.hpp | 926 +++++ .../lexertl/parser/tokeniser/re_token.hpp | 100 + .../lexertl/parser/tokeniser/re_tokeniser.hpp | 778 ++++ .../parser/tokeniser/re_tokeniser_helper.hpp | 3157 +++++++++++++++++ .../parser/tokeniser/re_tokeniser_state.hpp | 136 + .../lexertl/parser/tree/end_node.hpp | 111 + .../lexertl/parser/tree/iteration_node.hpp | 96 + .../lexertl/parser/tree/leaf_node.hpp | 110 + YACReaderLibrary/lexertl/parser/tree/node.hpp | 242 ++ .../lexertl/parser/tree/selection_node.hpp | 104 + .../lexertl/parser/tree/sequence_node.hpp | 121 + .../lexertl/partition/charset.hpp | 72 + .../lexertl/partition/equivset.hpp | 135 + YACReaderLibrary/lexertl/rules.hpp | 1018 ++++++ YACReaderLibrary/lexertl/runtime_error.hpp | 23 + YACReaderLibrary/lexertl/serialise.hpp | 28 + YACReaderLibrary/lexertl/sm_to_csm.hpp | 53 + YACReaderLibrary/lexertl/sm_traits.hpp | 44 + YACReaderLibrary/lexertl/state_machine.hpp | 521 +++ .../lexertl/stream_shared_iterator.hpp | 352 ++ YACReaderLibrary/lexertl/string_token.hpp | 439 +++ 
YACReaderLibrary/lexertl/utf_iterators.hpp | 508 +++ 37 files changed, 12723 insertions(+), 1 deletion(-) create mode 100644 YACReaderLibrary/lexertl/char_traits.hpp create mode 100644 YACReaderLibrary/lexertl/debug.hpp create mode 100644 YACReaderLibrary/lexertl/dot.hpp create mode 100644 YACReaderLibrary/lexertl/enums.hpp create mode 100644 YACReaderLibrary/lexertl/generate_cpp.hpp create mode 100644 YACReaderLibrary/lexertl/generator.hpp create mode 100644 YACReaderLibrary/lexertl/internals.hpp create mode 100644 YACReaderLibrary/lexertl/iterator.hpp create mode 100644 YACReaderLibrary/lexertl/licence_1_0.txt create mode 100644 YACReaderLibrary/lexertl/lookup.hpp create mode 100644 YACReaderLibrary/lexertl/match_results.hpp create mode 100644 YACReaderLibrary/lexertl/memory_file.hpp create mode 100644 YACReaderLibrary/lexertl/narrow.hpp create mode 100644 YACReaderLibrary/lexertl/observer_ptr.hpp create mode 100644 YACReaderLibrary/lexertl/parser/parser.hpp create mode 100644 YACReaderLibrary/lexertl/parser/tokeniser/re_token.hpp create mode 100644 YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser.hpp create mode 100644 YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_helper.hpp create mode 100644 YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_state.hpp create mode 100644 YACReaderLibrary/lexertl/parser/tree/end_node.hpp create mode 100644 YACReaderLibrary/lexertl/parser/tree/iteration_node.hpp create mode 100644 YACReaderLibrary/lexertl/parser/tree/leaf_node.hpp create mode 100644 YACReaderLibrary/lexertl/parser/tree/node.hpp create mode 100644 YACReaderLibrary/lexertl/parser/tree/selection_node.hpp create mode 100644 YACReaderLibrary/lexertl/parser/tree/sequence_node.hpp create mode 100644 YACReaderLibrary/lexertl/partition/charset.hpp create mode 100644 YACReaderLibrary/lexertl/partition/equivset.hpp create mode 100644 YACReaderLibrary/lexertl/rules.hpp create mode 100644 YACReaderLibrary/lexertl/runtime_error.hpp create mode 100644 
YACReaderLibrary/lexertl/serialise.hpp create mode 100644 YACReaderLibrary/lexertl/sm_to_csm.hpp create mode 100644 YACReaderLibrary/lexertl/sm_traits.hpp create mode 100644 YACReaderLibrary/lexertl/state_machine.hpp create mode 100644 YACReaderLibrary/lexertl/stream_shared_iterator.hpp create mode 100644 YACReaderLibrary/lexertl/string_token.hpp create mode 100644 YACReaderLibrary/lexertl/utf_iterators.hpp diff --git a/YACReaderLibrary/YACReaderLibrary.pro b/YACReaderLibrary/YACReaderLibrary.pro index 2e6d6888..1ea60e8b 100644 --- a/YACReaderLibrary/YACReaderLibrary.pro +++ b/YACReaderLibrary/YACReaderLibrary.pro @@ -147,7 +147,42 @@ HEADERS += comic_flow.h \ yacreader_comics_selection_helper.h \ yacreader_comic_info_helper.h \ db/reading_list.h \ - current_comic_view_helper.h + current_comic_view_helper.h \ + lexertl/parser/tokeniser/re_token.hpp \ + lexertl/parser/tokeniser/re_tokeniser.hpp \ + lexertl/parser/tokeniser/re_tokeniser_helper.hpp \ + lexertl/parser/tokeniser/re_tokeniser_state.hpp \ + lexertl/parser/tree/end_node.hpp \ + lexertl/parser/tree/iteration_node.hpp \ + lexertl/parser/tree/leaf_node.hpp \ + lexertl/parser/tree/node.hpp \ + lexertl/parser/tree/selection_node.hpp \ + lexertl/parser/tree/sequence_node.hpp \ + lexertl/parser/parser.hpp \ + lexertl/partition/charset.hpp \ + lexertl/partition/equivset.hpp \ + lexertl/char_traits.hpp \ + lexertl/debug.hpp \ + lexertl/dot.hpp \ + lexertl/enums.hpp \ + lexertl/generate_cpp.hpp \ + lexertl/generator.hpp \ + lexertl/internals.hpp \ + lexertl/iterator.hpp \ + lexertl/lookup.hpp \ + lexertl/match_results.hpp \ + lexertl/memory_file.hpp \ + lexertl/narrow.hpp \ + lexertl/observer_ptr.hpp \ + lexertl/rules.hpp \ + lexertl/runtime_error.hpp \ + lexertl/serialise.hpp \ + lexertl/sm_to_csm.hpp \ + lexertl/sm_traits.hpp \ + lexertl/state_machine.hpp \ + lexertl/stream_shared_iterator.hpp \ + lexertl/string_token.hpp \ + lexertl/utf_iterators.hpp !CONFIG(no_opengl) { HEADERS += 
../common/gl/yacreader_flow_gl.h @@ -324,3 +359,6 @@ translation.files = ../release/languages/yacreaderlibrary_* manpage.path = $$DATADIR/man/man1 manpage.files = ../YACReaderLibrary.1 } + +DISTFILES += \ + lexertl/licence_1_0.txt diff --git a/YACReaderLibrary/lexertl/char_traits.hpp b/YACReaderLibrary/lexertl/char_traits.hpp new file mode 100644 index 00000000..e06f399a --- /dev/null +++ b/YACReaderLibrary/lexertl/char_traits.hpp @@ -0,0 +1,45 @@ +// char_traits.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef LEXERTL_CHAR_TRAITS_HPP +#define LEXERTL_CHAR_TRAITS_HPP + +#include + +namespace lexertl +{ +template +struct basic_char_traits +{ + using char_type = ch_type; + using index_type = ch_type; + + static index_type max_val() + { + const std::uint32_t max_ = 0x10ffff; + + return sizeof(char_type) > 2 ? + max_ : (max_ & 0xffff); + } +}; + +template<> +struct basic_char_traits +{ + using char_type = char; + using index_type = unsigned char; + + static index_type max_val() + { + // Prevent annoying warning (VC++) + index_type zero_ = 0; + + return ~zero_; + } +}; +} + +#endif diff --git a/YACReaderLibrary/lexertl/debug.hpp b/YACReaderLibrary/lexertl/debug.hpp new file mode 100644 index 00000000..1405f386 --- /dev/null +++ b/YACReaderLibrary/lexertl/debug.hpp @@ -0,0 +1,311 @@ +// debug.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_DEBUG_HPP +#define LEXERTL_DEBUG_HPP + +#include +#include +#include "rules.hpp" +#include "sm_to_csm.hpp" +#include "state_machine.hpp" +#include "string_token.hpp" +#include + +namespace lexertl +{ +template +class basic_debug +{ +public: + using char_state_machine = + basic_char_state_machine; + using ostream = std::basic_ostream; + using rules = basic_rules; + using string = std::basic_string; + + static void dump(const sm &sm_, rules &rules_, ostream &stream_) + { + char_state_machine csm_; + + sm_to_csm(sm_, csm_); + dump(csm_, rules_, stream_); + } + + static void dump(const sm &sm_, ostream &stream_) + { + char_state_machine csm_; + + sm_to_csm(sm_, csm_); + dump(csm_, stream_); + } + + static void dump(const char_state_machine &csm_, rules &rules_, + ostream &stream_) + { + for (std::size_t dfa_ = 0, dfas_ = csm_.size(); dfa_ < dfas_; ++dfa_) + { + lexer_state(stream_); + stream_ << rules_.state(dfa_) << std::endl << std::endl; + + dump_ex(csm_._sm_vector[dfa_], stream_); + } + } + + static void dump(const char_state_machine &csm_, ostream &stream_) + { + for (std::size_t dfa_ = 0, dfas_ = csm_.size(); dfa_ < dfas_; ++dfa_) + { + lexer_state(stream_); + stream_ << dfa_ << std::endl << std::endl; + + dump_ex(csm_._sm_vector[dfa_], stream_); + } + } + +protected: + using dfa_state = typename char_state_machine::state; + using string_token = typename dfa_state::string_token; + using stringstream = std::basic_stringstream; + + static void dump_ex(const typename char_state_machine::dfa &dfa_, + ostream &stream_) + { + const std::size_t states_ = dfa_._states.size(); + const id_type bol_index_ = dfa_._bol_index; + + for (std::size_t i_ = 0; i_ < states_; ++i_) + { + const dfa_state &state_ = dfa_._states[i_]; + + state(stream_); + stream_ << i_ << std::endl; + + if (state_._end_state) + { + end_state(stream_); + + if (state_._push_pop_dfa == 
dfa_state::push_dfa) + { + push(stream_); + stream_ << state_._push_dfa; + } + else if (state_._push_pop_dfa == dfa_state::pop_dfa) + { + pop(stream_); + } + + id(stream_); + stream_ << static_cast(state_._id); + user_id(stream_); + stream_ << static_cast(state_._user_id); + dfa(stream_); + stream_ << static_cast(state_._next_dfa); + stream_ << std::endl; + } + + if (i_ == 0 && bol_index_ != char_state_machine::npos()) + { + bol(stream_); + stream_ << static_cast(bol_index_) << std::endl; + } + + if (state_._eol_index != char_state_machine::npos()) + { + eol(stream_); + stream_ << static_cast(state_._eol_index) << + std::endl; + } + + for (const auto &tran_ : state_._transitions) + { + string_token token_ = tran_.second; + + open_bracket(stream_); + + if (!tran_.second.any() && tran_.second.negatable()) + { + token_.negate(); + negated(stream_); + } + + string chars_; + + for (const auto &range_ : token_._ranges) + { + if (range_.first == '-' || range_.first == '^' || + range_.first == ']') + { + stream_ << '\\'; + } + + chars_ = string_token::escape_char + (range_.first); + + if (range_.first != range_.second) + { + if (range_.first + 1 < range_.second) + { + chars_ += '-'; + } + + if (range_.second == '-' || range_.second == '^' || + range_.second == ']') + { + stream_ << '\\'; + } + + chars_ += string_token::escape_char(range_.second); + } + + stream_ << chars_; + } + + close_bracket(stream_); + stream_ << static_cast(tran_.first) << + std::endl; + } + + stream_ << std::endl; + } + } + + static void lexer_state(std::ostream &stream_) + { + stream_ << "Lexer state: "; + } + + static void lexer_state(std::wostream &stream_) + { + stream_ << L"Lexer state: "; + } + + static void state(std::ostream &stream_) + { + stream_ << "State: "; + } + + static void state(std::wostream &stream_) + { + stream_ << L"State: "; + } + + static void bol(std::ostream &stream_) + { + stream_ << " BOL -> "; + } + + static void bol(std::wostream &stream_) + { + stream_ << L" BOL -> "; + 
} + + static void eol(std::ostream &stream_) + { + stream_ << " EOL -> "; + } + + static void eol(std::wostream &stream_) + { + stream_ << L" EOL -> "; + } + + static void end_state(std::ostream &stream_) + { + stream_ << " END STATE"; + } + + static void end_state(std::wostream &stream_) + { + stream_ << L" END STATE"; + } + + static void id(std::ostream &stream_) + { + stream_ << ", Id = "; + } + + static void id(std::wostream &stream_) + { + stream_ << L", Id = "; + } + + static void push(std::ostream &stream_) + { + stream_ << ", PUSH "; + } + + static void push(std::wostream &stream_) + { + stream_ << L", PUSH "; + } + + static void pop(std::ostream &stream_) + { + stream_ << ", POP"; + } + + static void pop(std::wostream &stream_) + { + stream_ << L", POP"; + } + + static void user_id(std::ostream &stream_) + { + stream_ << ", User Id = "; + } + + static void user_id(std::wostream &stream_) + { + stream_ << L", User Id = "; + } + + static void open_bracket(std::ostream &stream_) + { + stream_ << " ["; + } + + static void open_bracket(std::wostream &stream_) + { + stream_ << L" ["; + } + + static void negated(std::ostream &stream_) + { + stream_ << "^"; + } + + static void negated(std::wostream &stream_) + { + stream_ << L"^"; + } + + static void close_bracket(std::ostream &stream_) + { + stream_ << "] -> "; + } + + static void close_bracket(std::wostream &stream_) + { + stream_ << L"] -> "; + } + + static void dfa(std::ostream &stream_) + { + stream_ << ", dfa = "; + } + + static void dfa(std::wostream &stream_) + { + stream_ << L", dfa = "; + } +}; + +using debug = basic_debug; +using wdebug = basic_debug; +} + +#endif diff --git a/YACReaderLibrary/lexertl/dot.hpp b/YACReaderLibrary/lexertl/dot.hpp new file mode 100644 index 00000000..cda4d6ac --- /dev/null +++ b/YACReaderLibrary/lexertl/dot.hpp @@ -0,0 +1,293 @@ +// dot.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// Copyright (c) 2013 Autodesk, Inc. All rights reserved. 
+// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_DOT_HPP +#define LEXERTL_DOT_HPP + +#include +#include "rules.hpp" +#include "state_machine.hpp" +#include "sm_to_csm.hpp" + +namespace lexertl +{ +//! The class template basic_dot contains utility functions used to +//! dump a description of a finite state machine formatted in the +//! DOT language (http://www.graphviz.org/doc/info/lang.html). The +//! resulting directed graph can be previewed by opening the ".dot" file +//! in the GraphViz application (http://www.graphviz.org). +template +class basic_dot +{ +public: + using char_state_machine = + basic_char_state_machine; + using rules = basic_rules; + using ostream = std::basic_ostream; + using string = std::basic_string; + + //! Dumps a description of the finite state machine expressed in + //! the DOT language to the given output stream. + static void dump(const sm &sm_, rules &rules_, ostream &stream_) + { + char_state_machine csm_; + + sm_to_csm(sm_, csm_); + dump(csm_, rules_, stream_); + } + + //! Dumps a description of the finite state machine expressed in + //! the DOT language to the given output stream. + static void dump(const char_state_machine &csm_, rules &rules_, + ostream &stream_) + { + header(stream_); + for (std::size_t dfa_ = 0, dfas_ = csm_.size(); dfa_ < dfas_; ++dfa_) + { + dump_ex(dfa_, csm_._sm_vector[dfa_], rules_, stream_); + } + trailer(stream_); + } + +protected: + using dfa_state = typename char_state_machine::state; + using string_token = typename dfa_state::string_token; + using stringstream = std::basic_stringstream; + + // Naming of nodes used in the DOT diagram. The naming is of the + // form: L<dfa_id>_S<state_id>. 
+ static string node_name(id_type dfa_id_, id_type state_id_) + { + stringstream namestream_; + namestream_ << "L" << dfa_id_ << "_S" << state_id_; + return namestream_.str(); + } + + // Escape control characters twice. This is necessary when + // expressing character sets attached as to DOT nodes as + // labels. + static string double_escape_char(const id_type ch_) + { + stringstream out_; + + switch (ch_) + { + case '\0': + out_ << '\\'; + out_ << '\\'; + out_ << '0'; + break; + case '\a': + out_ << '\\'; + out_ << '\\'; + out_ << 'a'; + break; + case '\b': + out_ << '\\'; + out_ << '\\'; + out_ << 'b'; + break; + case '\f': + out_ << '\\'; + out_ << '\\'; + out_ << 'f'; + break; + case '\n': + out_ << '\\'; + out_ << '\\'; + out_ << 'n'; + break; + case '\r': + out_ << '\\'; + out_ << '\\'; + out_ << 'r'; + break; + case '\t': + out_ << '\\'; + out_ << '\\'; + out_ << 't'; + break; + case '\v': + out_ << '\\'; + out_ << '\\'; + out_ << 'v'; + break; + case '\\': + out_ << '\\'; + out_ << '\\'; + break; + case '"': + out_ << '\\'; + out_ << '\\'; + out_ << '"'; + break; + case '\'': + out_ << '\\'; + out_ << '\\'; + out_ << '\''; + break; + default: + { + if (ch_ < 32 || ch_ > 126) + { + out_ << '\\'; + out_ << 'x'; + out_ << std::hex << + static_cast(ch_); + } + else + { + out_ << char_type(ch_); + } + + break; + } + } + + return out_.str(); + } + + // Internal function actually performing the work of dumping the + // state machine in DOT. 
+ static void dump_ex(id_type dfa_id_, + const typename char_state_machine::dfa &dfa_, + rules &rules_, + ostream &stream_) + { + const std::size_t states_ = dfa_._states.size(); + typename dfa_state::id_type_string_token_map::const_iterator iter_; + typename dfa_state::id_type_string_token_map::const_iterator end_; + + stream_ << std::endl; + + for (std::size_t i_ = 0; i_ < states_; ++i_) + { + const dfa_state &state_ = dfa_._states[i_]; + + const string name = node_name(dfa_id_, i_); + if (i_ == 0) + { + stream_ << " " << name << " [shape = doublecircle, xlabel=\"" + << rules_.state(dfa_id_) << "\"];" << std::endl; + } + else if (state_._end_state) + { + stream_ << " " << name << + " [shape = doublecircle, xlabel=\"id =" << + static_cast(state_._id) << "\"];" << + std::endl; + } + else { + stream_ << " " << name << " [shape = circle];" << std::endl; + } + } + + stream_ << std::endl; + + for (std::size_t i_ = 0; i_ < states_; ++i_) + { + const dfa_state &state_ = dfa_._states[i_]; + + iter_ = state_._transitions.begin(); + end_ = state_._transitions.end(); + + const string src_name = node_name(dfa_id_, i_); + + for (; iter_ != end_; ++iter_) + { + const string dst_name = node_name(dfa_id_, iter_->first); + stream_ << " " << src_name << " -> " << dst_name << + " [label = \""; + + string_token token_ = iter_->second; + + open_bracket(stream_); + + if (!iter_->second.any() && iter_->second.negatable()) + { + token_.negate(); + negated(stream_); + } + + string chars_; + auto ranges_iter_ = token_._ranges.cbegin(); + auto ranges_end_ = token_._ranges.cend(); + + for (; ranges_iter_ != ranges_end_; ++ranges_iter_) + { + if (ranges_iter_->first == '^' || + ranges_iter_->first == ']') + { + stream_ << "\\\\"; + } + + chars_ = double_escape_char(ranges_iter_->first); + + if (ranges_iter_->first != ranges_iter_->second) + { + if (ranges_iter_->first + 1 < ranges_iter_->second) + { + chars_ += '-'; + } + + if (ranges_iter_->second == '^' || + ranges_iter_->second == ']') + { 
+ stream_ << "\\\\"; + } + + chars_ += double_escape_char(ranges_iter_->second); + } + + stream_ << chars_; + } + + close_bracket(stream_); + stream_ << "\"];" << std::endl; + } + + if (state_._end_state) { + const string dst_name = node_name(state_._next_dfa, 0); + stream_ << " " << src_name << " -> " << dst_name + << " [style = \"dashed\"];" << std::endl; + } + } + } + + static void header(ostream &stream_) + { + stream_ << "digraph DFAs {" << std::endl; + stream_ << " rankdir = LR;" << std::endl; + } + + static void trailer(ostream &stream_) + { + stream_ << "}" << std::endl; + } + + static void open_bracket(ostream &stream_) + { + stream_ << "["; + } + + static void negated(ostream &stream_) + { + stream_ << "^"; + } + + static void close_bracket(ostream &stream_) + { + stream_ << "]"; + } + +}; + +using dot = basic_dot, char>; +using wdot = basic_dot, wchar_t>; +} + +#endif diff --git a/YACReaderLibrary/lexertl/enums.hpp b/YACReaderLibrary/lexertl/enums.hpp new file mode 100644 index 00000000..31a6a969 --- /dev/null +++ b/YACReaderLibrary/lexertl/enums.hpp @@ -0,0 +1,25 @@ +// enums.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef LEXERTL_ENUMS_HPP +#define LEXERTL_ENUMS_HPP + +namespace lexertl +{ + enum regex_flags {icase = 1, dot_not_newline = 2, dot_not_cr_lf = 4, + skip_ws = 8, match_zero_len = 16}; + // 0 = end state, 1 = id, 2 = user id, 3 = push_dfa_index + // 4 = next dfa, 5 = eol, 6 = dead state, 7 = start of transitions + enum {end_state_index, id_index, user_id_index, push_dfa_index, + next_dfa_index, eol_index, dead_state_index, transitions_index}; + // Rule flags: + enum feature_flags {bol_bit = 1, eol_bit = 2, skip_bit = 4, again_bit = 8, + multi_state_bit = 16, recursive_bit = 32, advance_bit = 64}; + // End state flags: + enum {end_state_bit = 1, pop_dfa_bit = 2}; +} + +#endif diff --git a/YACReaderLibrary/lexertl/generate_cpp.hpp b/YACReaderLibrary/lexertl/generate_cpp.hpp new file mode 100644 index 00000000..3e6b28a6 --- /dev/null +++ b/YACReaderLibrary/lexertl/generate_cpp.hpp @@ -0,0 +1,1123 @@ +// generate_cpp.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_GENERATE_CPP_HPP +#define LEXERTL_GENERATE_CPP_HPP + +#include "enums.hpp" +#include +#include "state_machine.hpp" + +namespace lexertl +{ +class table_based_cpp +{ +public: + template + static void generate_cpp + (const std::string &name_, + const basic_state_machine &sm_, + const bool pointers_, std::ostream &os_) + { + using sm = basic_state_machine; + using internals = typename sm::internals; + const internals &internals_ = sm_.data(); + std::size_t additional_tabs_ = 0; + + os_ << "template\n"; + os_ << "void " << name_ << " (lexertl::"; + + if (internals_._features & recursive_bit) + { + os_ << "recursive_match_results"; + } + else + { + os_ << "match_results"; + } + + os_ << " &results_)\n"; + os_ << "{\n"; + os_ << " using results = lexertl::"; + + if (internals_._features & recursive_bit) + { + os_ << "recursive_match_results"; + } + else + { + os_ << "match_results"; + } + + os_ << ";\n"; + os_ << " using char_type = typename results::char_type;\n"; + os_ << " typename results::iter_type end_token_ = " + "results_.second;\n"; + + if (internals_._features & skip_bit) + { + os_ << "skip:\n"; + } + + os_ << " typename results::iter_type curr_ = results_.second;\n\n"; + os_ << " results_.first = curr_;\n\n"; + + if (internals_._features & again_bit) + { + os_ << "again:\n"; + } + + os_ << " if (curr_ == results_.eoi)\n"; + os_ << " {\n"; + // We want a number regardless of id_type. 
+ os_ << " results_.id = " << static_cast + (internals_._eoi) << ";\n"; + os_ << " results_.user_id = results::npos();\n"; + os_ << " return;\n"; + os_ << " }\n\n"; + + if (internals_._features & bol_bit) + { + os_ << " bool bol_ = results_.bol;\n"; + } + + dump_tables(sm_, 1, pointers_, os_); + + if (internals_._dfa.size() > 1) + { + os_ << " const id_type *lookup_ = lookups_[results_.state];\n"; + os_ << " const id_type dfa_alphabet_ = dfa_alphabets_" + "[results_.state];\n"; + os_ << " const "; + + if (pointers_) + { + os_ << "void * const"; + } + else + { + os_ << "id_type"; + } + + os_ << " *dfa_ = dfas_[results_.state];\n"; + } + + os_ << " const "; + + if (pointers_) + { + os_ << "void * const"; + } + else + { + os_ << "id_type"; + } + + os_ << " *ptr_ = dfa_ + dfa_alphabet_;\n"; + os_ << " bool end_state_ = *ptr_ != 0;\n"; + + if (internals_._features & recursive_bit) + { + os_ << " bool pop_ = ("; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*ptr_"; + + if (pointers_) + { + os_ << ')'; + } + + os_ <<" & " << pop_dfa_bit; + + if (pointers_) + { + os_ << ')'; + } + + os_ << ") != 0;\n"; + } + + os_ << " id_type id_ = "; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*(ptr_ + " << id_index << ")"; + + if (pointers_) + { + os_ << "))"; + } + + os_ << ";\n"; + os_ << " id_type uid_ = "; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*(ptr_ + " << user_id_index << ")"; + + if (pointers_) + { + os_ << "))"; + } + + os_ << ";\n"; + + if (internals_._features & recursive_bit) + { + os_ << " id_type push_dfa_ = "; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*(ptr_ + " << push_dfa_index << ")"; + + if (pointers_) + { + os_ << "))"; + } + + os_ << ";\n"; + } + + if (internals_._dfa.size() > 1) + { + os_ << " id_type 
start_state_ = results_.state;\n"; + } + + if (internals_._features & bol_bit) + { + os_ << " bool end_bol_ = bol_;\n"; + } + + if (internals_._features & eol_bit) + { + os_ << " "; + + if (pointers_) + { + os_ << "const void * const *"; + } + else + { + os_ << "id_type "; + } + + os_ << "EOL_state_ = 0;\n"; + } + + os_ << '\n'; + + if (internals_._features & bol_bit) + { + os_ << " if (bol_)\n"; + os_ << " {\n"; + os_ << " const "; + + if (pointers_) + { + os_ << "void *"; + } + else + { + os_ << "id_type "; + } + + os_ << "state_ = *dfa_;\n\n"; + os_ << " if (state_)\n"; + os_ << " {\n"; + os_ << " ptr_ = "; + + if (pointers_) + { + os_ << "reinterpret_cast(state_);\n"; + } + else + { + os_ << "&dfa_[state_ * dfa_alphabet_];\n"; + } + + os_ << " }\n"; + os_ << " }\n\n"; + } + + os_ << " while (curr_ != results_.eoi)\n"; + os_ << " {\n"; + + if (internals_._features & eol_bit) + { + os_ << " EOL_state_ = "; + + if (pointers_) + { + os_ << "reinterpret_cast("; + } + + os_ << "ptr_[" << eol_index << ']'; + + if (pointers_) + { + os_ << ')'; + } + + os_ << ";\n\n"; + os_ << " if (EOL_state_ && *curr_ == '\\n')\n"; + os_ << " {\n"; + os_ << " ptr_ = "; + + if (pointers_) + { + os_ << "EOL_state_"; + } + else + { + os_ << "&dfa_[EOL_state_ * dfa_alphabet_]"; + } + + os_ << ";\n"; + os_ << " }\n"; + os_ << " else\n"; + os_ << " {\n"; + ++additional_tabs_; + } + + output_char_loop(internals_._features, additional_tabs_, pointers_, + os_, std::integral_constant 1)>()); + + if (internals_._features & eol_bit) + { + output_tabs(additional_tabs_, os_); + os_ << " }\n"; + --additional_tabs_; + } + + os_ << '\n'; + os_ << " if (*ptr_)\n"; + os_ << " {\n"; + os_ << " end_state_ = true;\n"; + + + if (internals_._features & recursive_bit) + { + os_ << " pop_ = ("; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*ptr_"; + + if (pointers_) + { + os_ << ')'; + } + + os_ <<" & " << pop_dfa_bit; + + if (pointers_) + { + os_ 
<< ')'; + } + + os_ << ") != 0;\n"; + } + + os_ << " id_ = "; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*(ptr_ + " << id_index << ")"; + + if (pointers_) + { + os_ << "))"; + } + + os_ << ";\n"; + os_ << " uid_ = "; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*(ptr_ + " << user_id_index << ")"; + + if (pointers_) + { + os_ << "))"; + } + + os_ << ";\n"; + + if (internals_._features & recursive_bit) + { + os_ << " push_dfa_ = "; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*(ptr_ + " << push_dfa_index << ')'; + + if (pointers_) + { + os_ << "))"; + } + + os_ << ";\n"; + } + + if (internals_._dfa.size() > 1) + { + os_ << " start_state_ = "; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*(ptr_ + " << next_dfa_index << ')'; + + if (pointers_) + { + os_ << "))"; + } + + os_ << ";\n"; + } + + if (internals_._features & bol_bit) + { + os_ << " end_bol_ = bol_;\n"; + } + + os_ << " end_token_ = curr_;\n"; + os_ << " }\n"; + os_ << " }\n\n"; + output_quit(os_, std::integral_constant 1)>()); + + if (internals_._features & eol_bit) + { + os_ << " if (curr_ == results_.eoi)\n"; + os_ << " {\n"; + os_ << " EOL_state_ = "; + + if (pointers_) + { + os_ << "reinterpret_cast("; + } + + os_ << "ptr_[" << eol_index << ']'; + + if (pointers_) + { + os_ << ')'; + } + + os_ << ";\n"; + os_ << "\n"; + os_ << " if (EOL_state_)\n"; + os_ << " {\n"; + os_ << " ptr_ = "; + + if (pointers_) + { + os_ << "EOL_state_"; + } + else + { + os_ << "&dfa_[EOL_state_ * dfa_alphabet_]"; + } + + os_ << ";\n\n"; + os_ << " if (*ptr_)\n"; + os_ << " {\n"; + os_ << " end_state_ = true;\n"; + + + if (internals_._features & recursive_bit) + { + os_ << " pop_ = ("; + + if (pointers_) + { + // Done this way for GCC: + os_ << 
"static_cast(reinterpret_cast("; + } + + os_ << "*ptr_"; + + if (pointers_) + { + os_ << ')'; + } + + os_ <<" & " << pop_dfa_bit; + + if (pointers_) + { + os_ << ')'; + } + + os_ << ") != 0;\n"; + } + + os_ << " id_ = "; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*(ptr_ + " << id_index << ")"; + + if (pointers_) + { + os_ << "))"; + } + + os_ << ";\n"; + os_ << " uid_ = "; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*(ptr_ + " << user_id_index << ")"; + + if (pointers_) + { + os_ << "))"; + } + + os_ <<";\n"; + + if (internals_._features & recursive_bit) + { + os_ << " push_dfa_ = "; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*(ptr_ + " << push_dfa_index << ')'; + + if (pointers_) + { + os_ << "))"; + } + + os_ << ";\n"; + } + + if (internals_._dfa.size() > 1) + { + os_ << " start_state_ = "; + + if (pointers_) + { + // Done this way for GCC: + os_ << "static_cast(reinterpret_cast("; + } + + os_ << "*(ptr_ + " << next_dfa_index << ')'; + + if (pointers_) + { + os_ << "))"; + } + + os_ << ";\n"; + } + + if (internals_._features & bol_bit) + { + os_ << " end_bol_ = bol_;\n"; + } + + os_ << " end_token_ = curr_;\n"; + os_ << " }\n"; + os_ << " }\n"; + os_ << " }\n\n"; + } + + os_ << " if (end_state_)\n"; + os_ << " {\n"; + os_ << " // Return longest match\n"; + + if (internals_._features & recursive_bit) + { + os_ << " if (pop_)\n"; + os_ << " {\n"; + os_ << " start_state_ = results_." 
+ "stack.top().first;\n"; + os_ << " results_.stack.pop();\n"; + os_ << " }\n"; + os_ << " else if (push_dfa_ != results_.npos())\n"; + os_ << " {\n"; + os_ << " results_.stack.push(typename results::" + "id_type_pair\n"; + os_ << " (push_dfa_, id_));\n"; + os_ << " }\n\n"; + } + + if (internals_._dfa.size() > 1) + { + os_ << " results_.state = start_state_;\n"; + } + + if (internals_._features & bol_bit) + { + os_ << " results_.bol = end_bol_;\n"; + } + + os_ << " results_.second = end_token_;\n"; + + if (internals_._features & skip_bit) + { + // We want a number regardless of id_type. + os_ << "\n if (id_ == results_.skip()) goto skip;\n"; + } + + if (internals_._features & again_bit) + { + // We want a number regardless of id_type. + os_ << "\n if (id_ == " + << static_cast(internals_._eoi); + + if (internals_._features & recursive_bit) + { + os_ << " || (pop_ && !results_.stack.empty() &&\n"; + // We want a number regardless of id_type. + os_ << " results_.stack.top().second == " + << static_cast(internals_._eoi) << ')'; + } + + os_ << ")\n"; + os_ << " {\n"; + os_ << " curr_ = end_token_;\n"; + os_ << " goto again;\n"; + os_ << " }\n"; + } + + os_ << " }\n"; + os_ << " else\n"; + os_ << " {\n"; + os_ << " // No match causes char to be skipped\n"; + os_ << " results_.second = end_token_;\n"; + + if (internals_._features & bol_bit) + { + os_ << " results_.bol = *results_.second == '\\n';\n"; + } + + os_ << " results_.first = results_.second;\n"; + os_ << " ++results_.second;\n"; + os_ << " id_ = results::npos();\n"; + os_ << " uid_ = results::npos();\n"; + os_ << " }\n\n"; + os_ << " results_.id = id_;\n"; + os_ << " results_.user_id = uid_;\n"; + os_ << "}\n"; + } + + template + static void dump_tables + (const basic_state_machine &sm_, + const std::size_t tabs_, const bool pointers_, std::ostream &os_) + { + const typename detail::basic_internals &internals_ = + sm_.data(); + const std::size_t lookup_divisor_ = 8; + // Lookup is always 256 entries long now + 
const std::size_t lookup_quotient_ = 256 / lookup_divisor_; + const std::size_t dfas_ = internals_._lookup.size(); + + output_tabs(tabs_, os_); + os_ << "static const id_type lookup"; + + if (dfas_ > 1) + { + os_ << "s_[][" << 256; + } + else + { + os_ << "_["; + } + + os_ << "] = \n"; + output_tabs(tabs_ + 1, os_); + + if (dfas_ > 1) + { + os_ << '{'; + } + + for (std::size_t l_ = 0; l_ < dfas_; ++l_) + { + const id_type *ptr_ = &internals_._lookup[l_].front(); + + // We want numbers regardless of id_type. + os_ << "{0x" << std::hex << static_cast(*ptr_++); + + for (std::size_t col_ = 1; col_ < lookup_divisor_; ++col_) + { + // We want numbers regardless of id_type. + os_ << ", 0x" << std::hex << static_cast(*ptr_++); + } + + for (std::size_t row_ = 1; row_ < lookup_quotient_; ++row_) + { + os_ << ",\n"; + output_tabs(tabs_ + 1, os_); + // We want numbers regardless of id_type. + os_ << "0x" << std::hex << static_cast(*ptr_++); + + for (std::size_t col_ = 1; col_ < lookup_divisor_; ++col_) + { + // We want numbers regardless of id_type. + os_ << ", 0x" << std::hex << + static_cast(*ptr_++); + } + } + + os_ << '}'; + + if (l_ + 1 < dfas_) + { + os_ << ",\n"; + output_tabs(tabs_ + 1, os_); + } + } + + if (dfas_ > 1) + { + os_ << '}'; + } + + os_ << ";\n"; + output_tabs(tabs_, os_); + os_ << "static const id_type dfa_alphabet"; + + if (dfas_ > 1) + { + os_ << "s_[" << std::dec << dfas_ << "] = {"; + } + else + { + os_ << "_ = "; + } + + // We want numbers regardless of id_type. + os_ << "0x" << std::hex << static_cast + (internals_._dfa_alphabet[0]); + + for (std::size_t col_ = 1; col_ < dfas_; ++col_) + { + // We want numbers regardless of id_type. + os_ << ", 0x" << std::hex << static_cast(internals_. 
+ _dfa_alphabet[col_]); + } + + if (dfas_ > 1) + { + os_ << '}'; + } + + os_ << ";\n"; + + // DFAs are usually different sizes, so dump separately + for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_) + { + const id_type dfa_alphabet_ = internals_._dfa_alphabet[dfa_]; + const std::size_t rows_ = internals_._dfa[dfa_].size() / + dfa_alphabet_; + const id_type *ptr_ = &internals_._dfa[dfa_].front(); + std::string dfa_name_ = "dfa"; + + output_tabs(tabs_, os_); + os_ << "static const "; + + if (pointers_) + { + os_ << "void *"; + } + else + { + os_ << "id_type "; + } + + os_ << dfa_name_; + + if (dfas_ > 1) + { + std::ostringstream ss_; + + ss_ << dfa_; + dfa_name_ += ss_.str(); + os_ << dfa_; + } + + dfa_name_ += '_'; + os_ << "_[] = {"; + + for (std::size_t row_ = 0; row_ < rows_; ++row_) + { + dump_row(row_ == 0, ptr_, dfa_name_, dfa_alphabet_, + pointers_, os_); + + if (row_ + 1 < rows_) + { + os_ << ",\n"; + output_tabs(tabs_ + 1, os_); + } + } + + os_ << "};\n"; + } + + if (dfas_ > 1) + { + output_tabs(tabs_, os_); + os_ << "static const "; + + if (pointers_) + { + os_ << "void * const"; + } + else + { + os_ << "id_type"; + } + + os_ << " *dfas_[] = {dfa0_"; + + for (std::size_t col_ = 1; col_ < dfas_; ++col_) + { + os_ << ", dfa" << col_ << '_'; + } + + os_ << "};\n"; + } + + os_ << std::dec; + } + +protected: + template + static void dump_row(const bool first_, const id_type * &ptr_, + const std::string &dfa_name_, const id_type dfa_alphabet_, + const bool pointers_, std::ostream &os_) + { + if (pointers_) + { + bool zero_ = *ptr_ == 0; + + if (first_) + { + // We want numbers regardless of id_type. + os_ << dfa_name_ << " + 0x" << std::hex << + static_cast(*ptr_++) * dfa_alphabet_; + } + else if (!zero_) + { + os_ << "reinterpret_cast(0x" + // We want numbers regardless of id_type. + << std::hex << static_cast(*ptr_++) << ')'; + } + else + { + // We want numbers regardless of id_type. 
+ os_ << "0x" << std::hex << static_cast(*ptr_++); + } + + for (id_type id_index_ = id_index; id_index_ < transitions_index; + ++id_index_, ++ptr_) + { + os_ << ", "; + zero_ = *ptr_ == 0; + + if (!zero_) + { + os_ << "reinterpret_cast("; + } + + // We want numbers regardless of id_type. + os_ << "0x" << std::hex << static_cast(*ptr_); + + if (!zero_) + { + os_ << ')'; + } + } + + for (id_type alphabet_ = transitions_index; + alphabet_ < dfa_alphabet_; ++alphabet_, ++ptr_) + { + // We want numbers regardless of id_type. + os_ << ", "; + + if (*ptr_ == 0) + { + os_ << 0; + } + else + { + // We want numbers regardless of id_type. + os_ << dfa_name_ + " + 0x" << std::hex << + static_cast(*ptr_) * dfa_alphabet_; + } + } + } + else + { + // We want numbers regardless of id_type. + os_ << "0x" << std::hex << static_cast(*ptr_++); + + for (id_type alphabet_ = 1; alphabet_ < dfa_alphabet_; + ++alphabet_, ++ptr_) + { + // We want numbers regardless of id_type. + os_ << ", 0x" << std::hex << static_cast(*ptr_); + } + } + } + + static void output_tabs(const std::size_t tabs_, std::ostream &os_) + { + for (std::size_t i_ = 0; i_ < tabs_; ++i_) + { + os_ << " "; + } + } + + template + static void output_char_loop(const id_type features_, + const std::size_t additional_tabs_, const bool pointers_, + std::ostream &os_, const std::false_type &) + { + output_tabs(additional_tabs_, os_); + os_ << " const typename results::char_type prev_char_ = " + "*curr_++;\n"; + output_tabs(additional_tabs_, os_); + os_ << " const "; + + if (pointers_) + { + os_ << "void * const *"; + } + else + { + os_ << "id_type "; + } + + os_ << "state_ = "; + + if (pointers_) + { + os_ << "reinterpret_cast\n "; + output_tabs(additional_tabs_, os_); + os_ << '('; + } + + os_ << "ptr_[lookup_"; + + if (!pointers_) + { + os_ << "\n "; + output_tabs(additional_tabs_, os_); + } + + os_ << "[static_cast"; + + if (pointers_) + { + os_ << "\n "; + output_tabs(additional_tabs_, os_); + } + + os_ << "(prev_char_)]]"; 
+ + if (pointers_) + { + os_ << ')'; + } + + os_ << ";\n\n"; + + if (features_ & bol_bit) + { + output_tabs(additional_tabs_, os_); + os_ << " bol_ = prev_char_ == '\\n';\n\n"; + } + + output_tabs(additional_tabs_, os_); + os_ << " if (state_ == 0)\n"; + output_tabs(additional_tabs_, os_); + os_ << " {\n"; + + if (features_ & eol_bit) + { + output_tabs(additional_tabs_, os_); + os_ << " EOL_state_ = 0;\n"; + } + + output_tabs(additional_tabs_, os_); + os_ << " break;\n"; + output_tabs(additional_tabs_, os_); + os_ << " }\n\n"; + output_tabs(additional_tabs_, os_); + os_ << " ptr_ = "; + + if (pointers_) + { + os_ << "state_"; + } + else + { + os_ << "&dfa_[state_ * dfa_alphabet_]"; + } + + os_ << ";\n"; + } + + template + static void output_char_loop(const id_type features_, + const std::size_t additional_tabs_, const bool pointers_, + std::ostream &os_, const std::true_type &) + { + output_tabs(additional_tabs_, os_); + os_ << " const std::size_t bytes_ =\n"; + output_tabs(additional_tabs_, os_); + os_ << " sizeof(typename results::char_type) < 3 ?\n"; + output_tabs(additional_tabs_, os_); + os_ << " sizeof(typename results::char_type) : 3;\n"; + output_tabs(additional_tabs_, os_); + os_ << " const std::size_t shift_[] = {0, 8, 16};\n"; + output_tabs(additional_tabs_, os_); + os_ << " typename results::char_type prev_char_ = " + "*curr_++;\n\n"; + + if (features_ & bol_bit) + { + output_tabs(additional_tabs_, os_); + os_ << " bol_ = prev_char_ == '\\n';\n\n"; + } + + output_tabs(additional_tabs_, os_); + os_ << " for (std::size_t i_ = 0; i_ < bytes_; ++i_)\n"; + output_tabs(additional_tabs_, os_); + os_ << " {\n"; + output_tabs(additional_tabs_, os_); + os_ << " const "; + + if (pointers_) + { + os_ << "void * const *"; + } + else + { + os_ << "id_type "; + } + + os_ << "state_ = "; + + if (pointers_) + { + os_ << "reinterpret_cast\n "; + output_tabs(additional_tabs_, os_); + os_ << '('; + } + + os_ << "ptr_[lookup_[static_cast\n"; + output_tabs(additional_tabs_, 
os_); + os_ << " ((prev_char_ >>\n" + " shift_[bytes_ - 1 - i_]) & 0xff)]]"; + + if (pointers_) + { + os_ << ')'; + } + + os_ << ";\n\n"; + output_tabs(additional_tabs_, os_); + os_ << " if (state_ == 0)\n"; + output_tabs(additional_tabs_, os_); + os_ << " {\n"; + + if (features_ & eol_bit) + { + output_tabs(additional_tabs_, os_); + os_ << " EOL_state_ = 0;\n"; + } + + output_tabs(additional_tabs_, os_); + os_ << " goto quit;\n"; + output_tabs(additional_tabs_, os_); + os_ << " }\n\n"; + output_tabs(additional_tabs_, os_); + os_ << " ptr_ = "; + + if (pointers_) + { + os_ << "state_"; + } + else + { + os_ << "&dfa_[state_ * dfa_alphabet_]"; + } + + os_ << ";\n"; + output_tabs(additional_tabs_, os_); + os_ << " }\n"; + } + + static void output_quit(std::ostream &, const std::false_type &) + { + // Nothing to do + } + + static void output_quit(std::ostream &os_, const std::true_type &) + { + os_ << "quit:\n"; + } +}; +} + +#endif diff --git a/YACReaderLibrary/lexertl/generator.hpp b/YACReaderLibrary/lexertl/generator.hpp new file mode 100644 index 00000000..581cd6e9 --- /dev/null +++ b/YACReaderLibrary/lexertl/generator.hpp @@ -0,0 +1,738 @@ +// generator.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_GENERATOR_HPP +#define LEXERTL_GENERATOR_HPP + +#include +#include "partition/charset.hpp" +#include "char_traits.hpp" +#include "partition/equivset.hpp" +#include +#include +#include "parser/parser.hpp" +#include "rules.hpp" +#include "state_machine.hpp" +#include + +namespace lexertl +{ +template > +class basic_generator +{ +public: + using id_type = typename rules::id_type; + using rules_char_type = typename rules::rules_char_type; + using sm_traits = typename sm::traits; + using parser = detail::basic_parser; + using charset_map = typename parser::charset_map; + using node = typename parser::node; + using node_ptr_vector = typename parser::node_ptr_vector; + + static void build(const rules &rules_, sm &sm_) + { + const std::size_t size_ = rules_.statemap().size(); + // Strong exception guarantee + // http://www.boost.org/community/exception_safety.html + internals internals_; + sm temp_sm_; + node_ptr_vector node_ptr_vector_; + + internals_._eoi = rules_.eoi(); + internals_.add_states(size_); + + for (id_type index_ = 0; index_ < size_; ++index_) + { + if (rules_.regexes()[index_].empty()) + { + std::ostringstream ss_; + + ss_ << "Lexer states with no rules are not allowed " + "(lexer state " << index_ << ".)"; + throw runtime_error(ss_.str()); + } + else + { + // Note that the following variables are per DFA. + // Map of regex charset tokens (strings) to index + charset_map charset_map_; + // Used to fix up $ and \n clashes. 
+ id_type nl_id_ = sm_traits::npos(); + // Regex syntax tree + observer_ptr root_ = build_tree(rules_, index_, + node_ptr_vector_, charset_map_, nl_id_); + + build_dfa(charset_map_, root_, internals_, temp_sm_, index_, + nl_id_); + + if (internals_._dfa[index_].size() / + internals_._dfa_alphabet[index_] >= sm_traits::npos()) + { + // Overflow + throw runtime_error("The data type you have chosen " + "cannot hold this many DFA rows."); + } + } + } + + // If you get a compile error here the id_type from rules and + // state machine do not match. + create(internals_, temp_sm_, rules_.features(), lookup()); + sm_.swap(temp_sm_); + } + + static observer_ptr build_tree(const rules &rules_, + const std::size_t dfa_, node_ptr_vector &node_ptr_vector_, + charset_map &charset_map_, id_type &nl_id_) + { + parser parser_(rules_.locale(), node_ptr_vector_, charset_map_, + rules_.eoi()); + const auto &regexes_ = rules_.regexes(); + auto regex_iter_ = regexes_[dfa_].cbegin(); + auto regex_iter_end_ = regexes_[dfa_].cend(); + const auto &ids_ = rules_.ids(); + const auto &user_ids_ = rules_.user_ids(); + auto id_iter_ = ids_[dfa_].cbegin(); + auto user_id_iter_ = user_ids_[dfa_].cbegin(); + const auto &next_dfas_ = rules_.next_dfas(); + const auto &pushes_ = rules_.pushes(); + const auto &pops_ = rules_.pops(); + auto next_dfa_iter_ = next_dfas_[dfa_].cbegin(); + auto push_dfa_iter_ = pushes_[dfa_].cbegin(); + auto pop_dfa_iter_ = pops_[dfa_].cbegin(); + const bool seen_bol_ = (rules_.features()[dfa_] & bol_bit) != 0; + observer_ptr root_ = nullptr; + + root_ = parser_.parse(*regex_iter_, *id_iter_, *user_id_iter_, + *next_dfa_iter_, *push_dfa_iter_, *pop_dfa_iter_, + rules_.flags(), nl_id_, seen_bol_); + ++regex_iter_; + ++id_iter_; + ++user_id_iter_; + ++next_dfa_iter_; + ++push_dfa_iter_; + ++pop_dfa_iter_; + + // Build syntax trees + while (regex_iter_ != regex_iter_end_) + { + observer_ptr rhs_ = parser_.parse(*regex_iter_, *id_iter_, + *user_id_iter_, *next_dfa_iter_, 
*push_dfa_iter_, + *pop_dfa_iter_, rules_.flags(), nl_id_, + (rules_.features()[dfa_] & bol_bit) != 0); + + node_ptr_vector_.emplace_back + (std::make_unique(root_, rhs_)); + root_ = node_ptr_vector_.back().get(); + + ++regex_iter_; + ++id_iter_; + ++user_id_iter_; + ++next_dfa_iter_; + ++push_dfa_iter_; + ++pop_dfa_iter_; + } + + return root_; + } + +protected: + using compressed = std::integral_constant; + using equivset = detail::basic_equivset; + using equivset_list = std::list>; + using equivset_ptr = std::unique_ptr; + using sm_char_type = typename sm_traits::char_type; + using charset = detail::basic_charset; + using charset_ptr = std::unique_ptr; + using charset_list = std::list>; + using internals = detail::basic_internals; + using id_type_set = typename std::set; + using id_type_vector = typename internals::id_type_vector; + using index_set = typename charset::index_set; + using index_set_vector = std::vector; + using is_dfa = std::integral_constant; + using lookup = std::integral_constant; + using node_set = std::set>; + using node_set_vector = std::vector>; + using node_vector = typename node::node_vector; + using node_vector_vector = std::vector>; + using selection_node = typename parser::selection_node; + using size_t_vector = typename std::vector; + using string_token = typename parser::string_token; + + static void build_dfa(const charset_map &charset_map_, + const observer_ptr root_, internals &internals_, sm &sm_, + const id_type dfa_index_, id_type &nl_id_) + { + // partitioned charset list + charset_list charset_list_; + // vector mapping token indexes to partitioned token index sets + index_set_vector set_mapping_; + auto &dfa_ = internals_._dfa[dfa_index_]; + std::size_t dfa_alphabet_ = 0; + const node_vector &followpos_ = root_->firstpos(); + node_set_vector seen_sets_; + node_vector_vector seen_vectors_; + size_t_vector hash_vector_; + id_type zero_id_ = sm_traits::npos(); + id_type_set eol_set_; + + set_mapping_.resize(charset_map_.size()); 
+ partition_charsets(charset_map_, charset_list_, is_dfa()); + build_set_mapping(charset_list_, internals_, dfa_index_, + set_mapping_); + + if (nl_id_ != sm_traits::npos()) + { + nl_id_ = *set_mapping_[nl_id_].begin(); + zero_id_ = sm_traits::compressed ? + *set_mapping_[charset_map_.find(string_token(0, 0))-> + second].begin() : sm_traits::npos(); + } + + dfa_alphabet_ = charset_list_.size() + transitions_index + + (nl_id_ == sm_traits::npos() ? 0 : 1); + + if (dfa_alphabet_ > sm_traits::npos()) + { + // Overflow + throw runtime_error("The data type you have chosen cannot hold " + "the dfa alphabet."); + } + + internals_._dfa_alphabet[dfa_index_] = + static_cast(dfa_alphabet_); + // 'jam' state + dfa_.resize(dfa_alphabet_, 0); + closure(followpos_, seen_sets_, seen_vectors_, hash_vector_, + static_cast(dfa_alphabet_), dfa_); + + // Loop over states + for (id_type index_ = 0; index_ < static_cast + (seen_vectors_.size()); ++index_) + { + equivset_list equiv_list_; + + // Intersect charsets + build_equiv_list(*seen_vectors_[index_].get(), set_mapping_, + equiv_list_, is_dfa()); + + for (auto &equivset_ : equiv_list_) + { + const id_type transition_ = closure + (equivset_->_followpos, seen_sets_, seen_vectors_, + hash_vector_, static_cast(dfa_alphabet_), dfa_); + + if (transition_ != sm_traits::npos()) + { + observer_ptr ptr_ = &dfa_.front() + + ((index_ + 1) * dfa_alphabet_); + + // Prune abstemious transitions from end states. 
+ if (*ptr_ && !equivset_->_greedy) continue; + + set_transitions(transition_, equivset_.get(), dfa_, ptr_, + index_, eol_set_); + } + } + } + + fix_clashes(eol_set_, nl_id_, zero_id_, dfa_, dfa_alphabet_, + compressed()); + append_dfa(charset_list_, internals_, sm_, dfa_index_, lookup()); + } + + static void set_transitions(const id_type transition_, equivset *equivset_, + typename internals::id_type_vector &dfa_, id_type *ptr_, + const id_type index_, id_type_set &eol_set_) + { + for (typename equivset::index_vector::const_iterator + equiv_iter_ = equivset_->_index_vector.begin(), + equiv_end_ = equivset_->_index_vector.end(); + equiv_iter_ != equiv_end_; ++equiv_iter_) + { + const id_type i_ = *equiv_iter_; + + if (i_ == parser::bol_token()) + { + dfa_.front() = transition_; + } + else if (i_ == parser::eol_token()) + { + ptr_[eol_index] = transition_; + eol_set_.insert(index_ + 1); + } + else + { + ptr_[i_ + transitions_index] = transition_; + } + } + } + + // Uncompressed + static void fix_clashes(const id_type_set &eol_set_, + const id_type nl_id_, const id_type /*zero_id_*/, + typename internals::id_type_vector &dfa_, + const std::size_t dfa_alphabet_, const std::false_type &) + { + for (const auto &eol_ : eol_set_) + { + observer_ptr ptr_ = &dfa_.front() + eol_ * dfa_alphabet_; + const id_type eol_state_ = ptr_[eol_index]; + const id_type nl_state_ = ptr_[nl_id_ + transitions_index]; + + if (nl_state_) + { + ptr_[transitions_index + nl_id_] = 0; + ptr_ = &dfa_.front() + eol_state_ * dfa_alphabet_; + + if (ptr_[transitions_index + nl_id_] == 0) + { + ptr_[transitions_index + nl_id_] = nl_state_; + } + } + } + } + + // Compressed + static void fix_clashes(const id_type_set &eol_set_, + const id_type nl_id_, const id_type zero_id_, + typename internals::id_type_vector &dfa_, + const std::size_t dfa_alphabet_, const std::true_type &) + { + std::size_t i_ = 0; + + for (const auto &eol_ : eol_set_) + { + observer_ptr ptr_ = &dfa_.front() + eol_ * dfa_alphabet_; + 
const id_type eol_state_ = ptr_[eol_index]; + id_type nl_state_ = 0; + + for (; i_ < (sm_traits::char_24_bit ? 2 : 1); ++i_) + { + ptr_ = &dfa_.front() + ptr_[transitions_index + zero_id_] * + dfa_alphabet_; + } + + nl_state_ = ptr_[transitions_index + nl_id_]; + + if (nl_state_) + { + ptr_ = &dfa_.front() + eol_state_ * dfa_alphabet_; + + if (ptr_[transitions_index + zero_id_] != 0) continue; + + ptr_[transitions_index + zero_id_] = + static_cast(dfa_.size() / dfa_alphabet_); + dfa_.resize(dfa_.size() + dfa_alphabet_, 0); + + for (i_ = 0; i_ < (sm_traits::char_24_bit ? 1 : 0); ++i_) + { + ptr_ = &dfa_.front() + dfa_.size() - dfa_alphabet_; + ptr_[transitions_index + zero_id_] = + static_cast(dfa_.size() / dfa_alphabet_); + dfa_.resize(dfa_.size() + dfa_alphabet_, 0); + } + + ptr_ = &dfa_.front() + dfa_.size() - dfa_alphabet_; + ptr_[transitions_index + nl_id_] = nl_state_; + } + } + } + + // char_state_machine version + static void append_dfa(const charset_list &charset_list_, + const internals &internals_, sm &sm_, const id_type dfa_index_, + const std::false_type &) + { + std::size_t size_ = charset_list_.size(); + typename sm::string_token_vector token_vector_; + + token_vector_.reserve(size_); + + for (const auto &charset_ : charset_list_) + { + token_vector_.push_back(charset_->_token); + } + + sm_.append(token_vector_, internals_, dfa_index_); + } + + // state_machine version + static void append_dfa(const charset_list &, const internals &, sm &, + const id_type, const std::true_type &) + { + // Nothing to do - will use create() instead + } + + // char_state_machine version + static void create(internals &, sm &, const id_type_vector &, + const std::false_type &) + { + // Nothing to do - will use append_dfa() instead + } + + // state_machine version + static void create(internals &internals_, sm &sm_, + const id_type_vector &features_, const std::true_type &) + { + for (std::size_t i_ = 0, size_ = internals_._dfa.size(); + i_ < size_; ++i_) + { + 
internals_._features |= features_[i_]; + } + + if (internals_._dfa.size() > 1) + { + internals_._features |= multi_state_bit; + } + + sm_.data().swap(internals_); + } + + // NFA version + static void partition_charsets(const charset_map &map_, + charset_list &lhs_, const std::false_type &) + { + fill_rhs_list(map_, lhs_); + } + + // DFA version + static void partition_charsets(const charset_map &map_, + charset_list &lhs_, const std::true_type &) + { + charset_list rhs_; + + fill_rhs_list(map_, rhs_); + + if (!rhs_.empty()) + { + typename charset_list::iterator iter_; + typename charset_list::iterator end_; + charset_ptr overlap_ = std::make_unique(); + + lhs_.emplace_back(std::move(rhs_.front())); + rhs_.pop_front(); + + while (!rhs_.empty()) + { + charset_ptr r_(rhs_.front().release()); + + rhs_.pop_front(); + iter_ = lhs_.begin(); + end_ = lhs_.end(); + + while (!r_->empty() && iter_ != end_) + { + auto l_iter_ = iter_; + + (*l_iter_)->intersect(*r_.get(), *overlap_.get()); + + if (overlap_->empty()) + { + ++iter_; + } + else if ((*l_iter_)->empty()) + { + l_iter_->reset(overlap_.release()); + overlap_ = std::make_unique(); + ++iter_; + } + else if (r_->empty()) + { + r_.reset(overlap_.release()); + overlap_ = std::make_unique(); + break; + } + else + { + iter_ = lhs_.insert(++iter_, charset_ptr()); + iter_->reset(overlap_.release()); + overlap_ = std::make_unique(); + ++iter_; + end_ = lhs_.end(); + } + } + + if (!r_->empty()) + { + lhs_.emplace_back(std::move(r_)); + } + } + } + } + + static void fill_rhs_list(const charset_map &map_, charset_list &list_) + { + for (const auto &pair_ : map_) + { + list_.emplace_back(std::make_unique + (pair_.first, pair_.second)); + } + } + + static void build_set_mapping(const charset_list &charset_list_, + internals &internals_, const id_type dfa_index_, + index_set_vector &set_mapping_) + { + auto iter_ = charset_list_.cbegin(); + auto end_ = charset_list_.cend(); + + for (id_type index_ = 0; iter_ != end_; ++iter_, 
++index_) + { + observer_ptr cs_ = iter_->get(); + + fill_lookup(cs_->_token, &internals_._lookup[dfa_index_], + index_, lookup()); + + for (const id_type i_ : cs_->_index_set) + { + set_mapping_[i_].insert(index_); + } + } + } + + // char_state_machine version + static void fill_lookup(const string_token &, observer_ptr , + const id_type, const std::false_type &) + { + // Do nothing (lookup not used) + } + + // state_machine version + static void fill_lookup(const string_token &charset_, + observer_ptr lookup_, const id_type index_, + const std::true_type &) + { + observer_ptr ptr_ = &lookup_->front(); + + for (const auto &range_ : charset_._ranges) + { + for (typename char_traits::index_type char_ = range_.first; + char_ < range_.second; ++char_) + { + // Note char_ must be unsigned + ptr_[char_] = index_ + transitions_index; + } + + // Note range_.second must be unsigned + ptr_[range_.second] = index_ + transitions_index; + } + } + + static id_type closure(const node_vector &followpos_, + node_set_vector &seen_sets_, node_vector_vector &seen_vectors_, + size_t_vector &hash_vector_, const id_type size_, id_type_vector &dfa_) + { + bool end_state_ = false; + id_type id_ = 0; + id_type user_id_ = sm_traits::npos(); + id_type next_dfa_ = 0; + id_type push_dfa_ = sm_traits::npos(); + bool pop_dfa_ = false; + std::size_t hash_ = 0; + + if (followpos_.empty()) return sm_traits::npos(); + + id_type index_ = 0; + std::unique_ptr set_ptr_ = std::make_unique(); + std::unique_ptr vector_ptr_ = + std::make_unique(); + + for (observer_ptr node_ : followpos_) + { + closure_ex(node_, end_state_, id_, user_id_, next_dfa_, + push_dfa_, pop_dfa_, *set_ptr_.get(), + *vector_ptr_.get(), hash_); + } + + bool found_ = false; + auto hash_iter_ = hash_vector_.cbegin(); + auto hash_end_ = hash_vector_.cend(); + auto set_iter_ = seen_sets_.cbegin(); + + for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_) + { + found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_; + ++index_; 
+ + if (found_) break; + } + + if (!found_) + { + seen_sets_.emplace_back(std::move(set_ptr_)); + seen_vectors_.emplace_back(std::move(vector_ptr_)); + hash_vector_.push_back(hash_); + // State 0 is the jam state... + index_ = static_cast(seen_sets_.size()); + + const std::size_t old_size_ = dfa_.size(); + + dfa_.resize(old_size_ + size_, 0); + + if (end_state_) + { + dfa_[old_size_] |= end_state_bit; + + if (pop_dfa_) + { + dfa_[old_size_] |= pop_dfa_bit; + } + + dfa_[old_size_ + id_index] = id_; + dfa_[old_size_ + user_id_index] = user_id_; + dfa_[old_size_ + push_dfa_index] = push_dfa_; + dfa_[old_size_ + next_dfa_index] = next_dfa_; + } + } + + return index_; + } + + static void closure_ex(observer_ptr node_, bool &end_state_, + id_type &id_, id_type &user_id_, id_type &next_dfa_, + id_type &push_dfa_, bool &pop_dfa_, node_set &set_ptr_, + node_vector &vector_ptr_, std::size_t &hash_) + { + const bool temp_end_state_ = node_->end_state(); + + if (temp_end_state_) + { + if (!end_state_) + { + end_state_ = true; + id_ = node_->id(); + user_id_ = node_->user_id(); + next_dfa_ = node_->next_dfa(); + push_dfa_ = node_->push_dfa(); + pop_dfa_ = node_->pop_dfa(); + } + } + + if (set_ptr_.insert(node_).second) + { + vector_ptr_.push_back(node_); + hash_ += reinterpret_cast(node_); + } + } + + // NFA version + static void build_equiv_list(const node_vector &vector_, + const index_set_vector &set_mapping_, equivset_list &lhs_, + const std::false_type &) + { + fill_rhs_list(vector_, set_mapping_, lhs_); + } + + // DFA version + static void build_equiv_list(const node_vector &vector_, + const index_set_vector &set_mapping_, equivset_list &lhs_, + const std::true_type &) + { + equivset_list rhs_; + + fill_rhs_list(vector_, set_mapping_, rhs_); + + if (!rhs_.empty()) + { + typename equivset_list::iterator iter_; + typename equivset_list::iterator end_; + equivset_ptr overlap_ = std::make_unique(); + + lhs_.emplace_back(std::move(rhs_.front())); + rhs_.pop_front(); + + while 
(!rhs_.empty()) + { + equivset_ptr r_(rhs_.front().release()); + + rhs_.pop_front(); + iter_ = lhs_.begin(); + end_ = lhs_.end(); + + while (!r_->empty() && iter_ != end_) + { + auto l_iter_ = iter_; + + (*l_iter_)->intersect(*r_.get(), *overlap_.get()); + + if (overlap_->empty()) + { + ++iter_; + } + else if ((*l_iter_)->empty()) + { + l_iter_->reset(overlap_.release()); + overlap_ = std::make_unique(); + ++iter_; + } + else if (r_->empty()) + { + r_.reset(overlap_.release()); + overlap_ = std::make_unique(); + break; + } + else + { + iter_ = lhs_.insert(++iter_, equivset_ptr()); + iter_->reset(overlap_.release()); + overlap_ = std::make_unique(); + ++iter_; + end_ = lhs_.end(); + } + } + + if (!r_->empty()) + { + lhs_.emplace_back(std::move(r_)); + } + } + } + } + + static void fill_rhs_list(const node_vector &vector_, + const index_set_vector &set_mapping_, equivset_list &list_) + { + for (observer_ptr node_ : vector_) + { + if (!node_->end_state()) + { + const id_type token_ = node_->token(); + + if (token_ != node::null_token()) + { + if (token_ == parser::bol_token() || + token_ == parser::eol_token()) + { + std::set index_set_; + + index_set_.insert(token_); + list_.emplace_back + (std::make_unique(index_set_, + token_, node_->greedy(), node_->followpos())); + } + else + { + list_.emplace_back(std::make_unique + (set_mapping_[token_], token_, node_->greedy(), + node_->followpos())); + } + } + } + } + } +}; + +using generator = basic_generator; +using wgenerator = basic_generator; +using u32generator = basic_generator; +using char_generator = basic_generator; +using wchar_generator = basic_generator; +using u32char_generator = basic_generator; +} + +#endif diff --git a/YACReaderLibrary/lexertl/internals.hpp b/YACReaderLibrary/lexertl/internals.hpp new file mode 100644 index 00000000..a5e1dfe0 --- /dev/null +++ b/YACReaderLibrary/lexertl/internals.hpp @@ -0,0 +1,75 @@ +// internals.hpp +// Copyright (c) 2009-2018 Ben Hanson (http://www.benhanson.net/) +// +// 
Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_INTERNALS_HPP +#define LEXERTL_INTERNALS_HPP + +#include "enums.hpp" +#include +#include + +namespace lexertl +{ +namespace detail +{ +// Plain aggregate of lexer tables. _lookup, _dfa_alphabet and _dfa are +// grown together by add_states(), one entry each per added state. +template +struct basic_internals +{ + using id_type_vector = std::vector; + using id_type_vector_vector = std::vector; + + id_type _eoi; + id_type_vector_vector _lookup; + id_type_vector _dfa_alphabet; + id_type _features; + id_type_vector_vector _dfa; + + // Starts with _eoi and _features at 0 and all three containers empty. + basic_internals() : + _eoi(0), + _lookup(), + _dfa_alphabet(), + _features(0), + _dfa() + { + } + + // Returns every member to the values set by the default constructor. + void clear() + { + _eoi = 0; + _lookup.clear(); + _dfa_alphabet.clear(); + _features = 0; + _dfa.clear(); + } + + // True while _dfa holds no entries. + bool empty() const + { + return _dfa.empty(); + } + + // Appends num_ entries to each container: a 256-element lookup vector + // filled with dead_state_index, an alphabet size of 0 and an empty + // DFA vector. + void add_states(const std::size_t num_) + { + for (std::size_t index_ = 0; index_ < num_; ++index_) + { + // lookup *always* has a size 256 now. + _lookup.push_back(id_type_vector(256, dead_state_index)); + _dfa_alphabet.push_back(0); + _dfa.push_back(id_type_vector()); + } + } + + // Exchanges all five members with those of internals_. + void swap(basic_internals &internals_) + { + std::swap(_eoi, internals_._eoi); + _lookup.swap(internals_._lookup); + _dfa_alphabet.swap(internals_._dfa_alphabet); + std::swap(_features, internals_._features); + _dfa.swap(internals_._dfa); + } +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/iterator.hpp b/YACReaderLibrary/lexertl/iterator.hpp new file mode 100644 index 00000000..5820ee02 --- /dev/null +++ b/YACReaderLibrary/lexertl/iterator.hpp @@ -0,0 +1,135 @@ +// iterator.hpp +// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef LEXERTL_ITERATOR_HPP +#define LEXERTL_ITERATOR_HPP + +#include +#include "lookup.hpp" +#include "state_machine.hpp" + +namespace lexertl +{ +// Forward iterator whose value is a results object that is refilled by +// lexertl::lookup() on construction and on every increment. +template +class iterator +{ +public: + using value_type = results; + using difference_type = ptrdiff_t; + using pointer = const value_type *; + using reference = const value_type &; + using iterator_category = std::forward_iterator_tag; + + // Builds an iterator with a null state machine pointer; it compares + // equal to any iterator whose lookup() has reset _sm to nullptr. + iterator() : + _results(iter(), iter()), + _sm(nullptr) + { + } + + // Stores the input range and the address of sm, then calls lookup() + // once so the iterator already holds a result when dereferenced. + iterator(const iter &start_, const iter &end_, const sm_type &sm) : + _results(start_, end_), + _sm(&sm) + { + lookup(); + } + + // Only need this because of warnings with gcc with -Weffc++ + iterator(const iterator &rhs_) + { + _results = rhs_._results; + _sm = rhs_._sm; + } + + // Only need this because of warnings with gcc with -Weffc++ + iterator &operator =(const iterator &rhs_) + { + if (&rhs_ != this) + { + _results = rhs_._results; + _sm = rhs_._sm; + } + + return *this; + } + + // Pre-increment: calls lookup() again and returns *this. + iterator &operator ++() + { + lookup(); + return *this; + } + + // Post-increment: returns a copy taken before lookup() is called. + iterator operator ++(int) + { + iterator iter_ = *this; + + lookup(); + return iter_; + } + + const value_type &operator *() const + { + return _results; + } + + const value_type *operator ->() const + { + return &_results; + } + + // Equal when both hold the same _sm pointer and, unless that pointer + // is null, equal _results. + bool operator ==(const iterator &rhs_) const + { + return _sm == rhs_._sm && (_sm == nullptr ? 
true : + _results == rhs_._results); + } + + bool operator !=(const iterator &rhs_) const + { + return !(*this == rhs_); + } + + // Dereferences _sm without a null check; _sm is null for a + // default-constructed iterator and after lookup() has reset it. + const sm_type &sm() const + { + return *_sm; + } + +private: + value_type _results; + const sm_type *_sm; + + // Calls lexertl::lookup() on the stored state machine and results, + // then sets _sm to nullptr when _results.first equals _results.eoi. + void lookup() + { + lexertl::lookup(*_sm, _results); + + if (_results.first == _results.eoi) + { + _sm = nullptr; + } + } +}; + +using siterator = + iterator; +using citerator = iterator; +using wsiterator = + iterator; +using wciterator = iterator; +using u32siterator = iterator; +using u32citerator = iterator; + +using sriterator = + iterator; +using criterator = iterator; +using wsriterator = + iterator; +using wcriterator = + iterator; +using u32sriterator = iterator; +using u32criterator = iterator; +} + +#endif diff --git a/YACReaderLibrary/lexertl/licence_1_0.txt b/YACReaderLibrary/lexertl/licence_1_0.txt new file mode 100644 index 00000000..7925d62e --- /dev/null +++ b/YACReaderLibrary/lexertl/licence_1_0.txt @@ -0,0 +1,24 @@ +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + diff --git a/YACReaderLibrary/lexertl/lookup.hpp b/YACReaderLibrary/lexertl/lookup.hpp new file mode 100644 index 00000000..903413cd --- /dev/null +++ b/YACReaderLibrary/lexertl/lookup.hpp @@ -0,0 +1,491 @@ +// lookup.hpp +// Copyright (c) 2009-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_LOOKUP_HPP +#define LEXERTL_LOOKUP_HPP + +#include +#include "match_results.hpp" +#include + +namespace lexertl +{ +namespace detail +{ +template +struct bol_state +{ + bol_state(const bool) + { + } +}; + +template<> +struct bol_state +{ + bool _bol; + bool _end_bol; + + bol_state(const bool bol_) : + _bol(bol_), + _end_bol(bol_) + { + } +}; + +template +struct eol_state +{ +}; + +template +struct eol_state +{ + id_type _EOL_state; + + eol_state() : + _EOL_state(0) + { + } +}; + +template +struct multi_state_state +{ + multi_state_state(const id_type) + { + } +}; + +template +struct multi_state_state +{ + id_type _start_state; + + multi_state_state(const id_type state_) : + _start_state(state_) + { + } +}; + +template +struct recursive_state +{ + recursive_state(const id_type *) + { + } +}; + +template +struct recursive_state +{ + bool _pop; + id_type _push_dfa; + + recursive_state(const id_type *ptr_) : + _pop((*ptr_ & pop_dfa_bit) != 0), + _push_dfa(*(ptr_ + push_dfa_index)) + { + } +}; + +template +struct lookup_state +{ + const 
id_type *_lookup; + id_type _dfa_alphabet; + const id_type *_dfa; + const id_type *_ptr; + bool _end_state; + id_type _id; + id_type _uid; + bol_state<(flags & bol_bit) != 0> _bol_state; + eol_state _eol_state; + multi_state_state + _multi_state_state; + recursive_state _recursive_state; + + lookup_state(const internals &internals_, const bool bol_, + const id_type state_) : + _lookup(&internals_._lookup[state_][0]), + _dfa_alphabet(internals_._dfa_alphabet[state_]), + _dfa(&internals_._dfa[state_][0]), + _ptr(_dfa + _dfa_alphabet), + _end_state(*_ptr != 0), + _id(*(_ptr + id_index)), + _uid(*(_ptr + user_id_index)), + _bol_state(bol_), + _eol_state(), + _multi_state_state(state_), + _recursive_state(_ptr) + { + } + + void reset_recursive(const std::false_type &) + { + // Do nothing + } + + void reset_recursive(const std::true_type &) + { + _recursive_state._pop = (*_ptr & pop_dfa_bit) != 0; + _recursive_state._push_dfa = *(_ptr + push_dfa_index); + } + + void bol_start_state(const std::false_type &) + { + // Do nothing + } + + void bol_start_state(const std::true_type &) + { + if (_bol_state._bol) + { + const id_type state_ = *_dfa; + + if (state_) + { + _ptr = &_dfa[state_ * _dfa_alphabet]; + } + } + } + + template + bool is_eol(const char_type, const std::false_type &) + { + return false; + } + + template + bool is_eol(const char_type curr_, const std::true_type &) + { + bool ret_ = false; + + _eol_state._EOL_state = _ptr[eol_index]; + ret_ = _eol_state._EOL_state && (curr_ == '\r' || curr_ == '\n'); + + if (ret_) + { + _ptr = &_dfa[_eol_state._EOL_state * _dfa_alphabet]; + } + + return ret_; + } + + template + id_type next_char(const char_type prev_char_, const std::false_type &) + { + const id_type state_= _ptr[_lookup + [static_cast(prev_char_)]]; + + if (state_ != 0) + { + _ptr = &_dfa[state_ * _dfa_alphabet]; + } + + return state_; + } + + template + id_type next_char(const char_type prev_char_, const std::true_type &) + { + const std::size_t bytes_ = 
sizeof(char_type) < 3 ? + sizeof(char_type) : 3; + const std::size_t shift_[] = {0, 8, 16}; + id_type state_= 0; + + for (std::size_t i_ = 0; i_ < bytes_; ++i_) + { + state_ = _ptr[_lookup[static_cast((prev_char_ >> + shift_[bytes_ - 1 - i_]) & 0xff)]]; + + if (state_ == 0) + { + break; + } + + _ptr = &_dfa[state_ * _dfa_alphabet]; + } + + return state_; + } + + template + void bol(const char_type, const std::false_type &) + { + // Do nothing + } + + template + void bol(const char_type prev_char_, const std::true_type &) + { + _bol_state._bol = prev_char_ == '\n'; + } + + void eol(const id_type, const std::false_type &) + { + // Do nothing + } + + void eol(const id_type err_val_, const std::true_type &) + { + _eol_state._EOL_state = err_val_; + } + + void reset_start_state(const std::false_type &) + { + // Do nothing + } + + void reset_start_state(const std::true_type &) + { + _multi_state_state._start_state = *(_ptr + next_dfa_index); + } + + void reset_end_bol(const std::false_type &) + { + // Do nothing + } + + void reset_end_bol(const std::true_type &) + { + _bol_state._end_bol = _bol_state._bol; + } + + template + void end_state(iter_type &end_token_, iter_type &curr_) + { + if (*_ptr) + { + _end_state = true; + reset_end_bol + (std::integral_constant()); + _id = *(_ptr + id_index); + _uid = *(_ptr + user_id_index); + reset_recursive + (std::integral_constant()); + reset_start_state(std::integral_constant()); + end_token_ = curr_; + } + } + + template + void check_eol(iter_type &, iter_type &, const id_type, + const char_type, const std::false_type &) + { + // Do nothing + } + + template + void check_eol(iter_type &end_token_, iter_type &curr_, + const id_type npos, const char_type eoi_, const std::true_type &) + { + if (_eol_state._EOL_state != npos && curr_ == eoi_) + { + _eol_state._EOL_state = _ptr[eol_index]; + + if (_eol_state._EOL_state) + { + _ptr = &_dfa[_eol_state._EOL_state * _dfa_alphabet]; + end_state(end_token_, curr_); + } + } + } + + template + 
void pop(results &, const std::false_type &) + { + // Nothing to do + } + + template + void pop(results &results_, const std::true_type &) + { + if (_recursive_state._pop) + { + _multi_state_state._start_state = results_.stack.top().first; + results_.stack.pop(); + } + else if (_recursive_state._push_dfa != results::npos()) + { + results_.stack.push(typename results::id_type_pair + (_recursive_state._push_dfa, _id)); + } + } + + template + bool is_id_eoi(const id_type eoi_, const results &, const std::false_type &) + { + return _id == eoi_; + } + + template + bool is_id_eoi(const id_type eoi_, const results &results_, + const std::true_type &) + { + return _id == eoi_ || (_recursive_state._pop && + !results_.stack.empty() && results_.stack.top().second == eoi_); + } + + void start_state(id_type &, const std::false_type &) + { + // Do nothing + } + + void start_state(id_type &start_state_, const std::true_type &) + { + start_state_ = _multi_state_state._start_state; + } + + void bol(bool &, const std::false_type &) + { + // Do nothing + } + + void bol(bool &end_bol_, const std::true_type &) + { + end_bol_ = _bol_state._end_bol; + } +}; + +template +void inc_end(results &, const std::false_type &) +{ + // Do nothing +} + +template +void inc_end(results &results_, const std::true_type &) +{ + ++results_.second; +} + +template +void next(const sm_type &sm_, results &results_, + const std::integral_constant &compressed_, + const std::integral_constant &recursive_, + const std::forward_iterator_tag &) +{ + using id_type = typename sm_type::id_type; + const auto &internals_ = sm_.data(); + auto end_token_ = results_.second; + +skip: + auto curr_ = results_.second; + + results_.first = curr_; + +again: + if (curr_ == results_.eoi) + { + results_.id = internals_._eoi; + results_.user_id = results::npos(); + return; + } + + lookup_state lu_state_ + (internals_, results_.bol, results_.state); + lu_state_.bol_start_state + (std::integral_constant()); + + while (curr_ != 
results_.eoi) + { + if (!lu_state_.is_eol(*curr_, + std::integral_constant())) + { + const auto prev_char_ = *curr_; + const id_type state_ = lu_state_.next_char(prev_char_, + compressed_); + + ++curr_; + lu_state_.bol(prev_char_, + std::integral_constant()); + + if (state_ == 0) + { + lu_state_.is_eol(results::npos(), + std::integral_constant()); + break; + } + } + + lu_state_.end_state(end_token_, curr_); + } + + lu_state_.check_eol(end_token_, curr_, results::npos(), results_.eoi, + std::integral_constant()); + + if (lu_state_._end_state) + { + // Return longest match + lu_state_.pop(results_, recursive_); + + lu_state_.start_state(results_.state, + std::integral_constant()); + lu_state_.bol(results_.bol, + std::integral_constant()); + results_.second = end_token_; + + if (lu_state_._id == sm_.skip()) goto skip; + + if (lu_state_.is_id_eoi(internals_._eoi, results_, recursive_)) + { + curr_ = end_token_; + goto again; + } + } + else + { + results_.second = end_token_; + results_.bol = *results_.second == '\n'; + results_.first = results_.second; + // No match causes char to be skipped + inc_end(results_, + std::integral_constant()); + lu_state_._id = results::npos(); + lu_state_._uid = results::npos(); + } + + results_.id = lu_state_._id; + results_.user_id = lu_state_._uid; +} +} + +template +void lookup(const sm_type &sm_, match_results &results_) +{ + using value_type = typename std::iterator_traits::value_type; + using cat = typename std::iterator_traits::iterator_category; + + // If this asserts, you have either not defined all the correct + // flags, or you should be using recursive_match_results instead + // of match_results. 
+ assert((sm_.data()._features & flags) == sm_.data()._features); + detail::next(sm_, results_, + std::integral_constant 1)>(), + std::false_type(), cat()); +} + +template +void lookup(const sm_type &sm_, recursive_match_results &results_) +{ + using value_type = typename std::iterator_traits::value_type; + using cat = typename std::iterator_traits::iterator_category; + + // If this asserts, you have not defined all the correct flags + assert((sm_.data()._features & flags) == sm_.data()._features); + detail::next(sm_, results_, + std::integral_constant 1)>(), + std::true_type(), cat()); +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/match_results.hpp b/YACReaderLibrary/lexertl/match_results.hpp new file mode 100644 index 00000000..078d8df1 --- /dev/null +++ b/YACReaderLibrary/lexertl/match_results.hpp @@ -0,0 +1,171 @@ +// match_results.hpp +// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_MATCH_RESULTS_HPP +#define LEXERTL_MATCH_RESULTS_HPP + +#include "char_traits.hpp" +#include "enums.hpp" +#include +#include +#include + +namespace lexertl +{ +template +struct match_results +{ + using iter_type = iter; + using char_type = typename std::iterator_traits::value_type; + using index_type = typename basic_char_traits::index_type; + using string = std::basic_string; + + id_type id; + id_type user_id; + iter_type first; + iter_type second; + iter_type eoi; + bool bol; + id_type state; + + match_results() : + id(0), + user_id(npos()), + first(iter_type()), + second(iter_type()), + eoi(iter_type()), + bol(true), + state(0) + { + } + + match_results(const iter_type &start_, const iter_type &end_) : + id(0), + user_id(npos()), + first(start_), + second(start_), + eoi(end_), + bol(true), + state(0) + { + } + + virtual ~match_results() + { + } + + string str() const + { + return 
string(first, second); + } + + string substr(const std::size_t soffset_, const std::size_t eoffset_) const + { + return string(first + soffset_, second - eoffset_); + } + + virtual void clear() + { + id = 0; + user_id = npos(); + first = eoi; + second = eoi; + bol = true; + state = 0; + } + + virtual void reset(const iter_type &start_, const iter_type &end_) + { + id = 0; + user_id = npos(); + first = start_; + second = start_; + eoi = end_; + bol = true; + state = 0; + } + + static id_type npos() + { + return static_cast(~0); + } + + static id_type skip() + { + return static_cast(~1); + } + + bool operator ==(const match_results &rhs_) const + { + return id == rhs_.id && + user_id == rhs_.user_id && + first == rhs_.first && + second == rhs_.second && + eoi == rhs_.eoi && + bol == rhs_.bol && + state == rhs_.state; + } +}; + +template +struct recursive_match_results : public match_results +{ + using id_type_pair = std::pair; + std::stack stack; + + recursive_match_results() : + match_results(), + stack() + { + } + + recursive_match_results(const iter &start_, const iter &end_) : + match_results(start_, end_), + stack() + { + } + + virtual ~recursive_match_results() override + { + } + + virtual void clear() override + { + match_results::clear(); + + while (!stack.empty()) stack.pop(); + } + + virtual void reset(const iter &start_, const iter &end_) override + { + match_results::reset(start_, end_); + + while (!stack.empty()) stack.pop(); + } +}; + +using smatch = match_results; +using cmatch = match_results; +using wsmatch = match_results; +using wcmatch = match_results; +using u32smatch = match_results; +using u32cmatch = match_results; + +using srmatch = + recursive_match_results; +using crmatch = recursive_match_results; +using wsrmatch = + recursive_match_results; +using wcrmatch = recursive_match_results; +using u32srmatch = + recursive_match_results; +using u32crmatch = recursive_match_results; +} + +#endif diff --git a/YACReaderLibrary/lexertl/memory_file.hpp 
b/YACReaderLibrary/lexertl/memory_file.hpp new file mode 100644 index 00000000..4ea42a4d --- /dev/null +++ b/YACReaderLibrary/lexertl/memory_file.hpp @@ -0,0 +1,138 @@ +// memory_file.hpp +// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) +// Inspired by http://en.wikibooks.org/wiki/Optimizing_C%2B%2B/ +// General_optimization_techniques/Input/Output#Memory-mapped_file +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef LEXERTL_MEMORY_FILE_HPP +#define LEXERTL_MEMORY_FILE_HPP + +#include + +#ifdef _WIN32 +#include +#else +#include +#include +#include +#include +#endif + +// Only files small enough to fit into memory are supported. +namespace lexertl +{ +template +class basic_memory_file +{ +public: + basic_memory_file() + { + } + + basic_memory_file(const char *pathname_) + { + open(pathname_); + } + + ~basic_memory_file() + { + close(); + } + + void open(const char *pathname_) + { + if (_data) close(); + +#ifdef _WIN32 + _fh = ::CreateFileA(pathname_, GENERIC_READ, FILE_SHARE_READ, 0, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); + _fmh = 0; + + if (_fh != INVALID_HANDLE_VALUE) + { + _fmh = ::CreateFileMapping(_fh, 0, PAGE_READONLY, 0, 0, 0); + + if (_fmh != 0) + { + _data = static_cast(::MapViewOfFile + (_fmh, FILE_MAP_READ, 0, 0, 0)); + + if (_data) _size = ::GetFileSize(_fh, 0) / sizeof(char_type); + } + } +#else + _fh = ::open(pathname_, O_RDONLY); + + if (_fh > -1) + { + struct stat sbuf_; + + if (::fstat(_fh, &sbuf_) > -1) + { + _data = static_cast + (::mmap(0, sbuf_.st_size, PROT_READ, MAP_SHARED, _fh, 0)); + + if (_data == MAP_FAILED) + { + _data = nullptr; + } + else + { + _size = sbuf_.st_size / sizeof(char_type); + } + } + } +#endif + } + + const char_type *data() const + { + return _data; + } + + std::size_t size() const + { + return _size; + } + + void close() + { +#ifdef _WIN32 + ::UnmapViewOfFile(_data); + 
::CloseHandle(_fmh); + ::CloseHandle(_fh); + _fh = 0; + _fmh = 0; +#else + if (_data) ::munmap(const_cast<char_type *>(_data), + _size * sizeof(char_type)); // munmap wants a byte length; _size counts char_type units + if (_fh > -1) ::close(_fh); // never close a descriptor we did not open (0 is stdin) + _fh = -1; +#endif + _data = nullptr; + _size = 0; + } + +private: + const char_type *_data = nullptr; + std::size_t _size = 0; +#ifdef _WIN32 + HANDLE _fh = 0; + HANDLE _fmh = 0; +#else + int _fh = -1; // -1 means no descriptor is held; 0 would alias stdin +#endif + + // No copy construction. + basic_memory_file(const basic_memory_file &) = delete; + // No assignment. + basic_memory_file &operator =(const basic_memory_file &) = delete; +}; + +using memory_file = basic_memory_file; +using wmemory_file = basic_memory_file; +} + +#endif diff --git a/YACReaderLibrary/lexertl/narrow.hpp b/YACReaderLibrary/lexertl/narrow.hpp new file mode 100644 index 00000000..94b2f6c2 --- /dev/null +++ b/YACReaderLibrary/lexertl/narrow.hpp @@ -0,0 +1,25 @@ +// narrow.hpp +// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_NARROW_HPP +#define LEXERTL_NARROW_HPP + +#include + +namespace lexertl +{ +template +void narrow(const char_type *str_, std::ostringstream &ss_) +{ + while (*str_) + { + // Safe to simply cast to char. + // when string only contains ASCII. + ss_ << static_cast(*str_++); + } +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/observer_ptr.hpp b/YACReaderLibrary/lexertl/observer_ptr.hpp new file mode 100644 index 00000000..9ecd8255 --- /dev/null +++ b/YACReaderLibrary/lexertl/observer_ptr.hpp @@ -0,0 +1,16 @@ +// observer_ptr.hpp +// Copyright (c) 2017-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef LEXERTL_OBSERVER_PTR_HPP +#define LEXERTL_OBSERVER_PTR_HPP + +namespace lexertl +{ + template + using observer_ptr = T *; +} + +#endif diff --git a/YACReaderLibrary/lexertl/parser/parser.hpp b/YACReaderLibrary/lexertl/parser/parser.hpp new file mode 100644 index 00000000..002f8d87 --- /dev/null +++ b/YACReaderLibrary/lexertl/parser/parser.hpp @@ -0,0 +1,926 @@ +// parser.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_PARSER_HPP +#define LEXERTL_PARSER_HPP + +#include +#include +#include "tree/end_node.hpp" +#include "tree/iteration_node.hpp" +#include "tree/leaf_node.hpp" +#include +#include "tokeniser/re_tokeniser.hpp" +#include "../runtime_error.hpp" +#include "tree/selection_node.hpp" +#include "tree/sequence_node.hpp" +#include +#include + +namespace lexertl +{ +namespace detail +{ +/* + General principles of regex parsing: + - Every regex is a sequence of sub-regexes. + - Regexes consist of operands and operators + - All operators decompose to sequence, selection ('|') and iteration ('*') + - Regex tokens are stored on a stack. + - When a complete sequence of regex tokens is on the stack it is processed. + +Grammar: + + -> + -> | '|' + -> + -> | + -> + -> charset | macro | '('')' | + -> '?' | '??' | '*' | '*?' | '+' | '+?' | '{n[,[m]]}' | + '{n[,[m]]}?' 
+*/ + +template +class basic_parser +{ +public: + enum {char_24_bit = sm_traits::char_24_bit}; + using char_type = typename sm_traits::char_type; + using id_type = typename sm_traits::id_type; + using end_node = basic_end_node; + using input_char_type = typename sm_traits::input_char_type; + using input_string_token = basic_string_token; + using iteration_node = basic_iteration_node; + using leaf_node = basic_leaf_node; + using tokeniser = + basic_re_tokeniser; + using node = basic_node; + using node_ptr_vector = typename node::node_ptr_vector; + using string = std::basic_string; + using string_token = basic_string_token; + using selection_node = basic_selection_node; + using sequence_node = basic_sequence_node; + using charset_map = std::map; + using charset_pair = std::pair; + using compressed = std::integral_constant; + using token = basic_re_token; + static_assert(std::is_move_assignable::value && + std::is_move_constructible::value, + "token is not movable."); + using token_vector = std::vector; + + basic_parser(const std::locale &locale_, + node_ptr_vector &node_ptr_vector_, + charset_map &charset_map_, const id_type eoi_) : + _locale(locale_), + _node_ptr_vector(node_ptr_vector_), + _charset_map(charset_map_), + _eoi(eoi_), + _token_stack(), + _tree_node_stack() + { + } + + observer_ptr parse(const token_vector ®ex_, const id_type id_, + const id_type user_id_, const id_type next_dfa_, + const id_type push_dfa_, const bool pop_dfa_, + const std::size_t flags_, id_type &nl_id_, const bool seen_bol_) + { + auto iter_ = regex_.cbegin(); + auto end_ = regex_.cend(); + observer_ptr root_ = nullptr; + observer_ptr lhs_token_ = nullptr; + // There cannot be less than 2 tokens + auto rhs_token_ = std::make_unique(*iter_++); + char action_ = 0; + + _token_stack.emplace(std::move(rhs_token_)); + rhs_token_ = std::make_unique(*iter_); + + if (iter_ + 1 != end_) ++iter_; + + do + { + lhs_token_ = _token_stack.top().get(); + action_ = 
lhs_token_->precedence(rhs_token_->_type); + + switch (action_) + { + case '<': + case '=': + _token_stack.emplace(std::move(rhs_token_)); + rhs_token_ = std::make_unique(*iter_); + + if (iter_ + 1 != end_) ++iter_; + + break; + case '>': + reduce(nl_id_); + break; + default: + { + std::ostringstream ss_; + + ss_ << "A syntax error occurred: '" << + lhs_token_->precedence_string() << + "' against '" << rhs_token_->precedence_string() << + "' in rule id " << id_ << '.'; // closing quote balances the one opened before the rhs token + throw runtime_error(ss_.str()); + break; + } + } + } while (!_token_stack.empty()); + + if (_tree_node_stack.empty()) + { + std::ostringstream ss_; + + ss_ << "Empty rules are not allowed in rule id " << + id_ << '.'; + throw runtime_error(ss_.str()); + } + + assert(_tree_node_stack.size() == 1); + + observer_ptr lhs_node_ = _tree_node_stack.top(); + + _tree_node_stack.pop(); + _node_ptr_vector.emplace_back(std::make_unique + (id_, user_id_, next_dfa_, push_dfa_, pop_dfa_)); + + observer_ptr rhs_node_ = _node_ptr_vector.back().get(); + + _node_ptr_vector.emplace_back(std::make_unique + (lhs_node_, rhs_node_)); + root_ = _node_ptr_vector.back().get(); + + if (seen_bol_) + { + fixup_bol(root_); + } + + if ((flags_ & match_zero_len) == 0) + { + const auto &firstpos_ = root_->firstpos(); + + for (observer_ptr node_ : firstpos_) + { + if (node_->end_state()) + { + std::ostringstream ss_; + + ss_ << "Rules that match zero characters are not allowed " + "as this can cause an infinite loop in user code. The " + "match_zero_len flag overrides this check. 
Rule id " << + id_ << '.'; + throw runtime_error(ss_.str()); + } + } + } + + return root_; + } + + static id_type bol_token() + { + return static_cast(~1); + } + + static id_type eol_token() + { + return static_cast(~2); + } + +private: + using input_range = typename input_string_token::range; + using range = typename string_token::range; + using string_token_vector = std::vector>; + using token_stack = std::stack>; + using tree_node_stack = typename node::node_stack; + + const std::locale &_locale; + node_ptr_vector &_node_ptr_vector; + charset_map &_charset_map; + id_type _eoi; + token_stack _token_stack; + tree_node_stack _tree_node_stack; + + void reduce(id_type &nl_id_) + { + observer_ptr lhs_ = nullptr; + observer_ptr rhs_ = nullptr; + token_stack handle_; + char action_ = 0; + + do + { + handle_.emplace(); + rhs_ = _token_stack.top().release(); + handle_.top().reset(rhs_); + _token_stack.pop(); + + if (!_token_stack.empty()) + { + lhs_ = _token_stack.top().get(); + action_ = lhs_->precedence(rhs_->_type); + } + } while (!_token_stack.empty() && action_ == '='); + + assert(_token_stack.empty() || action_ == '<'); + + switch (rhs_->_type) + { + case BEGIN: + // finished processing so exit + break; + case REGEX: + // finished parsing, nothing to do + break; + case OREXP: + orexp(handle_); + break; + case SEQUENCE: + _token_stack.emplace(std::make_unique(OREXP)); + break; + case SUB: + sub(handle_); + break; + case EXPRESSION: + _token_stack.emplace(std::make_unique(SUB)); + break; + case REPEAT: + repeat(handle_); + break; + case BOL: + bol(handle_); + break; + case EOL: + eol(handle_, nl_id_); + break; + case CHARSET: + charset(handle_, compressed()); + break; + case OPENPAREN: + openparen(handle_); + break; + case OPT: + case AOPT: + optional(rhs_->_type == OPT); + _token_stack.emplace(std::make_unique(DUP)); + break; + case ZEROORMORE: + case AZEROORMORE: + zero_or_more(rhs_->_type == ZEROORMORE); + _token_stack.emplace(std::make_unique(DUP)); + break; + 
case ONEORMORE: + case AONEORMORE: + one_or_more(rhs_->_type == ONEORMORE); + _token_stack.emplace(std::make_unique(DUP)); + break; + case REPEATN: + case AREPEATN: + repeatn(rhs_->_type == REPEATN, handle_.top().get()); + _token_stack.emplace(std::make_unique(DUP)); + break; + default: + throw runtime_error + ("Internal error in regex_parser::reduce."); + break; + } + } + + void orexp(token_stack &handle_) + { + assert(handle_.top()->_type == OREXP && + (handle_.size() == 1 || handle_.size() == 3)); + + if (handle_.size() == 1) + { + _token_stack.emplace(std::make_unique(REGEX)); + } + else + { + handle_.pop(); + assert(handle_.top()->_type == OR); + handle_.pop(); + assert(handle_.top()->_type == SEQUENCE); + perform_or(); + _token_stack.emplace(std::make_unique(OREXP)); + } + } + + void perform_or() + { + // perform or + observer_ptr rhs_ = _tree_node_stack.top(); + + _tree_node_stack.pop(); + + observer_ptr lhs_ = _tree_node_stack.top(); + + _node_ptr_vector.emplace_back + (std::make_unique(lhs_, rhs_)); + _tree_node_stack.top() = _node_ptr_vector.back().get(); + } + + void sub(token_stack &handle_) + { + assert((handle_.top()->_type == SUB && + handle_.size() == 1) || handle_.size() == 2); + + if (handle_.size() == 1) + { + _token_stack.emplace(std::make_unique(SEQUENCE)); + } + else + { + handle_.pop(); + assert(handle_.top()->_type == EXPRESSION); + // perform join + sequence(); + _token_stack.emplace(std::make_unique(SUB)); + } + } + + void repeat(token_stack &handle_) + { + assert(handle_.top()->_type == REPEAT && + handle_.size() >= 1 && handle_.size() <= 3); + + if (handle_.size() == 1) + { + _token_stack.emplace(std::make_unique(EXPRESSION)); + } + else + { + handle_.pop(); + assert(handle_.top()->_type == DUP); + _token_stack.emplace(std::make_unique(REPEAT)); + } + } + +#ifndef NDEBUG + void bol(token_stack &handle_) +#else + void bol(token_stack &) +#endif + { + assert(handle_.top()->_type == BOL && + handle_.size() == 1); + + // store charset + 
_node_ptr_vector.emplace_back + (std::make_unique(bol_token(), true)); + _tree_node_stack.push(_node_ptr_vector.back().get()); + _token_stack.emplace(std::make_unique(REPEAT)); + } + +#ifndef NDEBUG + void eol(token_stack &handle_, id_type &nl_id_) +#else + void eol(token_stack &, id_type &nl_id_) +#endif + { + const string_token nl_('\n'); + const id_type temp_nl_id_ = lookup(nl_); + + assert(handle_.top()->_type == EOL && + handle_.size() == 1); + + if (temp_nl_id_ != ~static_cast(0)) + { + nl_id_ = temp_nl_id_; + } + + // store charset + _node_ptr_vector.emplace_back + (std::make_unique(eol_token(), true)); + _tree_node_stack.push(_node_ptr_vector.back().get()); + _token_stack.emplace(std::make_unique(REPEAT)); + } + + // Uncompressed + void charset(token_stack &handle_, const std::false_type &) + { + assert(handle_.top()->_type == CHARSET && + handle_.size() == 1); + + const id_type id_ = lookup(handle_.top()->_str); + + // store charset + _node_ptr_vector.emplace_back(std::make_unique(id_, true)); + _tree_node_stack.push(_node_ptr_vector.back().get()); + _token_stack.emplace(std::make_unique(REPEAT)); + } + + // Compressed + void charset(token_stack &handle_, const std::true_type &) + { + assert(handle_.top()->_type == CHARSET && + handle_.size() == 1); + + std::unique_ptr token_(handle_.top().release()); + + handle_.pop(); + create_sequence(token_); + } + + // Slice wchar_t into sequence of char. + void create_sequence(std::unique_ptr &token_) + { + string_token_vector data_[char_24_bit ? 
3 : 2]; + + for (const input_range &range_ : token_->_str._ranges) + { + slice_range(range_, data_, + std::integral_constant()); + } + + push_ranges(data_, std::integral_constant()); + + _token_stack.emplace(std::make_unique(OPENPAREN)); + _token_stack.emplace(std::make_unique(REGEX)); + _token_stack.emplace(std::make_unique(CLOSEPAREN)); + } + + // 16 bit unicode + void slice_range(const input_range &range_, string_token_vector data_[2], + const std::false_type &) + { + const unsigned char first_msb_ = static_cast + ((range_.first >> 8) & 0xff); + const unsigned char first_lsb_ = static_cast + (range_.first & 0xff); + const unsigned char second_msb_ = static_cast + ((range_.second >> 8) & 0xff); + const unsigned char second_lsb_ = static_cast + (range_.second & 0xff); + + if (first_msb_ == second_msb_) + { + insert_range(first_msb_, first_msb_, first_lsb_, + second_lsb_, data_); + } + else + { + insert_range(first_msb_, first_msb_, first_lsb_, 0xff, data_); + + if (second_msb_ > first_msb_ + 1) + { + insert_range(first_msb_ + 1, second_msb_ - 1, 0, 0xff, data_); + } + + insert_range(second_msb_, second_msb_, 0, second_lsb_, data_); + } + } + + // 24 bit unicode + void slice_range(const input_range &range_, string_token_vector data_[3], + const std::true_type &) + { + const unsigned char first_msb_ = static_cast + ((range_.first >> 16) & 0xff); + const unsigned char first_mid_ = static_cast + ((range_.first >> 8) & 0xff); + const unsigned char first_lsb_ = static_cast + (range_.first & 0xff); + const unsigned char second_msb_ = static_cast + ((range_.second >> 16) & 0xff); + const unsigned char second_mid_ = static_cast + ((range_.second >> 8) & 0xff); + const unsigned char second_lsb_ = static_cast + (range_.second & 0xff); + + if (first_msb_ == second_msb_) + { + string_token_vector data2_[2]; + + // Re-use 16 bit slice function + slice_range(range_, data2_, std::false_type()); + + for (std::size_t i_ = 0, size_ = data2_[0].size(); + i_ < size_; ++i_) + { + 
insert_range(string_token(first_msb_, first_msb_), + *data2_[0][i_], *data2_[1][i_], data_); + } + } + else + { + insert_range(first_msb_, first_msb_, + first_mid_, first_mid_, + first_lsb_, 0xff, data_); + + if (first_mid_ != 0xff) + { + insert_range(first_msb_, first_msb_, + first_mid_ + 1, 0xff, + 0, 0xff, data_); + } + + if (second_msb_ > first_msb_ + 1) // whole planes strictly between the two end planes + { + insert_range(first_msb_ + 1, second_msb_ - 1, // the plane byte is the msb, not the mid byte + 0, 0xff, + 0, 0xff, data_); + } + + if (second_mid_ != 0) + { + insert_range(second_msb_, second_msb_, + 0, second_mid_ - 1, + 0, 0xff, data_); + insert_range(second_msb_, second_msb_, + second_mid_, second_mid_, + 0, second_lsb_, data_); + } + else + { + insert_range(second_msb_, second_msb_, + 0, second_mid_, + 0, second_lsb_, data_); + } + } + } + + // 16 bit unicode + void insert_range(const unsigned char first_, const unsigned char second_, + const unsigned char first2_, const unsigned char second2_, + string_token_vector data_[2]) + { + const string_token token_(first_ > second_ ? second_ : first_, + first_ > second_ ? first_ : second_); + const string_token token2_(first2_ > second2_ ? second2_ : first2_, + first2_ > second2_ ? 
first2_ : second2_); + + insert_range(token_, token2_, data_); + } + + void insert_range(const string_token &token_, const string_token &token2_, + string_token_vector data_[2]) + { + typename string_token_vector::const_iterator iter_ = + std::find_if(data_[0].begin(), data_[0].end(), + [&token_](const std::unique_ptr &rhs_) + { + return token_ == *rhs_.get(); + }); + + if (iter_ == data_[0].end()) + { + data_[0].emplace_back(std::make_unique(token_)); + data_[1].emplace_back(std::make_unique(token2_)); + } + else + { + const std::size_t index_ = iter_ - data_[0].begin(); + + data_[1][index_]->insert(token2_); + } + } + + // 24 bit unicode + void insert_range(const unsigned char first_, const unsigned char second_, + const unsigned char first2_, const unsigned char second2_, + const unsigned char first3_, const unsigned char second3_, + string_token_vector data_[3]) + { + const string_token token_(first_ > second_ ? second_ : first_, + first_ > second_ ? first_ : second_); + const string_token token2_(first2_ > second2_ ? second2_ : first2_, + first2_ > second2_ ? first2_ : second2_); + const string_token token3_(first3_ > second3_ ? second3_ : first3_, + first3_ > second3_ ? 
first3_ : second3_); + + insert_range(token_, token2_, token3_, data_); + } + + void insert_range(const string_token &token_, const string_token &token2_, + const string_token &token3_, string_token_vector data_[3]) + { + auto iter_ = data_[0].cbegin(); + auto end_ = data_[0].cend(); + bool finished_ = false; + + do + { + iter_ = std::find_if(iter_, end_, + [&token_](const std::unique_ptr &rhs_) + { + return token_ == *rhs_.get(); + }); + + if (iter_ == end_) + { + data_[0].emplace_back(std::make_unique(token_)); + data_[1].emplace_back(std::make_unique(token2_)); + data_[2].emplace_back(std::make_unique(token3_)); + finished_ = true; + } + else + { + const std::size_t index_ = iter_ - data_[0].begin(); + + if (*data_[1][index_] == token2_) + { + data_[2][index_]->insert(token3_); + finished_ = true; + } + else + { + ++iter_; + } + } + } while (!finished_); + } + + // 16 bit unicode + void push_ranges(string_token_vector data_[2], const std::false_type &) + { + auto viter_ = data_[0].cbegin(); + auto vend_ = data_[0].cend(); + auto viter2_ = data_[1].cbegin(); + + push_range(viter_++->get()); + push_range(viter2_++->get()); + sequence(); + + while (viter_ != vend_) + { + push_range(viter_++->get()); + push_range(viter2_++->get()); + sequence(); + perform_or(); + } + } + + // 24 bit unicode + void push_ranges(string_token_vector data_[3], const std::true_type &) + { + auto viter_ = data_[0].cbegin(); + auto vend_ = data_[0].cend(); + auto viter2_ = data_[1].cbegin(); + auto viter3_ = data_[2].cbegin(); + + push_range(viter_++->get()); + push_range(viter2_++->get()); + sequence(); + push_range(viter3_++->get()); + sequence(); + + while (viter_ != vend_) + { + push_range(viter_++->get()); + push_range(viter2_++->get()); + sequence(); + push_range(viter3_++->get()); + sequence(); + perform_or(); + } + } + + void push_range(observer_ptr token_) + { + const id_type id_ = lookup(*token_); + + _node_ptr_vector.emplace_back(std::make_unique(id_, true)); + 
_tree_node_stack.push(_node_ptr_vector.back().get()); + } + + id_type lookup(const string_token &charset_) + { + // Converted to id_type below. + std::size_t id_ = sm_traits::npos(); + + if (static_cast(id_) < id_) + { + throw runtime_error("id_type is not large enough " + "to hold all ids."); + } + + typename charset_map::const_iterator iter_ = + _charset_map.find(charset_); + + if (iter_ == _charset_map.end()) + { + id_ = _charset_map.size(); + _charset_map.insert(charset_pair(charset_, + static_cast(id_))); + } + else + { + id_ = iter_->second; + } + + return static_cast(id_); + } + + void openparen(token_stack &handle_) + { + assert(handle_.top()->_type == OPENPAREN && + handle_.size() == 3); + + handle_.pop(); + assert(handle_.top()->_type == REGEX); + handle_.pop(); + assert(handle_.top()->_type == CLOSEPAREN); + _token_stack.emplace(std::make_unique(REPEAT)); + } + + void sequence() + { + observer_ptr rhs_ = _tree_node_stack.top(); + + _tree_node_stack.pop(); + + observer_ptr lhs_ = _tree_node_stack.top(); + + _node_ptr_vector.emplace_back + (std::make_unique(lhs_, rhs_)); + _tree_node_stack.top() = _node_ptr_vector.back().get(); + } + + void optional(const bool greedy_) + { + // perform ? + observer_ptr lhs_ = _tree_node_stack.top(); + // Don't know if lhs_ is a leaf_node, so get firstpos. + auto &firstpos_ = lhs_->firstpos(); + + for (observer_ptr node_ : firstpos_) + { + // These are leaf_nodes! 
+ node_->greedy(greedy_); + } + + _node_ptr_vector.emplace_back(std::make_unique + (node::null_token(), greedy_)); + + observer_ptr rhs_ = _node_ptr_vector.back().get(); + + _node_ptr_vector.emplace_back + (std::make_unique(lhs_, rhs_)); + _tree_node_stack.top() = _node_ptr_vector.back().get(); + } + + void zero_or_more(const bool greedy_) + { + // perform * + observer_ptr ptr_ = _tree_node_stack.top(); + + _node_ptr_vector.emplace_back + (std::make_unique(ptr_, greedy_)); + _tree_node_stack.top() = _node_ptr_vector.back().get(); + } + + void one_or_more(const bool greedy_) + { + // perform + + observer_ptr lhs_ = _tree_node_stack.top(); + observer_ptr copy_ = lhs_->copy(_node_ptr_vector); + + _node_ptr_vector.emplace_back(std::make_unique + (copy_, greedy_)); + + observer_ptr rhs_ = _node_ptr_vector.back().get(); + + _node_ptr_vector.emplace_back + (std::make_unique(lhs_, rhs_)); + _tree_node_stack.top() = _node_ptr_vector.back().get(); + } + + // perform {n[,[m]]} + // Semantic checks have already been performed. + // {0,} = * + // {0,1} = ? + // {1,} = + + // therefore we do not check for these cases. + void repeatn(const bool greedy_, observer_ptr token_) + { + const rules_char_type *str_ = token_->_extra.c_str(); + std::size_t min_ = 0; + bool comma_ = false; + std::size_t max_ = 0; + + while (*str_>= '0' && *str_ <= '9') + { + min_ *= 10; + min_ += *str_ - '0'; + ++str_; + } + + comma_ = *str_ == ','; + + if (comma_) ++str_; + + while (*str_>= '0' && *str_ <= '9') + { + max_ *= 10; + max_ += *str_ - '0'; + ++str_; + } + + if (!(min_ == 1 && !comma_)) + { + const std::size_t top_ = min_ > 0 ? 
min_ : max_; + + if (min_ == 0) + { + optional(greedy_); + } + + observer_ptr prev_ = _tree_node_stack.top()-> + copy(_node_ptr_vector); + observer_ptr curr_ = nullptr; + + for (std::size_t i_ = 2; i_ < top_; ++i_) + { + curr_ = prev_->copy(_node_ptr_vector); + _tree_node_stack.push(prev_); + sequence(); + prev_ = curr_; + } + + if (comma_ && min_ > 0) + { + if (min_ > 1) + { + curr_ = prev_->copy(_node_ptr_vector); + _tree_node_stack.push(prev_); + sequence(); + prev_ = curr_; + } + + if (comma_ && max_) + { + _tree_node_stack.push(prev_); + optional(greedy_); + prev_ = _tree_node_stack.top(); + _tree_node_stack.pop(); + + const std::size_t count_ = max_ - min_; + + for (std::size_t i_ = 1; i_ < count_; ++i_) + { + curr_ = prev_->copy(_node_ptr_vector); + _tree_node_stack.push(prev_); + sequence(); + prev_ = curr_; + } + } + else + { + _tree_node_stack.push(prev_); + zero_or_more(greedy_); + prev_ = _tree_node_stack.top(); + _tree_node_stack.pop(); + } + } + + _tree_node_stack.push(prev_); + sequence(); + } + } + + void fixup_bol(observer_ptr &root_)const + { + const auto &first_ = root_->firstpos(); + bool found_ = false; + + for (observer_ptr node_ : first_) + { + found_ = !node_->end_state() && node_->token() == bol_token(); + + if (found_) break; + } + + if (!found_) + { + _node_ptr_vector.emplace_back + (std::make_unique(bol_token(), true)); + + observer_ptr lhs_ = _node_ptr_vector.back().get(); + + _node_ptr_vector.emplace_back + (std::make_unique(node::null_token(), true)); + + observer_ptr rhs_ = _node_ptr_vector.back().get(); + + _node_ptr_vector.emplace_back + (std::make_unique(lhs_, rhs_)); + lhs_ = _node_ptr_vector.back().get(); + + _node_ptr_vector.emplace_back + (std::make_unique(lhs_, root_)); + root_ = _node_ptr_vector.back().get(); + } + } +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/parser/tokeniser/re_token.hpp b/YACReaderLibrary/lexertl/parser/tokeniser/re_token.hpp new file mode 100644 index 00000000..271a7b1a --- /dev/null +++ 
b/YACReaderLibrary/lexertl/parser/tokeniser/re_token.hpp @@ -0,0 +1,100 @@ +// re_token.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_RE_TOKEN_HPP +#define LEXERTL_RE_TOKEN_HPP + +#include "../../string_token.hpp" + +namespace lexertl +{ +namespace detail +{ +// Note that tokens following END are never seen by parser.hpp. +enum token_type {BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT, + DUP, OR, CHARSET, BOL, EOL, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT, + ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN, + END, DIFF}; + +template +struct basic_re_token +{ + using string_token = basic_string_token; + using string = std::basic_string; + + token_type _type; + string _extra; + string_token _str; + + basic_re_token(const token_type type_ = BEGIN) : + _type(type_), + _extra(), + _str() + { + } + + void clear() + { + _type = BEGIN; + _extra.clear(); + _str.clear(); + } + + void swap(basic_re_token &rhs_) + { + std::swap(_type, rhs_._type); + _extra.swap(rhs_._extra); + _str.swap(rhs_._str); + } + + char precedence(const token_type type_) const + { + // Moved in here for Solaris compiler. + static const char precedence_table_[END + 1][END + 1] = { +// BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP, | , CHR, BOL, EOL, MCR, ( , ) , ? , ?? , * , *? 
, + , +?, {n}?, {n}, END +/*BEGIN*/{ ' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/*REGEX*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/*OREXP*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/* SEQ */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/* SUB */{ ' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/*EXPRE*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/* RPT */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>' }, +/*DUPLI*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/* | */{ ' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }, +/*CHARA*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, +/* BOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, +/* EOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, +/*MACRO*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, +/* ( */{ ' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }, +/* ) */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', 
'>', '>', '>', '>', '>', '>' }, +/* ? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/* ?? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/* * */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/* *? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/* + */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/* +? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/*{n,m}*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/*{nm}?*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, +/* END */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' } +}; + + return precedence_table_[_type][type_]; + } + + const char *precedence_string() const + { + // Moved in here for Solaris compiler. 
+ static const char *precedence_strings_[END + 1] = + {"BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION", + "REPEAT", "DUPLICATE", "|", "CHARSET", "^", "$", "MACRO", "(", ")", + "?", "??", "*", "*?", "+", "+?", "{n[,[m]]}", "{n[,[m]]}?", "END"}; + + return precedence_strings_[_type]; + } +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser.hpp b/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser.hpp new file mode 100644 index 00000000..c7e1e52d --- /dev/null +++ b/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser.hpp @@ -0,0 +1,778 @@ +// tokeniser.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_RE_TOKENISER_HPP +#define LEXERTL_RE_TOKENISER_HPP + +#include +#include "re_token.hpp" +#include "../../runtime_error.hpp" +#include +#include "../../string_token.hpp" +#include "re_tokeniser_helper.hpp" + +namespace lexertl +{ +namespace detail +{ +template +class basic_re_tokeniser +{ +public: + using re_token = basic_re_token; + using tokeniser_helper = + basic_re_tokeniser_helper; + using char_state = typename tokeniser_helper::char_state; + using state = typename tokeniser_helper::state; + using string_token = basic_string_token; + + static void next(re_token &lhs_, state &state_, re_token &token_) + { + rules_char_type ch_ = 0; + bool eos_ = state_.next(ch_); + bool skipped_ = false; + + token_.clear(); + + do + { + // string begin/end + while (!eos_ && ch_ == '"') + { + state_._in_string ^= 1; + eos_ = state_.next(ch_); + } + + if (eos_) break; + + // (?# ...) 
+ skipped_ = comment(eos_, ch_, state_); + + if (eos_) break; + + // skip_ws set + skipped_ |= skip(eos_, ch_, state_); + } while (!eos_ && skipped_); + + if (eos_) + { + if (state_._in_string) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (missing '\"')"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + if (state_._paren_count) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (missing ')')"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + token_._type = END; + } + else + { + if (ch_ == '\\') + { + // Even if we are in a string, respect escape sequences... + token_._type = CHARSET; + escape(state_, token_._str); + } + else if (state_._in_string) + { + // All other meta characters lose their special meaning + // inside a string. + token_._type = CHARSET; + add_char(ch_, state_, token_._str); + } + else + { + // Not an escape sequence and not inside a string, so + // check for meta characters. 
+ switch (ch_) + { + case '(': + token_._type = OPENPAREN; + ++state_._paren_count; + read_options(state_); + break; + case ')': + --state_._paren_count; + + if (state_._paren_count < 0) + { + std::ostringstream ss_; + + ss_ << "Number of open parenthesis < 0 " + "at index " << state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + token_._type = CLOSEPAREN; + + if (!state_._flags_stack.empty()) + { + state_._flags = state_._flags_stack.top(); + state_._flags_stack.pop(); + } + + break; + case '?': + if (!state_.eos() && *state_._curr == '?') + { + token_._type = AOPT; + state_.increment(); + } + else + { + token_._type = OPT; + } + + break; + case '*': + if (!state_.eos() && *state_._curr == '?') + { + token_._type = AZEROORMORE; + state_.increment(); + } + else + { + token_._type = ZEROORMORE; + } + + break; + case '+': + if (!state_.eos() && *state_._curr == '?') + { + token_._type = AONEORMORE; + state_.increment(); + } + else + { + token_._type = ONEORMORE; + } + + break; + case '{': + open_curly(lhs_, state_, token_); + break; + case '|': + token_._type = OR; + break; + case '^': + if (!state_._macro_name && + state_._curr - 1 == state_._start) + { + token_._type = BOL; + } + else + { + token_._type = CHARSET; + token_._str.insert(range(ch_, ch_)); + } + + break; + case '$': + if (!state_._macro_name && state_._curr == state_._end) + { + token_._type = EOL; + } + else + { + token_._type = CHARSET; + token_._str.insert(range(ch_, ch_)); + } + + break; + case '.': + { + token_._type = CHARSET; + + if (state_._flags & dot_not_newline) + { + token_._str.insert(range('\n', '\n')); + } + else if (state_._flags & dot_not_cr_lf) + { + token_._str.insert(range('\n', '\n')); + token_._str.insert(range('\r', '\r')); + } + + token_._str.negate(); + break; + } + case '[': + { + token_._type = CHARSET; + tokeniser_helper::charset(state_, token_._str); + break; + } + case '/': + { + std::ostringstream ss_; + + ss_ << "Lookahead ('/') is not 
supported yet"; + state_.error(ss_); + throw runtime_error(ss_.str()); + break; + } + default: + token_._type = CHARSET; + add_char(ch_, state_, token_._str); + break; + } + } + } + } + +private: + using range = typename string_token::range; + + static bool comment(bool &eos_, rules_char_type &ch_, state &state_) + { + bool skipped_ = false; + + if (!state_._in_string && ch_ == '(' && !state_.eos() && + *state_._curr == '?' && state_._curr + 1 < state_._end && + *(state_._curr + 1) == '#') + { + std::size_t paren_count_ = 1; + + state_.increment(); + state_.increment(); + + do + { + eos_ = state_.next(ch_); + + if (ch_ == '(') + { + ++paren_count_; + } + else if (ch_ == ')') + { + --paren_count_; + } + } while (!eos_ && !(ch_ == ')' && paren_count_ == 0)); + + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (unterminated comment)"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + else + { + eos_ = state_.next(ch_); + } + + skipped_ = true; + } + + return skipped_; + } + + static bool skip(bool &eos_, rules_char_type &ch_, state &state_) + { + bool skipped_ = false; + + if ((state_._flags & skip_ws) && !state_._in_string) + { + bool c_comment_ = false; + bool skip_ws_ = false; + + do + { + c_comment_ = ch_ == '/' && !state_.eos() && + *state_._curr == '*'; + skip_ws_ = !c_comment_ && (ch_ == ' ' || ch_ == '\t' || + ch_ == '\n' || ch_ == '\r' || ch_ == '\f' || ch_ == '\v'); + + if (c_comment_) + { + state_.increment(); + eos_ = state_.next(ch_); + + while (!eos_ && !(ch_ == '*' && !state_.eos() && + *state_._curr == '/')) + { + eos_ = state_.next(ch_); + } + + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (unterminated C style comment)"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + else + { + state_.increment(); + eos_ = state_.next(ch_); + } + + skipped_ = true; + } + 
else if (skip_ws_) + { + eos_ = state_.next(ch_); + skipped_ = true; + } + } while (!eos_ && (c_comment_ || skip_ws_)); + } + + return skipped_; + } + + static void read_options(state &state_) + { + if (!state_.eos() && *state_._curr == '?') + { + rules_char_type ch_ = 0; + bool eos_ = false; + bool negate_ = false; + + state_.increment(); + eos_ = state_.next(ch_); + state_._flags_stack.push(state_._flags); + + while (!eos_ && ch_ != ':') + { + switch (ch_) + { + case '-': + negate_ ^= 1; + break; + case 'i': + if (negate_) + { + state_._flags = state_._flags & ~icase; + } + else + { + state_._flags = state_._flags | icase; + } + + negate_ = false; + break; + case 's': + if (negate_) + { +#ifdef _WIN32 + state_._flags = state_._flags | dot_not_cr_lf; +#else + state_._flags = state_._flags | dot_not_newline; +#endif + } + else + { +#ifdef _WIN32 + state_._flags = state_._flags & ~dot_not_cr_lf; +#else + state_._flags = state_._flags & ~dot_not_newline; +#endif + } + + negate_ = false; + break; + case 'x': + if (negate_) + { + state_._flags = state_._flags & ~skip_ws; + } + else + { + state_._flags = state_._flags | skip_ws; + } + + negate_ = false; + break; + default: + { + std::ostringstream ss_; + + ss_ << "Unknown option at index " << + state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + eos_ = state_.next(ch_); + } + + // End of string handler will handle early termination + } + else if (!state_._flags_stack.empty()) + { + state_._flags_stack.push(state_._flags); + } + } + + static void escape(state &state_, string_token &token_) + { + char_type ch_ = 0; + std::size_t str_len_ = 0; + const char *str_ = tokeniser_helper::escape_sequence(state_, + ch_, str_len_); + + if (str_) + { + char_state state2_(str_ + 1, str_ + str_len_, state_._id, + state_._flags, state_._locale, 0); + + tokeniser_helper::charset(state2_, token_); + } + else + { + add_char(ch_, state_, token_); + } + } + + static void add_char(const char_type ch_, const 
state &state_, + string_token &token_) + { + range range_(ch_, ch_); + + token_.insert(range_); + + if (state_._flags & icase) + { + string_token folded_; + + tokeniser_helper::fold(range_, state_._locale, + folded_, typename tokeniser_helper::template + size()); + + if (!folded_.empty()) + { + token_.insert(folded_); + } + } + } + + static void open_curly(re_token &lhs_, state &state_, + re_token &token_) + { + if (state_.eos()) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (missing '}')"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + else if (*state_._curr == '-' || *state_._curr == '+') + { + rules_char_type ch_ = 0; + + if (lhs_._type != CHARSET) + { + std::ostringstream ss_; + + ss_ << "CHARSET must precede {" << + state_._curr << "} at index " << + state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + state_.next(ch_); + token_._type = DIFF; + token_._extra = ch_; + + if (state_.next(ch_)) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (missing '}')"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + if (ch_ != '}') + { + std::ostringstream ss_; + + ss_ << "Missing '}' at index " << state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + else if (*state_._curr >= '0' && *state_._curr <= '9') + { + repeat_n(state_, token_); + } + else + { + macro(state_, token_); + } + } + + // SYNTAX: + // {n[,[n]]} + // SEMANTIC RULES: + // {0} - INVALID (throw exception) + // {0,} = * + // {0,0} - INVALID (throw exception) + // {0,1} = ? 
+ // {1,} = + + // {min,max} where min == max - {min} + // {min,max} where max < min - INVALID (throw exception) + static void repeat_n(state &state_, re_token &token_) + { + rules_char_type ch_ = 0; + bool eos_ = state_.next(ch_); + std::size_t min_ = 0; + std::size_t max_ = 0; + + while (!eos_ && ch_ >= '0' && ch_ <= '9') + { + min_ *= 10; + min_ += ch_ - '0'; + token_._extra += ch_; + eos_ = state_.next(ch_); + } + + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (missing repeat terminator '}')"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + bool min_max_ = false; + bool repeatn_ = true; + + if (ch_ == ',') + { + token_._extra += ch_; + eos_ = state_.next(ch_); + + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (missing repeat terminator '}')"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + if (ch_ == '}') + { + // Small optimisation: Check for '*' equivalency. + if (min_ == 0) + { + token_._type = ZEROORMORE; + repeatn_ = false; + } + // Small optimisation: Check for '+' equivalency. + else if (min_ == 1) + { + token_._type = ONEORMORE; + repeatn_ = false; + } + } + else + { + if (ch_ < '0' || ch_ > '9') + { + std::ostringstream ss_; + + ss_ << "Missing repeat terminator '}' at index " << + state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + min_max_ = true; + + do + { + max_ *= 10; + max_ += ch_ - '0'; + token_._extra += ch_; + eos_ = state_.next(ch_); + } while (!eos_ && ch_ >= '0' && ch_ <= '9'); + + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (missing repeat terminator '}')"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + // Small optimisation: Check for '?' equivalency. 
+ if (min_ == 0 && max_ == 1) + { + token_._type = OPT; + repeatn_ = false; + } + // Small optimisation: if min == max, then min. + else if (min_ == max_) + { + token_._extra.erase(token_._extra.find(',')); + min_max_ = false; + max_ = 0; + } + } + } + + if (ch_ != '}') + { + std::ostringstream ss_; + + ss_ << "Missing repeat terminator '}' at index " << + state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + if (repeatn_) + { + // SEMANTIC VALIDATION follows: + // NOTE: {0,} has already become * + // therefore we don't check for a comma. + if (min_ == 0 && max_ == 0) + { + std::ostringstream ss_; + + ss_ << "Cannot have exactly zero repeats preceding index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + if (min_max_ && max_ < min_) + { + std::ostringstream ss_; + + ss_ << "Max less than min preceding index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + if (!state_.eos() && *state_._curr == '?') + { + token_._type = AREPEATN; + state_.increment(); + } + else + { + token_._type = REPEATN; + } + } + else if (token_._type == ZEROORMORE) + { + if (!state_.eos() && *state_._curr == '?') + { + token_._type = AZEROORMORE; + state_.increment(); + } + } + else if (token_._type == ONEORMORE) + { + if (!state_.eos() && *state_._curr == '?') + { + token_._type = AONEORMORE; + state_.increment(); + } + } + else if (token_._type == OPT) + { + if (!state_.eos() && *state_._curr == '?') + { + token_._type = AOPT; + state_.increment(); + } + } + } + + static void macro(state &state_, re_token &token_) + { + rules_char_type ch_ = 0; + bool eos_ = false; + + state_.next(ch_); + + if (ch_ != '_' && !(ch_ >= 'A' && ch_ <= 'Z') && + !(ch_ >= 'a' && ch_ <= 'z')) + { + std::ostringstream ss_; + + ss_ << "Invalid MACRO name at index " << state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + do + { + token_._extra += ch_; + eos_ = state_.next(ch_); + + if 
(eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (missing MACRO name terminator '}')"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } while (ch_ == '_' || ch_ == '-' || (ch_ >= 'A' && ch_ <= 'Z') || + (ch_ >= 'a' && ch_ <= 'z') || (ch_ >= '0' && ch_ <= '9')); + + if (ch_ != '}') + { + std::ostringstream ss_; + + ss_ << "Missing MACRO name terminator '}' at index " << + state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + token_._type = MACRO; + } +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_helper.hpp b/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_helper.hpp new file mode 100644 index 00000000..f9f40cfe --- /dev/null +++ b/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_helper.hpp @@ -0,0 +1,3157 @@ +// tokeniser_helper.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_RE_TOKENISER_HELPER_HPP +#define LEXERTL_RE_TOKENISER_HELPER_HPP + +#include "../../char_traits.hpp" +// strlen() +#include +#include "re_tokeniser_state.hpp" +#include "../../runtime_error.hpp" +#include +#include "../../string_token.hpp" + +namespace lexertl +{ +namespace detail +{ +template > +class basic_re_tokeniser_helper +{ +public: + using char_state = basic_re_tokeniser_state; + using state = basic_re_tokeniser_state; + using string_token = basic_string_token; + using index_type = typename string_token::index_type; + using range = typename string_token::range; + + template + struct size + { + }; + + using one = size<1>; + using two = size<2>; + using four = size<4>; + + template + static const char *escape_sequence(state_type &state_, + char_type &ch_, std::size_t &str_len_) + { + bool eos_ = state_.eos(); + + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following '\\'"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + const char *str_ = charset_shortcut(state_, str_len_); + + if (str_) + { + state_.increment(); + } + else + { + ch_ = chr(state_); + } + + return str_; + } + + // This function can call itself. 
+ template + static void charset(state_type &state_, string_token &token_) + { + bool negated_ = false; + typename state_type::char_type ch_ = 0; + bool eos_ = state_.next(ch_); + + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following '['"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + negated_ = ch_ == '^'; + + if (negated_) + { + eos_ = state_.next(ch_); + + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following '^'"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + bool chset_ = false; + typename string_token::char_type prev_ = 0; + + do + { + if (ch_ == '\\') + { + std::size_t str_len_ = 0; + const char *str_ = escape_sequence(state_, prev_, + str_len_); + + chset_ = str_ != 0; + + if (chset_) + { + char_state temp_state_(str_ + 1, str_ + str_len_, + state_._id, state_._flags, state_._locale, 0); + string_token temp_token_; + + charset(temp_state_, temp_token_); + token_.insert(temp_token_); + } + } + else if (ch_ == '[' && !state_.eos() && *state_._curr == ':') + { + state_.increment(); + posix(state_, token_); + chset_ = true; + } + else + { + chset_ = false; + prev_ = ch_; + } + + eos_ = state_.next(ch_); + + // Covers preceding if, else if and else + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (missing ']')"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + if (ch_ == '-' && *state_._curr != ']') + { + charset_range(chset_, state_, eos_, ch_, prev_, + token_); + } + else if (!chset_) + { + range range_(prev_, prev_); + + token_.insert(range_); + + if (state_._flags & icase) + { + string_token folded_; + + fold(range_, state_._locale, folded_, + size()); + + if (!folded_.empty()) + { + token_.insert(folded_); + } + } + } + } while (ch_ != ']'); 
+ + if (negated_) + { + token_.negate(); + } + + if (token_.empty()) + { + std::ostringstream ss_; + + ss_ << "Empty charset not allowed preceding index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + static void fold(const range &range_, const std::locale &locale_, + string_token &out_, const one &) + { + // If string_token::char_type is 16 bit may overflow, + // so use std::size_t. + std::size_t start_ = range_.first; + std::size_t end_ = range_.second; + + // In 8 bit char mode, use locale and therefore consider every char + // individually. + for (; start_ <= end_; ++start_) + { + const input_char_type upper_ = std::toupper + (static_cast(start_), locale_); + const input_char_type lower_ = std::tolower + (static_cast(start_), locale_); + + if (upper_ != static_cast(start_)) + { + out_.insert(range(upper_, upper_)); + } + + if (lower_ != static_cast(start_)) + { + out_.insert(range(lower_, lower_)); + } + } + } + + // http://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt + static void fold(const range &range_, const std::locale &, + string_token &out_, const two &) + { + static const fold_pair mapping_[] = + {{{0x0041, 0x005a}, {0x0061, 0x007a}}, + {{0x0061, 0x007a}, {0x0041, 0x005a}}, + {{0x00b5, 0x00b5}, {0x039c, 0x039c}}, + {{0x00c0, 0x00d6}, {0x00e0, 0x00f6}}, + {{0x00d8, 0x00de}, {0x00f8, 0x00fe}}, + {{0x00e0, 0x00f6}, {0x00c0, 0x00d6}}, + {{0x00f8, 0x00fe}, {0x00d8, 0x00de}}, + {{0x00ff, 0x00ff}, {0x0178, 0x0178}}, + {{0x0100, 0x0101}, {0x0101, 0x0100}}, + {{0x0102, 0x0103}, {0x0103, 0x0102}}, + {{0x0104, 0x0105}, {0x0105, 0x0104}}, + {{0x0106, 0x0107}, {0x0107, 0x0106}}, + {{0x0108, 0x0109}, {0x0109, 0x0108}}, + {{0x010a, 0x010b}, {0x010b, 0x010a}}, + {{0x010c, 0x010d}, {0x010d, 0x010c}}, + {{0x010e, 0x010f}, {0x010f, 0x010e}}, + {{0x0110, 0x0111}, {0x0111, 0x0110}}, + {{0x0112, 0x0113}, {0x0113, 0x0112}}, + {{0x0114, 0x0115}, {0x0115, 0x0114}}, + {{0x0116, 0x0117}, {0x0117, 0x0116}}, + {{0x0118, 0x0119}, 
{0x0119, 0x0118}}, + {{0x011a, 0x011b}, {0x011b, 0x011a}}, + {{0x011c, 0x011d}, {0x011d, 0x011c}}, + {{0x011e, 0x011f}, {0x011f, 0x011e}}, + {{0x0120, 0x0121}, {0x0121, 0x0120}}, + {{0x0122, 0x0123}, {0x0123, 0x0122}}, + {{0x0124, 0x0125}, {0x0125, 0x0124}}, + {{0x0126, 0x0127}, {0x0127, 0x0126}}, + {{0x0128, 0x0129}, {0x0129, 0x0128}}, + {{0x012a, 0x012b}, {0x012b, 0x012a}}, + {{0x012c, 0x012d}, {0x012d, 0x012c}}, + {{0x012e, 0x012f}, {0x012f, 0x012e}}, + {{0x0130, 0x0130}, {0x0069, 0x0069}}, + {{0x0131, 0x0131}, {0x0049, 0x0049}}, + {{0x0132, 0x0133}, {0x0133, 0x0132}}, + {{0x0134, 0x0135}, {0x0135, 0x0134}}, + {{0x0136, 0x0137}, {0x0137, 0x0136}}, + {{0x0139, 0x013a}, {0x013a, 0x0139}}, + {{0x013b, 0x013c}, {0x013c, 0x013b}}, + {{0x013d, 0x013e}, {0x013e, 0x013d}}, + {{0x013f, 0x0140}, {0x0140, 0x013f}}, + {{0x0141, 0x0142}, {0x0142, 0x0141}}, + {{0x0143, 0x0144}, {0x0144, 0x0143}}, + {{0x0145, 0x0146}, {0x0146, 0x0145}}, + {{0x0147, 0x0148}, {0x0148, 0x0147}}, + {{0x014a, 0x014b}, {0x014b, 0x014a}}, + {{0x014c, 0x014d}, {0x014d, 0x014c}}, + {{0x014e, 0x014f}, {0x014f, 0x014e}}, + {{0x0150, 0x0151}, {0x0151, 0x0150}}, + {{0x0152, 0x0153}, {0x0153, 0x0152}}, + {{0x0154, 0x0155}, {0x0155, 0x0154}}, + {{0x0156, 0x0157}, {0x0157, 0x0156}}, + {{0x0158, 0x0159}, {0x0159, 0x0158}}, + {{0x015a, 0x015b}, {0x015b, 0x015a}}, + {{0x015c, 0x015d}, {0x015d, 0x015c}}, + {{0x015e, 0x015f}, {0x015f, 0x015e}}, + {{0x0160, 0x0161}, {0x0161, 0x0160}}, + {{0x0162, 0x0163}, {0x0163, 0x0162}}, + {{0x0164, 0x0165}, {0x0165, 0x0164}}, + {{0x0166, 0x0167}, {0x0167, 0x0166}}, + {{0x0168, 0x0169}, {0x0169, 0x0168}}, + {{0x016a, 0x016b}, {0x016b, 0x016a}}, + {{0x016c, 0x016d}, {0x016d, 0x016c}}, + {{0x016e, 0x016f}, {0x016f, 0x016e}}, + {{0x0170, 0x0171}, {0x0171, 0x0170}}, + {{0x0172, 0x0173}, {0x0173, 0x0172}}, + {{0x0174, 0x0175}, {0x0175, 0x0174}}, + {{0x0176, 0x0177}, {0x0177, 0x0176}}, + {{0x0178, 0x0178}, {0x00ff, 0x00ff}}, + {{0x0179, 0x017a}, {0x017a, 0x0179}}, + {{0x017b, 0x017c}, 
{0x017c, 0x017b}}, + {{0x017d, 0x017e}, {0x017e, 0x017d}}, + {{0x017f, 0x017f}, {0x0053, 0x0053}}, + {{0x0180, 0x0180}, {0x0243, 0x0243}}, + {{0x0181, 0x0181}, {0x0253, 0x0253}}, + {{0x0182, 0x0183}, {0x0183, 0x0182}}, + {{0x0184, 0x0185}, {0x0185, 0x0184}}, + {{0x0186, 0x0186}, {0x0254, 0x0254}}, + {{0x0187, 0x0188}, {0x0188, 0x0187}}, + {{0x0189, 0x018a}, {0x0256, 0x0257}}, + {{0x018b, 0x018c}, {0x018c, 0x018b}}, + {{0x018e, 0x018e}, {0x01dd, 0x01dd}}, + {{0x018f, 0x018f}, {0x0259, 0x0259}}, + {{0x0190, 0x0190}, {0x025b, 0x025b}}, + {{0x0191, 0x0192}, {0x0192, 0x0191}}, + {{0x0193, 0x0193}, {0x0260, 0x0260}}, + {{0x0194, 0x0194}, {0x0263, 0x0263}}, + {{0x0195, 0x0195}, {0x01f6, 0x01f6}}, + {{0x0196, 0x0196}, {0x0269, 0x0269}}, + {{0x0197, 0x0197}, {0x0268, 0x0268}}, + {{0x0198, 0x0199}, {0x0199, 0x0198}}, + {{0x019a, 0x019a}, {0x023d, 0x023d}}, + {{0x019c, 0x019c}, {0x026f, 0x026f}}, + {{0x019d, 0x019d}, {0x0272, 0x0272}}, + {{0x019e, 0x019e}, {0x0220, 0x0220}}, + {{0x019f, 0x019f}, {0x0275, 0x0275}}, + {{0x01a0, 0x01a1}, {0x01a1, 0x01a0}}, + {{0x01a2, 0x01a3}, {0x01a3, 0x01a2}}, + {{0x01a4, 0x01a5}, {0x01a5, 0x01a4}}, + {{0x01a6, 0x01a6}, {0x0280, 0x0280}}, + {{0x01a7, 0x01a8}, {0x01a8, 0x01a7}}, + {{0x01a9, 0x01a9}, {0x0283, 0x0283}}, + {{0x01ac, 0x01ad}, {0x01ad, 0x01ac}}, + {{0x01ae, 0x01ae}, {0x0288, 0x0288}}, + {{0x01af, 0x01b0}, {0x01b0, 0x01af}}, + {{0x01b1, 0x01b2}, {0x028a, 0x028b}}, + {{0x01b3, 0x01b4}, {0x01b4, 0x01b3}}, + {{0x01b5, 0x01b6}, {0x01b6, 0x01b5}}, + {{0x01b7, 0x01b7}, {0x0292, 0x0292}}, + {{0x01b8, 0x01b9}, {0x01b9, 0x01b8}}, + {{0x01bc, 0x01bd}, {0x01bd, 0x01bc}}, + {{0x01bf, 0x01bf}, {0x01f7, 0x01f7}}, + {{0x01c4, 0x01c4}, {0x01c6, 0x01c6}}, + {{0x01c6, 0x01c6}, {0x01c4, 0x01c4}}, + {{0x01c7, 0x01c7}, {0x01c9, 0x01c9}}, + {{0x01c9, 0x01c9}, {0x01c7, 0x01c7}}, + {{0x01ca, 0x01ca}, {0x01cc, 0x01cc}}, + {{0x01cc, 0x01cc}, {0x01ca, 0x01ca}}, + {{0x01cd, 0x01ce}, {0x01ce, 0x01cd}}, + {{0x01cf, 0x01d0}, {0x01d0, 0x01cf}}, + {{0x01d1, 0x01d2}, 
{0x01d2, 0x01d1}}, + {{0x01d3, 0x01d4}, {0x01d4, 0x01d3}}, + {{0x01d5, 0x01d6}, {0x01d6, 0x01d5}}, + {{0x01d7, 0x01d8}, {0x01d8, 0x01d7}}, + {{0x01d9, 0x01da}, {0x01da, 0x01d9}}, + {{0x01db, 0x01dc}, {0x01dc, 0x01db}}, + {{0x01dd, 0x01dd}, {0x018e, 0x018e}}, + {{0x01de, 0x01df}, {0x01df, 0x01de}}, + {{0x01e0, 0x01e1}, {0x01e1, 0x01e0}}, + {{0x01e2, 0x01e3}, {0x01e3, 0x01e2}}, + {{0x01e4, 0x01e5}, {0x01e5, 0x01e4}}, + {{0x01e6, 0x01e7}, {0x01e7, 0x01e6}}, + {{0x01e8, 0x01e9}, {0x01e9, 0x01e8}}, + {{0x01ea, 0x01eb}, {0x01eb, 0x01ea}}, + {{0x01ec, 0x01ed}, {0x01ed, 0x01ec}}, + {{0x01ee, 0x01ef}, {0x01ef, 0x01ee}}, + {{0x01f1, 0x01f1}, {0x01f3, 0x01f3}}, + {{0x01f3, 0x01f3}, {0x01f1, 0x01f1}}, + {{0x01f4, 0x01f5}, {0x01f5, 0x01f4}}, + {{0x01f6, 0x01f6}, {0x0195, 0x0195}}, + {{0x01f7, 0x01f7}, {0x01bf, 0x01bf}}, + {{0x01f8, 0x01f9}, {0x01f9, 0x01f8}}, + {{0x01fa, 0x01fb}, {0x01fb, 0x01fa}}, + {{0x01fc, 0x01fd}, {0x01fd, 0x01fc}}, + {{0x01fe, 0x01ff}, {0x01ff, 0x01fe}}, + {{0x0200, 0x0201}, {0x0201, 0x0200}}, + {{0x0202, 0x0203}, {0x0203, 0x0202}}, + {{0x0204, 0x0205}, {0x0205, 0x0204}}, + {{0x0206, 0x0207}, {0x0207, 0x0206}}, + {{0x0208, 0x0209}, {0x0209, 0x0208}}, + {{0x020a, 0x020b}, {0x020b, 0x020a}}, + {{0x020c, 0x020d}, {0x020d, 0x020c}}, + {{0x020e, 0x020f}, {0x020f, 0x020e}}, + {{0x0210, 0x0211}, {0x0211, 0x0210}}, + {{0x0212, 0x0213}, {0x0213, 0x0212}}, + {{0x0214, 0x0215}, {0x0215, 0x0214}}, + {{0x0216, 0x0217}, {0x0217, 0x0216}}, + {{0x0218, 0x0219}, {0x0219, 0x0218}}, + {{0x021a, 0x021b}, {0x021b, 0x021a}}, + {{0x021c, 0x021d}, {0x021d, 0x021c}}, + {{0x021e, 0x021f}, {0x021f, 0x021e}}, + {{0x0220, 0x0220}, {0x019e, 0x019e}}, + {{0x0222, 0x0223}, {0x0223, 0x0222}}, + {{0x0224, 0x0225}, {0x0225, 0x0224}}, + {{0x0226, 0x0227}, {0x0227, 0x0226}}, + {{0x0228, 0x0229}, {0x0229, 0x0228}}, + {{0x022a, 0x022b}, {0x022b, 0x022a}}, + {{0x022c, 0x022d}, {0x022d, 0x022c}}, + {{0x022e, 0x022f}, {0x022f, 0x022e}}, + {{0x0230, 0x0231}, {0x0231, 0x0230}}, + {{0x0232, 0x0233}, 
{0x0233, 0x0232}}, + {{0x023a, 0x023a}, {0x2c65, 0x2c65}}, + {{0x023b, 0x023c}, {0x023c, 0x023b}}, + {{0x023d, 0x023d}, {0x019a, 0x019a}}, + {{0x023e, 0x023e}, {0x2c66, 0x2c66}}, + {{0x023f, 0x0240}, {0x2c7e, 0x2c7f}}, + {{0x0241, 0x0242}, {0x0242, 0x0241}}, + {{0x0243, 0x0243}, {0x0180, 0x0180}}, + {{0x0244, 0x0244}, {0x0289, 0x0289}}, + {{0x0245, 0x0245}, {0x028c, 0x028c}}, + {{0x0246, 0x0247}, {0x0247, 0x0246}}, + {{0x0248, 0x0249}, {0x0249, 0x0248}}, + {{0x024a, 0x024b}, {0x024b, 0x024a}}, + {{0x024c, 0x024d}, {0x024d, 0x024c}}, + {{0x024e, 0x024f}, {0x024f, 0x024e}}, + {{0x0250, 0x0250}, {0x2c6f, 0x2c6f}}, + {{0x0251, 0x0251}, {0x2c6d, 0x2c6d}}, + {{0x0252, 0x0252}, {0x2c70, 0x2c70}}, + {{0x0253, 0x0253}, {0x0181, 0x0181}}, + {{0x0254, 0x0254}, {0x0186, 0x0186}}, + {{0x0256, 0x0257}, {0x0189, 0x018a}}, + {{0x0259, 0x0259}, {0x018f, 0x018f}}, + {{0x025b, 0x025b}, {0x0190, 0x0190}}, + {{0x025c, 0x025c}, {0xa7ab, 0xa7ab}}, + {{0x0260, 0x0260}, {0x0193, 0x0193}}, + {{0x0261, 0x0261}, {0xa7ac, 0xa7ac}}, + {{0x0263, 0x0263}, {0x0194, 0x0194}}, + {{0x0265, 0x0265}, {0xa78d, 0xa78d}}, + {{0x0266, 0x0266}, {0xa7aa, 0xa7aa}}, + {{0x0268, 0x0269}, {0x0197, 0x0196}}, + {{0x026b, 0x026b}, {0x2c62, 0x2c62}}, + {{0x026c, 0x026c}, {0xa7ad, 0xa7ad}}, + {{0x026f, 0x026f}, {0x019c, 0x019c}}, + {{0x0271, 0x0271}, {0x2c6e, 0x2c6e}}, + {{0x0272, 0x0272}, {0x019d, 0x019d}}, + {{0x0275, 0x0275}, {0x019f, 0x019f}}, + {{0x027d, 0x027d}, {0x2c64, 0x2c64}}, + {{0x0280, 0x0280}, {0x01a6, 0x01a6}}, + {{0x0283, 0x0283}, {0x01a9, 0x01a9}}, + {{0x0287, 0x0287}, {0xa7b1, 0xa7b1}}, + {{0x0288, 0x0288}, {0x01ae, 0x01ae}}, + {{0x0289, 0x0289}, {0x0244, 0x0244}}, + {{0x028a, 0x028b}, {0x01b1, 0x01b2}}, + {{0x028c, 0x028c}, {0x0245, 0x0245}}, + {{0x0292, 0x0292}, {0x01b7, 0x01b7}}, + {{0x029d, 0x029d}, {0xa7b2, 0xa7b2}}, + {{0x029e, 0x029e}, {0xa7b0, 0xa7b0}}, + {{0x0370, 0x0371}, {0x0371, 0x0370}}, + {{0x0372, 0x0373}, {0x0373, 0x0372}}, + {{0x0376, 0x0377}, {0x0377, 0x0376}}, + {{0x037b, 0x037d}, 
{0x03fd, 0x03ff}}, + {{0x037f, 0x037f}, {0x03f3, 0x03f3}}, + {{0x0386, 0x0386}, {0x03ac, 0x03ac}}, + {{0x0388, 0x038a}, {0x03ad, 0x03af}}, + {{0x038c, 0x038c}, {0x03cc, 0x03cc}}, + {{0x038e, 0x038f}, {0x03cd, 0x03ce}}, + {{0x0391, 0x03a1}, {0x03b1, 0x03c1}}, + {{0x03a3, 0x03ab}, {0x03c3, 0x03cb}}, + {{0x03ac, 0x03ac}, {0x0386, 0x0386}}, + {{0x03ad, 0x03af}, {0x0388, 0x038a}}, + {{0x03b1, 0x03c1}, {0x0391, 0x03a1}}, + {{0x03c2, 0x03c2}, {0x03a3, 0x03a3}}, + {{0x03c3, 0x03cb}, {0x03a3, 0x03ab}}, + {{0x03cc, 0x03cc}, {0x038c, 0x038c}}, + {{0x03cd, 0x03ce}, {0x038e, 0x038f}}, + {{0x03cf, 0x03cf}, {0x03d7, 0x03d7}}, + {{0x03d0, 0x03d0}, {0x0392, 0x0392}}, + {{0x03d1, 0x03d1}, {0x0398, 0x0398}}, + {{0x03d5, 0x03d5}, {0x03a6, 0x03a6}}, + {{0x03d6, 0x03d6}, {0x03a0, 0x03a0}}, + {{0x03d7, 0x03d7}, {0x03cf, 0x03cf}}, + {{0x03d8, 0x03d9}, {0x03d9, 0x03d8}}, + {{0x03da, 0x03db}, {0x03db, 0x03da}}, + {{0x03dc, 0x03dd}, {0x03dd, 0x03dc}}, + {{0x03de, 0x03df}, {0x03df, 0x03de}}, + {{0x03e0, 0x03e1}, {0x03e1, 0x03e0}}, + {{0x03e2, 0x03e3}, {0x03e3, 0x03e2}}, + {{0x03e4, 0x03e5}, {0x03e5, 0x03e4}}, + {{0x03e6, 0x03e7}, {0x03e7, 0x03e6}}, + {{0x03e8, 0x03e9}, {0x03e9, 0x03e8}}, + {{0x03ea, 0x03eb}, {0x03eb, 0x03ea}}, + {{0x03ec, 0x03ed}, {0x03ed, 0x03ec}}, + {{0x03ee, 0x03ef}, {0x03ef, 0x03ee}}, + {{0x03f0, 0x03f0}, {0x039a, 0x039a}}, + {{0x03f1, 0x03f1}, {0x03a1, 0x03a1}}, + {{0x03f2, 0x03f2}, {0x03f9, 0x03f9}}, + {{0x03f3, 0x03f3}, {0x037f, 0x037f}}, + {{0x03f4, 0x03f4}, {0x03b8, 0x03b8}}, + {{0x03f5, 0x03f5}, {0x0395, 0x0395}}, + {{0x03f7, 0x03f8}, {0x03f8, 0x03f7}}, + {{0x03f9, 0x03f9}, {0x03f2, 0x03f2}}, + {{0x03fa, 0x03fb}, {0x03fb, 0x03fa}}, + {{0x03fd, 0x03ff}, {0x037b, 0x037d}}, + {{0x0400, 0x040f}, {0x0450, 0x045f}}, + {{0x0410, 0x042f}, {0x0430, 0x044f}}, + {{0x0430, 0x044f}, {0x0410, 0x042f}}, + {{0x0450, 0x045f}, {0x0400, 0x040f}}, + {{0x0460, 0x0461}, {0x0461, 0x0460}}, + {{0x0462, 0x0463}, {0x0463, 0x0462}}, + {{0x0464, 0x0465}, {0x0465, 0x0464}}, + {{0x0466, 0x0467}, 
{0x0467, 0x0466}}, + {{0x0468, 0x0469}, {0x0469, 0x0468}}, + {{0x046a, 0x046b}, {0x046b, 0x046a}}, + {{0x046c, 0x046d}, {0x046d, 0x046c}}, + {{0x046e, 0x046f}, {0x046f, 0x046e}}, + {{0x0470, 0x0471}, {0x0471, 0x0470}}, + {{0x0472, 0x0473}, {0x0473, 0x0472}}, + {{0x0474, 0x0475}, {0x0475, 0x0474}}, + {{0x0476, 0x0477}, {0x0477, 0x0476}}, + {{0x0478, 0x0479}, {0x0479, 0x0478}}, + {{0x047a, 0x047b}, {0x047b, 0x047a}}, + {{0x047c, 0x047d}, {0x047d, 0x047c}}, + {{0x047e, 0x047f}, {0x047f, 0x047e}}, + {{0x0480, 0x0481}, {0x0481, 0x0480}}, + {{0x048a, 0x048b}, {0x048b, 0x048a}}, + {{0x048c, 0x048d}, {0x048d, 0x048c}}, + {{0x048e, 0x048f}, {0x048f, 0x048e}}, + {{0x0490, 0x0491}, {0x0491, 0x0490}}, + {{0x0492, 0x0493}, {0x0493, 0x0492}}, + {{0x0494, 0x0495}, {0x0495, 0x0494}}, + {{0x0496, 0x0497}, {0x0497, 0x0496}}, + {{0x0498, 0x0499}, {0x0499, 0x0498}}, + {{0x049a, 0x049b}, {0x049b, 0x049a}}, + {{0x049c, 0x049d}, {0x049d, 0x049c}}, + {{0x049e, 0x049f}, {0x049f, 0x049e}}, + {{0x04a0, 0x04a1}, {0x04a1, 0x04a0}}, + {{0x04a2, 0x04a3}, {0x04a3, 0x04a2}}, + {{0x04a4, 0x04a5}, {0x04a5, 0x04a4}}, + {{0x04a6, 0x04a7}, {0x04a7, 0x04a6}}, + {{0x04a8, 0x04a9}, {0x04a9, 0x04a8}}, + {{0x04aa, 0x04ab}, {0x04ab, 0x04aa}}, + {{0x04ac, 0x04ad}, {0x04ad, 0x04ac}}, + {{0x04ae, 0x04af}, {0x04af, 0x04ae}}, + {{0x04b0, 0x04b1}, {0x04b1, 0x04b0}}, + {{0x04b2, 0x04b3}, {0x04b3, 0x04b2}}, + {{0x04b4, 0x04b5}, {0x04b5, 0x04b4}}, + {{0x04b6, 0x04b7}, {0x04b7, 0x04b6}}, + {{0x04b8, 0x04b9}, {0x04b9, 0x04b8}}, + {{0x04ba, 0x04bb}, {0x04bb, 0x04ba}}, + {{0x04bc, 0x04bd}, {0x04bd, 0x04bc}}, + {{0x04be, 0x04bf}, {0x04bf, 0x04be}}, + {{0x04c0, 0x04c0}, {0x04cf, 0x04cf}}, + {{0x04c1, 0x04c2}, {0x04c2, 0x04c1}}, + {{0x04c3, 0x04c4}, {0x04c4, 0x04c3}}, + {{0x04c5, 0x04c6}, {0x04c6, 0x04c5}}, + {{0x04c7, 0x04c8}, {0x04c8, 0x04c7}}, + {{0x04c9, 0x04ca}, {0x04ca, 0x04c9}}, + {{0x04cb, 0x04cc}, {0x04cc, 0x04cb}}, + {{0x04cd, 0x04ce}, {0x04ce, 0x04cd}}, + {{0x04cf, 0x04cf}, {0x04c0, 0x04c0}}, + {{0x04d0, 0x04d1}, 
{0x04d1, 0x04d0}}, + {{0x04d2, 0x04d3}, {0x04d3, 0x04d2}}, + {{0x04d4, 0x04d5}, {0x04d5, 0x04d4}}, + {{0x04d6, 0x04d7}, {0x04d7, 0x04d6}}, + {{0x04d8, 0x04d9}, {0x04d9, 0x04d8}}, + {{0x04da, 0x04db}, {0x04db, 0x04da}}, + {{0x04dc, 0x04dd}, {0x04dd, 0x04dc}}, + {{0x04de, 0x04df}, {0x04df, 0x04de}}, + {{0x04e0, 0x04e1}, {0x04e1, 0x04e0}}, + {{0x04e2, 0x04e3}, {0x04e3, 0x04e2}}, + {{0x04e4, 0x04e5}, {0x04e5, 0x04e4}}, + {{0x04e6, 0x04e7}, {0x04e7, 0x04e6}}, + {{0x04e8, 0x04e9}, {0x04e9, 0x04e8}}, + {{0x04ea, 0x04eb}, {0x04eb, 0x04ea}}, + {{0x04ec, 0x04ed}, {0x04ed, 0x04ec}}, + {{0x04ee, 0x04ef}, {0x04ef, 0x04ee}}, + {{0x04f0, 0x04f1}, {0x04f1, 0x04f0}}, + {{0x04f2, 0x04f3}, {0x04f3, 0x04f2}}, + {{0x04f4, 0x04f5}, {0x04f5, 0x04f4}}, + {{0x04f6, 0x04f7}, {0x04f7, 0x04f6}}, + {{0x04f8, 0x04f9}, {0x04f9, 0x04f8}}, + {{0x04fa, 0x04fb}, {0x04fb, 0x04fa}}, + {{0x04fc, 0x04fd}, {0x04fd, 0x04fc}}, + {{0x04fe, 0x04ff}, {0x04ff, 0x04fe}}, + {{0x0500, 0x0501}, {0x0501, 0x0500}}, + {{0x0502, 0x0503}, {0x0503, 0x0502}}, + {{0x0504, 0x0505}, {0x0505, 0x0504}}, + {{0x0506, 0x0507}, {0x0507, 0x0506}}, + {{0x0508, 0x0509}, {0x0509, 0x0508}}, + {{0x050a, 0x050b}, {0x050b, 0x050a}}, + {{0x050c, 0x050d}, {0x050d, 0x050c}}, + {{0x050e, 0x050f}, {0x050f, 0x050e}}, + {{0x0510, 0x0511}, {0x0511, 0x0510}}, + {{0x0512, 0x0513}, {0x0513, 0x0512}}, + {{0x0514, 0x0515}, {0x0515, 0x0514}}, + {{0x0516, 0x0517}, {0x0517, 0x0516}}, + {{0x0518, 0x0519}, {0x0519, 0x0518}}, + {{0x051a, 0x051b}, {0x051b, 0x051a}}, + {{0x051c, 0x051d}, {0x051d, 0x051c}}, + {{0x051e, 0x051f}, {0x051f, 0x051e}}, + {{0x0520, 0x0521}, {0x0521, 0x0520}}, + {{0x0522, 0x0523}, {0x0523, 0x0522}}, + {{0x0524, 0x0525}, {0x0525, 0x0524}}, + {{0x0526, 0x0527}, {0x0527, 0x0526}}, + {{0x0528, 0x0529}, {0x0529, 0x0528}}, + {{0x052a, 0x052b}, {0x052b, 0x052a}}, + {{0x052c, 0x052d}, {0x052d, 0x052c}}, + {{0x052e, 0x052f}, {0x052f, 0x052e}}, + {{0x0531, 0x0556}, {0x0561, 0x0586}}, + {{0x0561, 0x0586}, {0x0531, 0x0556}}, + {{0x10a0, 0x10c5}, 
{0x2d00, 0x2d25}}, + {{0x10c7, 0x10c7}, {0x2d27, 0x2d27}}, + {{0x10cd, 0x10cd}, {0x2d2d, 0x2d2d}}, + {{0x13a0, 0x13ef}, {0xab70, 0xabbf}}, + {{0x13f0, 0x13f5}, {0x13f8, 0x13fd}}, + {{0x13f8, 0x13fd}, {0x13f0, 0x13f5}}, + {{0x1d79, 0x1d79}, {0xa77d, 0xa77d}}, + {{0x1d7d, 0x1d7d}, {0x2c63, 0x2c63}}, + {{0x1e00, 0x1e01}, {0x1e01, 0x1e00}}, + {{0x1e02, 0x1e03}, {0x1e03, 0x1e02}}, + {{0x1e04, 0x1e05}, {0x1e05, 0x1e04}}, + {{0x1e06, 0x1e07}, {0x1e07, 0x1e06}}, + {{0x1e08, 0x1e09}, {0x1e09, 0x1e08}}, + {{0x1e0a, 0x1e0b}, {0x1e0b, 0x1e0a}}, + {{0x1e0c, 0x1e0d}, {0x1e0d, 0x1e0c}}, + {{0x1e0e, 0x1e0f}, {0x1e0f, 0x1e0e}}, + {{0x1e10, 0x1e11}, {0x1e11, 0x1e10}}, + {{0x1e12, 0x1e13}, {0x1e13, 0x1e12}}, + {{0x1e14, 0x1e15}, {0x1e15, 0x1e14}}, + {{0x1e16, 0x1e17}, {0x1e17, 0x1e16}}, + {{0x1e18, 0x1e19}, {0x1e19, 0x1e18}}, + {{0x1e1a, 0x1e1b}, {0x1e1b, 0x1e1a}}, + {{0x1e1c, 0x1e1d}, {0x1e1d, 0x1e1c}}, + {{0x1e1e, 0x1e1f}, {0x1e1f, 0x1e1e}}, + {{0x1e20, 0x1e21}, {0x1e21, 0x1e20}}, + {{0x1e22, 0x1e23}, {0x1e23, 0x1e22}}, + {{0x1e24, 0x1e25}, {0x1e25, 0x1e24}}, + {{0x1e26, 0x1e27}, {0x1e27, 0x1e26}}, + {{0x1e28, 0x1e29}, {0x1e29, 0x1e28}}, + {{0x1e2a, 0x1e2b}, {0x1e2b, 0x1e2a}}, + {{0x1e2c, 0x1e2d}, {0x1e2d, 0x1e2c}}, + {{0x1e2e, 0x1e2f}, {0x1e2f, 0x1e2e}}, + {{0x1e30, 0x1e31}, {0x1e31, 0x1e30}}, + {{0x1e32, 0x1e33}, {0x1e33, 0x1e32}}, + {{0x1e34, 0x1e35}, {0x1e35, 0x1e34}}, + {{0x1e36, 0x1e37}, {0x1e37, 0x1e36}}, + {{0x1e38, 0x1e39}, {0x1e39, 0x1e38}}, + {{0x1e3a, 0x1e3b}, {0x1e3b, 0x1e3a}}, + {{0x1e3c, 0x1e3d}, {0x1e3d, 0x1e3c}}, + {{0x1e3e, 0x1e3f}, {0x1e3f, 0x1e3e}}, + {{0x1e40, 0x1e41}, {0x1e41, 0x1e40}}, + {{0x1e42, 0x1e43}, {0x1e43, 0x1e42}}, + {{0x1e44, 0x1e45}, {0x1e45, 0x1e44}}, + {{0x1e46, 0x1e47}, {0x1e47, 0x1e46}}, + {{0x1e48, 0x1e49}, {0x1e49, 0x1e48}}, + {{0x1e4a, 0x1e4b}, {0x1e4b, 0x1e4a}}, + {{0x1e4c, 0x1e4d}, {0x1e4d, 0x1e4c}}, + {{0x1e4e, 0x1e4f}, {0x1e4f, 0x1e4e}}, + {{0x1e50, 0x1e51}, {0x1e51, 0x1e50}}, + {{0x1e52, 0x1e53}, {0x1e53, 0x1e52}}, + {{0x1e54, 0x1e55}, 
{0x1e55, 0x1e54}}, + {{0x1e56, 0x1e57}, {0x1e57, 0x1e56}}, + {{0x1e58, 0x1e59}, {0x1e59, 0x1e58}}, + {{0x1e5a, 0x1e5b}, {0x1e5b, 0x1e5a}}, + {{0x1e5c, 0x1e5d}, {0x1e5d, 0x1e5c}}, + {{0x1e5e, 0x1e5f}, {0x1e5f, 0x1e5e}}, + {{0x1e60, 0x1e61}, {0x1e61, 0x1e60}}, + {{0x1e62, 0x1e63}, {0x1e63, 0x1e62}}, + {{0x1e64, 0x1e65}, {0x1e65, 0x1e64}}, + {{0x1e66, 0x1e67}, {0x1e67, 0x1e66}}, + {{0x1e68, 0x1e69}, {0x1e69, 0x1e68}}, + {{0x1e6a, 0x1e6b}, {0x1e6b, 0x1e6a}}, + {{0x1e6c, 0x1e6d}, {0x1e6d, 0x1e6c}}, + {{0x1e6e, 0x1e6f}, {0x1e6f, 0x1e6e}}, + {{0x1e70, 0x1e71}, {0x1e71, 0x1e70}}, + {{0x1e72, 0x1e73}, {0x1e73, 0x1e72}}, + {{0x1e74, 0x1e75}, {0x1e75, 0x1e74}}, + {{0x1e76, 0x1e77}, {0x1e77, 0x1e76}}, + {{0x1e78, 0x1e79}, {0x1e79, 0x1e78}}, + {{0x1e7a, 0x1e7b}, {0x1e7b, 0x1e7a}}, + {{0x1e7c, 0x1e7d}, {0x1e7d, 0x1e7c}}, + {{0x1e7e, 0x1e7f}, {0x1e7f, 0x1e7e}}, + {{0x1e80, 0x1e81}, {0x1e81, 0x1e80}}, + {{0x1e82, 0x1e83}, {0x1e83, 0x1e82}}, + {{0x1e84, 0x1e85}, {0x1e85, 0x1e84}}, + {{0x1e86, 0x1e87}, {0x1e87, 0x1e86}}, + {{0x1e88, 0x1e89}, {0x1e89, 0x1e88}}, + {{0x1e8a, 0x1e8b}, {0x1e8b, 0x1e8a}}, + {{0x1e8c, 0x1e8d}, {0x1e8d, 0x1e8c}}, + {{0x1e8e, 0x1e8f}, {0x1e8f, 0x1e8e}}, + {{0x1e90, 0x1e91}, {0x1e91, 0x1e90}}, + {{0x1e92, 0x1e93}, {0x1e93, 0x1e92}}, + {{0x1e94, 0x1e95}, {0x1e95, 0x1e94}}, + {{0x1e9b, 0x1e9b}, {0x1e60, 0x1e60}}, + {{0x1e9e, 0x1e9e}, {0x00df, 0x00df}}, + {{0x1ea0, 0x1ea1}, {0x1ea1, 0x1ea0}}, + {{0x1ea2, 0x1ea3}, {0x1ea3, 0x1ea2}}, + {{0x1ea4, 0x1ea5}, {0x1ea5, 0x1ea4}}, + {{0x1ea6, 0x1ea7}, {0x1ea7, 0x1ea6}}, + {{0x1ea8, 0x1ea9}, {0x1ea9, 0x1ea8}}, + {{0x1eaa, 0x1eab}, {0x1eab, 0x1eaa}}, + {{0x1eac, 0x1ead}, {0x1ead, 0x1eac}}, + {{0x1eae, 0x1eaf}, {0x1eaf, 0x1eae}}, + {{0x1eb0, 0x1eb1}, {0x1eb1, 0x1eb0}}, + {{0x1eb2, 0x1eb3}, {0x1eb3, 0x1eb2}}, + {{0x1eb4, 0x1eb5}, {0x1eb5, 0x1eb4}}, + {{0x1eb6, 0x1eb7}, {0x1eb7, 0x1eb6}}, + {{0x1eb8, 0x1eb9}, {0x1eb9, 0x1eb8}}, + {{0x1eba, 0x1ebb}, {0x1ebb, 0x1eba}}, + {{0x1ebc, 0x1ebd}, {0x1ebd, 0x1ebc}}, + {{0x1ebe, 0x1ebf}, 
{0x1ebf, 0x1ebe}}, + {{0x1ec0, 0x1ec1}, {0x1ec1, 0x1ec0}}, + {{0x1ec2, 0x1ec3}, {0x1ec3, 0x1ec2}}, + {{0x1ec4, 0x1ec5}, {0x1ec5, 0x1ec4}}, + {{0x1ec6, 0x1ec7}, {0x1ec7, 0x1ec6}}, + {{0x1ec8, 0x1ec9}, {0x1ec9, 0x1ec8}}, + {{0x1eca, 0x1ecb}, {0x1ecb, 0x1eca}}, + {{0x1ecc, 0x1ecd}, {0x1ecd, 0x1ecc}}, + {{0x1ece, 0x1ecf}, {0x1ecf, 0x1ece}}, + {{0x1ed0, 0x1ed1}, {0x1ed1, 0x1ed0}}, + {{0x1ed2, 0x1ed3}, {0x1ed3, 0x1ed2}}, + {{0x1ed4, 0x1ed5}, {0x1ed5, 0x1ed4}}, + {{0x1ed6, 0x1ed7}, {0x1ed7, 0x1ed6}}, + {{0x1ed8, 0x1ed9}, {0x1ed9, 0x1ed8}}, + {{0x1eda, 0x1edb}, {0x1edb, 0x1eda}}, + {{0x1edc, 0x1edd}, {0x1edd, 0x1edc}}, + {{0x1ede, 0x1edf}, {0x1edf, 0x1ede}}, + {{0x1ee0, 0x1ee1}, {0x1ee1, 0x1ee0}}, + {{0x1ee2, 0x1ee3}, {0x1ee3, 0x1ee2}}, + {{0x1ee4, 0x1ee5}, {0x1ee5, 0x1ee4}}, + {{0x1ee6, 0x1ee7}, {0x1ee7, 0x1ee6}}, + {{0x1ee8, 0x1ee9}, {0x1ee9, 0x1ee8}}, + {{0x1eea, 0x1eeb}, {0x1eeb, 0x1eea}}, + {{0x1eec, 0x1eed}, {0x1eed, 0x1eec}}, + {{0x1eee, 0x1eef}, {0x1eef, 0x1eee}}, + {{0x1ef0, 0x1ef1}, {0x1ef1, 0x1ef0}}, + {{0x1ef2, 0x1ef3}, {0x1ef3, 0x1ef2}}, + {{0x1ef4, 0x1ef5}, {0x1ef5, 0x1ef4}}, + {{0x1ef6, 0x1ef7}, {0x1ef7, 0x1ef6}}, + {{0x1ef8, 0x1ef9}, {0x1ef9, 0x1ef8}}, + {{0x1efa, 0x1efb}, {0x1efb, 0x1efa}}, + {{0x1efc, 0x1efd}, {0x1efd, 0x1efc}}, + {{0x1efe, 0x1eff}, {0x1eff, 0x1efe}}, + {{0x1f00, 0x1f07}, {0x1f08, 0x1f0f}}, + {{0x1f08, 0x1f0f}, {0x1f00, 0x1f07}}, + {{0x1f10, 0x1f15}, {0x1f18, 0x1f1d}}, + {{0x1f18, 0x1f1d}, {0x1f10, 0x1f15}}, + {{0x1f20, 0x1f27}, {0x1f28, 0x1f2f}}, + {{0x1f28, 0x1f2f}, {0x1f20, 0x1f27}}, + {{0x1f30, 0x1f37}, {0x1f38, 0x1f3f}}, + {{0x1f38, 0x1f3f}, {0x1f30, 0x1f37}}, + {{0x1f40, 0x1f45}, {0x1f48, 0x1f4d}}, + {{0x1f48, 0x1f4d}, {0x1f40, 0x1f45}}, + {{0x1f51, 0x1f51}, {0x1f59, 0x1f59}}, + {{0x1f53, 0x1f53}, {0x1f5b, 0x1f5b}}, + {{0x1f55, 0x1f55}, {0x1f5d, 0x1f5d}}, + {{0x1f57, 0x1f57}, {0x1f5f, 0x1f5f}}, + {{0x1f59, 0x1f59}, {0x1f51, 0x1f51}}, + {{0x1f5b, 0x1f5b}, {0x1f53, 0x1f53}}, + {{0x1f5d, 0x1f5d}, {0x1f55, 0x1f55}}, + {{0x1f5f, 0x1f5f}, 
{0x1f57, 0x1f57}}, + {{0x1f60, 0x1f67}, {0x1f68, 0x1f6f}}, + {{0x1f68, 0x1f6f}, {0x1f60, 0x1f67}}, + {{0x1f70, 0x1f71}, {0x1fba, 0x1fbb}}, + {{0x1f72, 0x1f75}, {0x1fc8, 0x1fcb}}, + {{0x1f76, 0x1f77}, {0x1fda, 0x1fdb}}, + {{0x1f78, 0x1f79}, {0x1ff8, 0x1ff9}}, + {{0x1f7a, 0x1f7b}, {0x1fea, 0x1feb}}, + {{0x1f7c, 0x1f7d}, {0x1ffa, 0x1ffb}}, + {{0x1f80, 0x1f87}, {0x1f88, 0x1f8f}}, + {{0x1f90, 0x1f97}, {0x1f98, 0x1f9f}}, + {{0x1fa0, 0x1fa7}, {0x1fa8, 0x1faf}}, + {{0x1fb0, 0x1fb1}, {0x1fb8, 0x1fb9}}, + {{0x1fb3, 0x1fb3}, {0x1fbc, 0x1fbc}}, + {{0x1fb8, 0x1fb9}, {0x1fb0, 0x1fb1}}, + {{0x1fba, 0x1fbb}, {0x1f70, 0x1f71}}, + {{0x1fbe, 0x1fbe}, {0x0399, 0x0399}}, + {{0x1fc3, 0x1fc3}, {0x1fcc, 0x1fcc}}, + {{0x1fc8, 0x1fcb}, {0x1f72, 0x1f75}}, + {{0x1fd0, 0x1fd1}, {0x1fd8, 0x1fd9}}, + {{0x1fd8, 0x1fd9}, {0x1fd0, 0x1fd1}}, + {{0x1fda, 0x1fdb}, {0x1f76, 0x1f77}}, + {{0x1fe0, 0x1fe1}, {0x1fe8, 0x1fe9}}, + {{0x1fe5, 0x1fe5}, {0x1fec, 0x1fec}}, + {{0x1fe8, 0x1fe9}, {0x1fe0, 0x1fe1}}, + {{0x1fea, 0x1feb}, {0x1f7a, 0x1f7b}}, + {{0x1fec, 0x1fec}, {0x1fe5, 0x1fe5}}, + {{0x1ff3, 0x1ff3}, {0x1ffc, 0x1ffc}}, + {{0x1ff8, 0x1ff9}, {0x1f78, 0x1f79}}, + {{0x1ffa, 0x1ffb}, {0x1f7c, 0x1f7d}}, + {{0x2126, 0x2126}, {0x03c9, 0x03c9}}, + {{0x212a, 0x212a}, {0x006b, 0x006b}}, + {{0x212b, 0x212b}, {0x00e5, 0x00e5}}, + {{0x2132, 0x2132}, {0x214e, 0x214e}}, + {{0x214e, 0x214e}, {0x2132, 0x2132}}, + {{0x2183, 0x2184}, {0x2184, 0x2183}}, + {{0x2c00, 0x2c2e}, {0x2c30, 0x2c5e}}, + {{0x2c30, 0x2c5e}, {0x2c00, 0x2c2e}}, + {{0x2c60, 0x2c61}, {0x2c61, 0x2c60}}, + {{0x2c62, 0x2c62}, {0x026b, 0x026b}}, + {{0x2c63, 0x2c63}, {0x1d7d, 0x1d7d}}, + {{0x2c64, 0x2c64}, {0x027d, 0x027d}}, + {{0x2c65, 0x2c65}, {0x023a, 0x023a}}, + {{0x2c66, 0x2c66}, {0x023e, 0x023e}}, + {{0x2c67, 0x2c68}, {0x2c68, 0x2c67}}, + {{0x2c69, 0x2c6a}, {0x2c6a, 0x2c69}}, + {{0x2c6b, 0x2c6c}, {0x2c6c, 0x2c6b}}, + {{0x2c6d, 0x2c6d}, {0x0251, 0x0251}}, + {{0x2c6e, 0x2c6e}, {0x0271, 0x0271}}, + {{0x2c6f, 0x2c6f}, {0x0250, 0x0250}}, + {{0x2c70, 0x2c70}, 
{0x0252, 0x0252}}, + {{0x2c72, 0x2c73}, {0x2c73, 0x2c72}}, + {{0x2c75, 0x2c76}, {0x2c76, 0x2c75}}, + {{0x2c7e, 0x2c7f}, {0x023f, 0x0240}}, + {{0x2c80, 0x2c81}, {0x2c81, 0x2c80}}, + {{0x2c82, 0x2c83}, {0x2c83, 0x2c82}}, + {{0x2c84, 0x2c85}, {0x2c85, 0x2c84}}, + {{0x2c86, 0x2c87}, {0x2c87, 0x2c86}}, + {{0x2c88, 0x2c89}, {0x2c89, 0x2c88}}, + {{0x2c8a, 0x2c8b}, {0x2c8b, 0x2c8a}}, + {{0x2c8c, 0x2c8d}, {0x2c8d, 0x2c8c}}, + {{0x2c8e, 0x2c8f}, {0x2c8f, 0x2c8e}}, + {{0x2c90, 0x2c91}, {0x2c91, 0x2c90}}, + {{0x2c92, 0x2c93}, {0x2c93, 0x2c92}}, + {{0x2c94, 0x2c95}, {0x2c95, 0x2c94}}, + {{0x2c96, 0x2c97}, {0x2c97, 0x2c96}}, + {{0x2c98, 0x2c99}, {0x2c99, 0x2c98}}, + {{0x2c9a, 0x2c9b}, {0x2c9b, 0x2c9a}}, + {{0x2c9c, 0x2c9d}, {0x2c9d, 0x2c9c}}, + {{0x2c9e, 0x2c9f}, {0x2c9f, 0x2c9e}}, + {{0x2ca0, 0x2ca1}, {0x2ca1, 0x2ca0}}, + {{0x2ca2, 0x2ca3}, {0x2ca3, 0x2ca2}}, + {{0x2ca4, 0x2ca5}, {0x2ca5, 0x2ca4}}, + {{0x2ca6, 0x2ca7}, {0x2ca7, 0x2ca6}}, + {{0x2ca8, 0x2ca9}, {0x2ca9, 0x2ca8}}, + {{0x2caa, 0x2cab}, {0x2cab, 0x2caa}}, + {{0x2cac, 0x2cad}, {0x2cad, 0x2cac}}, + {{0x2cae, 0x2caf}, {0x2caf, 0x2cae}}, + {{0x2cb0, 0x2cb1}, {0x2cb1, 0x2cb0}}, + {{0x2cb2, 0x2cb3}, {0x2cb3, 0x2cb2}}, + {{0x2cb4, 0x2cb5}, {0x2cb5, 0x2cb4}}, + {{0x2cb6, 0x2cb7}, {0x2cb7, 0x2cb6}}, + {{0x2cb8, 0x2cb9}, {0x2cb9, 0x2cb8}}, + {{0x2cba, 0x2cbb}, {0x2cbb, 0x2cba}}, + {{0x2cbc, 0x2cbd}, {0x2cbd, 0x2cbc}}, + {{0x2cbe, 0x2cbf}, {0x2cbf, 0x2cbe}}, + {{0x2cc0, 0x2cc1}, {0x2cc1, 0x2cc0}}, + {{0x2cc2, 0x2cc3}, {0x2cc3, 0x2cc2}}, + {{0x2cc4, 0x2cc5}, {0x2cc5, 0x2cc4}}, + {{0x2cc6, 0x2cc7}, {0x2cc7, 0x2cc6}}, + {{0x2cc8, 0x2cc9}, {0x2cc9, 0x2cc8}}, + {{0x2cca, 0x2ccb}, {0x2ccb, 0x2cca}}, + {{0x2ccc, 0x2ccd}, {0x2ccd, 0x2ccc}}, + {{0x2cce, 0x2ccf}, {0x2ccf, 0x2cce}}, + {{0x2cd0, 0x2cd1}, {0x2cd1, 0x2cd0}}, + {{0x2cd2, 0x2cd3}, {0x2cd3, 0x2cd2}}, + {{0x2cd4, 0x2cd5}, {0x2cd5, 0x2cd4}}, + {{0x2cd6, 0x2cd7}, {0x2cd7, 0x2cd6}}, + {{0x2cd8, 0x2cd9}, {0x2cd9, 0x2cd8}}, + {{0x2cda, 0x2cdb}, {0x2cdb, 0x2cda}}, + {{0x2cdc, 0x2cdd}, 
{0x2cdd, 0x2cdc}}, + {{0x2cde, 0x2cdf}, {0x2cdf, 0x2cde}}, + {{0x2ce0, 0x2ce1}, {0x2ce1, 0x2ce0}}, + {{0x2ce2, 0x2ce3}, {0x2ce3, 0x2ce2}}, + {{0x2ceb, 0x2cec}, {0x2cec, 0x2ceb}}, + {{0x2ced, 0x2cee}, {0x2cee, 0x2ced}}, + {{0x2cf2, 0x2cf3}, {0x2cf3, 0x2cf2}}, + {{0x2d00, 0x2d25}, {0x10a0, 0x10c5}}, + {{0x2d27, 0x2d27}, {0x10c7, 0x10c7}}, + {{0x2d2d, 0x2d2d}, {0x10cd, 0x10cd}}, + {{0xa640, 0xa641}, {0xa641, 0xa640}}, + {{0xa642, 0xa643}, {0xa643, 0xa642}}, + {{0xa644, 0xa645}, {0xa645, 0xa644}}, + {{0xa646, 0xa647}, {0xa647, 0xa646}}, + {{0xa648, 0xa649}, {0xa649, 0xa648}}, + {{0xa64a, 0xa64b}, {0xa64b, 0xa64a}}, + {{0xa64c, 0xa64d}, {0xa64d, 0xa64c}}, + {{0xa64e, 0xa64f}, {0xa64f, 0xa64e}}, + {{0xa650, 0xa651}, {0xa651, 0xa650}}, + {{0xa652, 0xa653}, {0xa653, 0xa652}}, + {{0xa654, 0xa655}, {0xa655, 0xa654}}, + {{0xa656, 0xa657}, {0xa657, 0xa656}}, + {{0xa658, 0xa659}, {0xa659, 0xa658}}, + {{0xa65a, 0xa65b}, {0xa65b, 0xa65a}}, + {{0xa65c, 0xa65d}, {0xa65d, 0xa65c}}, + {{0xa65e, 0xa65f}, {0xa65f, 0xa65e}}, + {{0xa660, 0xa661}, {0xa661, 0xa660}}, + {{0xa662, 0xa663}, {0xa663, 0xa662}}, + {{0xa664, 0xa665}, {0xa665, 0xa664}}, + {{0xa666, 0xa667}, {0xa667, 0xa666}}, + {{0xa668, 0xa669}, {0xa669, 0xa668}}, + {{0xa66a, 0xa66b}, {0xa66b, 0xa66a}}, + {{0xa66c, 0xa66d}, {0xa66d, 0xa66c}}, + {{0xa680, 0xa681}, {0xa681, 0xa680}}, + {{0xa682, 0xa683}, {0xa683, 0xa682}}, + {{0xa684, 0xa685}, {0xa685, 0xa684}}, + {{0xa686, 0xa687}, {0xa687, 0xa686}}, + {{0xa688, 0xa689}, {0xa689, 0xa688}}, + {{0xa68a, 0xa68b}, {0xa68b, 0xa68a}}, + {{0xa68c, 0xa68d}, {0xa68d, 0xa68c}}, + {{0xa68e, 0xa68f}, {0xa68f, 0xa68e}}, + {{0xa690, 0xa691}, {0xa691, 0xa690}}, + {{0xa692, 0xa693}, {0xa693, 0xa692}}, + {{0xa694, 0xa695}, {0xa695, 0xa694}}, + {{0xa696, 0xa697}, {0xa697, 0xa696}}, + {{0xa698, 0xa699}, {0xa699, 0xa698}}, + {{0xa69a, 0xa69b}, {0xa69b, 0xa69a}}, + {{0xa722, 0xa723}, {0xa723, 0xa722}}, + {{0xa724, 0xa725}, {0xa725, 0xa724}}, + {{0xa726, 0xa727}, {0xa727, 0xa726}}, + {{0xa728, 0xa729}, 
{0xa729, 0xa728}}, + {{0xa72a, 0xa72b}, {0xa72b, 0xa72a}}, + {{0xa72c, 0xa72d}, {0xa72d, 0xa72c}}, + {{0xa72e, 0xa72f}, {0xa72f, 0xa72e}}, + {{0xa732, 0xa733}, {0xa733, 0xa732}}, + {{0xa734, 0xa735}, {0xa735, 0xa734}}, + {{0xa736, 0xa737}, {0xa737, 0xa736}}, + {{0xa738, 0xa739}, {0xa739, 0xa738}}, + {{0xa73a, 0xa73b}, {0xa73b, 0xa73a}}, + {{0xa73c, 0xa73d}, {0xa73d, 0xa73c}}, + {{0xa73e, 0xa73f}, {0xa73f, 0xa73e}}, + {{0xa740, 0xa741}, {0xa741, 0xa740}}, + {{0xa742, 0xa743}, {0xa743, 0xa742}}, + {{0xa744, 0xa745}, {0xa745, 0xa744}}, + {{0xa746, 0xa747}, {0xa747, 0xa746}}, + {{0xa748, 0xa749}, {0xa749, 0xa748}}, + {{0xa74a, 0xa74b}, {0xa74b, 0xa74a}}, + {{0xa74c, 0xa74d}, {0xa74d, 0xa74c}}, + {{0xa74e, 0xa74f}, {0xa74f, 0xa74e}}, + {{0xa750, 0xa751}, {0xa751, 0xa750}}, + {{0xa752, 0xa753}, {0xa753, 0xa752}}, + {{0xa754, 0xa755}, {0xa755, 0xa754}}, + {{0xa756, 0xa757}, {0xa757, 0xa756}}, + {{0xa758, 0xa759}, {0xa759, 0xa758}}, + {{0xa75a, 0xa75b}, {0xa75b, 0xa75a}}, + {{0xa75c, 0xa75d}, {0xa75d, 0xa75c}}, + {{0xa75e, 0xa75f}, {0xa75f, 0xa75e}}, + {{0xa760, 0xa761}, {0xa761, 0xa760}}, + {{0xa762, 0xa763}, {0xa763, 0xa762}}, + {{0xa764, 0xa765}, {0xa765, 0xa764}}, + {{0xa766, 0xa767}, {0xa767, 0xa766}}, + {{0xa768, 0xa769}, {0xa769, 0xa768}}, + {{0xa76a, 0xa76b}, {0xa76b, 0xa76a}}, + {{0xa76c, 0xa76d}, {0xa76d, 0xa76c}}, + {{0xa76e, 0xa76f}, {0xa76f, 0xa76e}}, + {{0xa779, 0xa77a}, {0xa77a, 0xa779}}, + {{0xa77b, 0xa77c}, {0xa77c, 0xa77b}}, + {{0xa77d, 0xa77d}, {0x1d79, 0x1d79}}, + {{0xa77e, 0xa77f}, {0xa77f, 0xa77e}}, + {{0xa780, 0xa781}, {0xa781, 0xa780}}, + {{0xa782, 0xa783}, {0xa783, 0xa782}}, + {{0xa784, 0xa785}, {0xa785, 0xa784}}, + {{0xa786, 0xa787}, {0xa787, 0xa786}}, + {{0xa78b, 0xa78c}, {0xa78c, 0xa78b}}, + {{0xa78d, 0xa78d}, {0x0265, 0x0265}}, + {{0xa790, 0xa791}, {0xa791, 0xa790}}, + {{0xa792, 0xa793}, {0xa793, 0xa792}}, + {{0xa796, 0xa797}, {0xa797, 0xa796}}, + {{0xa798, 0xa799}, {0xa799, 0xa798}}, + {{0xa79a, 0xa79b}, {0xa79b, 0xa79a}}, + {{0xa79c, 0xa79d}, 
{0xa79d, 0xa79c}}, + {{0xa79e, 0xa79f}, {0xa79f, 0xa79e}}, + {{0xa7a0, 0xa7a1}, {0xa7a1, 0xa7a0}}, + {{0xa7a2, 0xa7a3}, {0xa7a3, 0xa7a2}}, + {{0xa7a4, 0xa7a5}, {0xa7a5, 0xa7a4}}, + {{0xa7a6, 0xa7a7}, {0xa7a7, 0xa7a6}}, + {{0xa7a8, 0xa7a9}, {0xa7a9, 0xa7a8}}, + {{0xa7aa, 0xa7aa}, {0x0266, 0x0266}}, + {{0xa7ab, 0xa7ab}, {0x025c, 0x025c}}, + {{0xa7ac, 0xa7ac}, {0x0261, 0x0261}}, + {{0xa7ad, 0xa7ad}, {0x026c, 0x026c}}, + {{0xa7b0, 0xa7b0}, {0x029e, 0x029e}}, + {{0xa7b1, 0xa7b1}, {0x0287, 0x0287}}, + {{0xa7b2, 0xa7b2}, {0x029d, 0x029d}}, + {{0xa7b3, 0xa7b3}, {0xab53, 0xab53}}, + {{0xa7b4, 0xa7b5}, {0xa7b5, 0xa7b4}}, + {{0xa7b6, 0xa7b7}, {0xa7b7, 0xa7b6}}, + {{0xab53, 0xab53}, {0xa7b3, 0xa7b3}}, + {{0xab70, 0xabbf}, {0x13a0, 0x13ef}}, + {{0xff21, 0xff3a}, {0xff41, 0xff5a}}, + {{0xff41, 0xff5a}, {0xff21, 0xff3a}}, + {{0, 0}, {0, 0}}}; + const fold_pair *ptr_ = mapping_; + + for (; ptr_->from.first != 0; ++ptr_) + { + if (range_.second < ptr_->from.first) break; + + if (range_.first >= ptr_->from.first && + range_.first <= ptr_->from.second) + { + if (ptr_->to.first <= ptr_->to.second) + { + const index_type first_ = ptr_->to.first + + (range_.first - ptr_->from.first); + + out_.insert(range(first_, + range_.second > ptr_->from.second ? + ptr_->to.second : + static_cast(ptr_->to.first + + (range_.second - ptr_->from.first)))); + } + else + { + const index_type first_ = ptr_->to.second + + (range_.first - ptr_->from.first); + + out_.insert(range(first_, + range_.second > ptr_->from.second ? 
+ ptr_->to.first : + static_cast(ptr_->to.second + + (range_.second - ptr_->from.first)))); + } + } + else if (range_.second >= ptr_->from.first && + range_.second <= ptr_->from.second) + { + if (ptr_->to.first <= ptr_->to.second) + { + const index_type second_ = ptr_->to.first + + (range_.second - ptr_->from.first); + + out_.insert(range(ptr_->to.first, second_)); + } + else + { + const index_type second_ = ptr_->to.second + + (range_.second - ptr_->from.first); + + out_.insert(range(ptr_->to.second, second_)); + } + } + // Either range fully encompasses from range or not at all. + else if (ptr_->from.first >= range_.first && + ptr_->from.first <= range_.second) + { + if (ptr_->to.first <= ptr_->to.second) + { + out_.insert(range(ptr_->to.first, ptr_->to.second)); + } + else + { + out_.insert(range(ptr_->to.second, ptr_->to.first)); + } + } + } + } + + static void fold(const range &range_, const std::locale &locale_, + string_token &out_, const four &) + { + if (range_.first < 0x10000) + { + fold(range_, locale_, out_, two()); + } + + static const fold_pair mapping_[] = + {{{0x10400, 0x10427}, {0x10428, 0x1044f}}, + {{0x10428, 0x1044f}, {0x10400, 0x10427}}, + {{0x10c80, 0x10cb2}, {0x10cc0, 0x10cf2}}, + {{0x10cc0, 0x10cf2}, {0x10c80, 0x10cb2}}, + {{0x118a0, 0x118bf}, {0x118c0, 0x118df}}, + {{0x118c0, 0x118df}, {0x118a0, 0x118bf}}, + {{0, 0}, {0, 0}}}; + const fold_pair *ptr_ = mapping_; + + for (; ptr_->from.first != 0; ++ptr_) + { + if (range_.second < ptr_->from.first) break; + + if (range_.first >= ptr_->from.first && + range_.first <= ptr_->from.second) + { + out_.insert(range(ptr_->to.first + + (range_.first - ptr_->from.first), + range_.second > ptr_->from.second ? 
+ ptr_->to.second : + ptr_->to.first + (range_.second - ptr_->from.first))); + } + else if (range_.second >= ptr_->from.first && + range_.second <= ptr_->from.second) + { + out_.insert(range(ptr_->to.first, + ptr_->to.first + (range_.second - ptr_->from.first))); + } + // Either range fully encompasses from range or not at all. + else if (ptr_->from.first >= range_.first && + ptr_->from.first <= range_.second) + { + out_.insert(range(ptr_->to.first, ptr_->to.second)); + } + } + } + + template + static input_char_type chr(state_type &state_) + { + input_char_type ch_ = 0; + + // eos_ has already been checked for. + switch (*state_._curr) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + ch_ = decode_octal(state_); + break; + case 'a': + ch_ = '\a'; + state_.increment(); + break; + case 'b': + ch_ = '\b'; + state_.increment(); + break; + case 'c': + ch_ = decode_control_char(state_); + break; + case 'e': + ch_ = 27; // '\e' not recognised by compiler + state_.increment(); + break; + case 'f': + ch_ = '\f'; + state_.increment(); + break; + case 'n': + ch_ = '\n'; + state_.increment(); + break; + case 'r': + ch_ = '\r'; + state_.increment(); + break; + case 't': + ch_ = '\t'; + state_.increment(); + break; + case 'v': + ch_ = '\v'; + state_.increment(); + break; + case 'x': + ch_ = decode_hex(state_); + break; + default: + ch_ = *state_._curr; + state_.increment(); + break; + } + + return ch_; + } + +private: + struct char_pair + { + input_char_type first; + input_char_type second; + }; + + struct fold_pair + { + char_pair from; + char_pair to; + }; + + template + static void posix(state_type &state_, string_token &token_) + { + bool negate_ = false; + + if (!state_.eos() && *state_._curr == '^') + { + negate_ = true; + state_.increment(); + } + + if (state_.eos()) + { + unterminated_posix(state_); + } + else + { + switch (*state_._curr) + { + case 'a': + // alnum + // alpha + alnum_alpha(state_, token_, negate_); + 
break; + case 'b': + // blank + blank(state_, token_, negate_); + break; + case 'c': + // cntrl + cntrl(state_, token_, negate_); + break; + case 'd': + // digit + digit(state_, token_, negate_); + break; + case 'g': + // graph + graph(state_, token_, negate_); + break; + case 'l': + // lower + lower(state_, token_, negate_); + break; + case 'p': + // print + // punct + print_punct(state_, token_, negate_); + break; + case 's': + // space + space(state_, token_, negate_); + break; + case 'u': + // upper + upper(state_, token_, negate_); + break; + case 'x': + // xdigit + xdigit(state_, token_, negate_); + break; + default: + unknown_posix(state_); + break; + } + } + } + + template + static void alnum_alpha(state_type &state_, string_token &token_, + const bool negate_) + { + enum {unknown, alnum, alpha}; + std::size_t type_ = unknown; + + state_.increment(); + + if (!state_.eos() && *state_._curr == 'l') + { + state_.increment(); + + if (!state_.eos()) + { + if (*state_._curr == 'n') + { + state_.increment(); + + if (!state_.eos() && *state_._curr == 'u') + { + state_.increment(); + + if (!state_.eos() && *state_._curr == 'm') + { + state_.increment(); + type_ = alnum; + } + } + } + else if (*state_._curr == 'p') + { + state_.increment(); + + if (!state_.eos() && *state_._curr == 'h') + { + state_.increment(); + + if (!state_.eos() && *state_._curr == 'a') + { + state_.increment(); + type_ = alpha; + } + } + } + } + } + + if (type_ == unknown) + { + unknown_posix(state_); + } + else + { + std::string str_; + + check_posix_termination(state_); + + if (type_ == alnum) + { + // alnum + str_ = sizeof(input_char_type) == 1 ? + make_alnum(state_._locale) : + std::string("[\\p{Ll}\\p{Lu}\\p{Nd}]"); + } + else + { + // alpha + str_ = sizeof(input_char_type) == 1 ? 
+ make_alpha(state_._locale) : + std::string("[\\p{Ll}\\p{Lu}]"); + } + + insert_charset(str_.c_str(), state_, token_, negate_); + } + } + + static std::string make_alnum(std::locale &locale_) + { + std::string str_(1, '['); + + for (std::size_t i_ = 0; i_ < 256; ++i_) + { + if (std::use_facet >(locale_). + is(std::ctype_base::alnum, static_cast(i_))) + { + str_ += static_cast(i_); + } + } + + str_ += ']'; + return str_; + } + + static std::string make_alpha(std::locale &locale_) + { + std::string str_(1, '['); + + for (std::size_t i_ = 0; i_ < 256; ++i_) + { + if (std::use_facet >(locale_). + is(std::ctype_base::alpha, static_cast(i_))) + { + str_ += static_cast(i_); + } + } + + str_ += ']'; + return str_; + } + + template + static void blank(state_type &state_, string_token &token_, + const bool negate_) + { + const char *blank_ = "lank"; + + state_.increment(); + + // Casts to prevent warnings (VC++ 2012) + while (!state_.eos() && *blank_ && + static_cast(*state_._curr) == + static_cast(*blank_)) + { + state_.increment(); + ++blank_; + } + + if (*blank_) + { + unknown_posix(state_); + } + else + { + const char *str_ = sizeof(input_char_type) == 1 ? + "[ \t]" : "[\\p{Zs}\t]"; + + check_posix_termination(state_); + insert_charset(str_, state_, token_, negate_); + } + } + + template + static void cntrl(state_type &state_, string_token &token_, + const bool negate_) + { + const char *cntrl_ = "ntrl"; + + state_.increment(); + + // Casts to prevent warnings (VC++ 2012) + while (!state_.eos() && *cntrl_ && + static_cast(*state_._curr) == + static_cast(*cntrl_)) + { + state_.increment(); + ++cntrl_; + } + + if (*cntrl_) + { + unknown_posix(state_); + } + else + { + const char *str_ = sizeof(input_char_type) == 1 ? 
+ "[\\x00-\x1f\x7f]" : "[\\p{Cc}]"; + + check_posix_termination(state_); + insert_charset(str_, state_, token_, negate_); + } + } + + template + static void digit(state_type &state_, string_token &token_, + const bool negate_) + { + const char *digit_ = "igit"; + + state_.increment(); + + // Casts to prevent warnings (VC++ 2012) + while (!state_.eos() && *digit_ && + static_cast(*state_._curr) == + static_cast(*digit_)) + { + state_.increment(); + ++digit_; + } + + if (*digit_) + { + unknown_posix(state_); + } + else + { + const char *str_ = sizeof(input_char_type) == 1 ? + "[0-9]" : "[\\p{Nd}]"; + + check_posix_termination(state_); + insert_charset(str_, state_, token_, negate_); + } + } + + template + static void graph(state_type &state_, string_token &token_, + const bool negate_) + { + const char *graph_ = "raph"; + + state_.increment(); + + // Casts to prevent warnings (VC++ 2012) + while (!state_.eos() && *graph_ && + static_cast(*state_._curr) == + static_cast(*graph_)) + { + state_.increment(); + ++graph_; + } + + if (*graph_) + { + unknown_posix(state_); + } + else + { + const char *str_ = sizeof(input_char_type) == 1 ? + "[\x21-\x7e]" : "[^\\p{Z}\\p{C}]"; + + check_posix_termination(state_); + insert_charset(str_, state_, token_, negate_); + } + } + + template + static void lower(state_type &state_, string_token &token_, + const bool negate_) + { + const char *lower_ = "ower"; + + state_.increment(); + + // Casts to prevent warnings (VC++ 2012) + while (!state_.eos() && *lower_ && + static_cast(*state_._curr) == + static_cast(*lower_)) + { + state_.increment(); + ++lower_; + } + + if (*lower_) + { + unknown_posix(state_); + } + else + { + std::string str_ = sizeof(input_char_type) == 1 ? 
+ create_lower(state_._locale) : + std::string("[\\p{Ll}]"); + + check_posix_termination(state_); + insert_charset(str_.c_str(), state_, token_, negate_); + } + } + + static std::string create_lower(std::locale &locale_) + { + std::string str_(1, '['); + + for (std::size_t i_ = 0; i_ < 256; ++i_) + { + if (std::use_facet >(locale_). + is(std::ctype_base::lower, static_cast(i_))) + { + str_ += static_cast(i_); + } + } + + str_ += ']'; + return str_; + } + + template + static void print_punct(state_type &state_, string_token &token_, + const bool negate_) + { + enum {unknown, print, punct}; + std::size_t type_ = unknown; + + state_.increment(); + + if (!state_.eos()) + { + if (*state_._curr == 'r') + { + state_.increment(); + + if (!state_.eos() && *state_._curr == 'i') + { + state_.increment(); + + if (!state_.eos() && *state_._curr == 'n') + { + state_.increment(); + + if (!state_.eos() && *state_._curr == 't') + { + state_.increment(); + type_ = print; + } + } + } + } + else if (*state_._curr == 'u') + { + state_.increment(); + + if (!state_.eos() && *state_._curr == 'n') + { + state_.increment(); + + if (!state_.eos() && *state_._curr == 'c') + { + state_.increment(); + + if (!state_.eos() && *state_._curr == 't') + { + state_.increment(); + type_ = punct; + } + } + } + } + } + + if (type_ == unknown) + { + unknown_posix(state_); + } + else + { + const char *str_ = nullptr; + + check_posix_termination(state_); + + if (type_ == print) + { + // print + str_ = sizeof(input_char_type) == 1 ? + "[\x20-\x7e]" : "[\\p{C}]"; + } + else + { + // punct + str_ = sizeof(input_char_type) == 1 ? 
+ "[!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~]" : + "[\\p{P}\\p{S}]"; + } + + insert_charset(str_, state_, token_, negate_); + } + } + + template + static void space(state_type &state_, string_token &token_, + const bool negate_) + { + const char *space_ = "pace"; + + state_.increment(); + + // Casts to prevent warnings (VC++ 2012) + while (!state_.eos() && *space_ && + static_cast(*state_._curr) == + static_cast(*space_)) + { + state_.increment(); + ++space_; + } + + if (*space_) + { + unknown_posix(state_); + } + else + { + const char *str_ = sizeof(input_char_type) == 1 ? + "[ \t\r\n\v\f]" : "[\\p{Z}\t\r\n\v\f]"; + + check_posix_termination(state_); + insert_charset(str_, state_, token_, negate_); + } + } + + template + static void upper(state_type &state_, string_token &token_, + const bool negate_) + { + const char *upper_ = "pper"; + + state_.increment(); + + // Casts to prevent warnings (VC++ 2012) + while (!state_.eos() && *upper_ && + static_cast(*state_._curr) == + static_cast(*upper_)) + { + state_.increment(); + ++upper_; + } + + if (*upper_) + { + unknown_posix(state_); + } + else + { + std::string str_ = sizeof(input_char_type) == 1 ? + create_upper(state_._locale) : + std::string("[\\p{Lu}]"); + + check_posix_termination(state_); + insert_charset(str_.c_str(), state_, token_, negate_); + } + } + + static std::string create_upper(std::locale &locale_) + { + std::string str_(1, '['); + + for (std::size_t i_ = 0; i_ < 256; ++i_) + { + if (std::use_facet >(locale_). 
+ is(std::ctype_base::upper, static_cast(i_))) + { + str_ += static_cast(i_); + } + } + + str_ += ']'; + return str_; + } + + template + static void xdigit(state_type &state_, string_token &token_, + const bool negate_) + { + const char *xdigit_ = "digit"; + + state_.increment(); + + // Casts to prevent warnings (VC++ 2012) + while (!state_.eos() && *xdigit_ && + static_cast(*state_._curr) == + static_cast(*xdigit_)) + { + state_.increment(); + ++xdigit_; + } + + if (*xdigit_) + { + unknown_posix(state_); + } + else + { + const char *str_ = "[0-9A-Fa-f]"; + + check_posix_termination(state_); + insert_charset(str_, state_, token_, negate_); + } + } + + template + static void check_posix_termination(state_type &state_) + { + if (state_.eos()) + { + unterminated_posix(state_); + } + + if (*state_._curr != ':') + { + std::ostringstream ss_; + + ss_ << "Missing ':' at index " << state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + state_.increment(); + + if (state_.eos()) + { + unterminated_posix(state_); + } + + if (*state_._curr != ']') + { + std::ostringstream ss_; + + ss_ << "Missing ']' at index " << state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + state_.increment(); + } + + template + static void unterminated_posix(state_type &state_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (unterminated POSIX charset)"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + template + static void unknown_posix(state_type &state_) + { + std::ostringstream ss_; + + ss_ << "Unknown POSIX charset at index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + template + static void insert_charset(const char *str_, state_type &state_, + string_token &token_, const bool negate_) + { + // Some systems have strlen in namespace std. 
+ using namespace std; + + char_state temp_state_(str_ + 1, str_ + strlen(str_), + state_._id, state_._flags, state_._locale, 0); + string_token temp_token_; + + charset(temp_state_, temp_token_); + + if (negate_) temp_token_.negate(); + + token_.insert(temp_token_); + } + + template + static const char *charset_shortcut + (state_type &state_, std::size_t &str_len_) + { + const char *str_ = nullptr; + + switch (*state_._curr) + { + case 'd': + str_ = "[0-9]"; + break; + case 'D': + str_ = "[^0-9]"; + break; + case 'p': + str_ = unicode_escape(state_); + break; + case 's': + str_ = "[ \t\n\r\f\v]"; + break; + case 'S': + str_ = "[^ \t\n\r\f\v]"; + break; + case 'w': + str_ = "[_0-9A-Za-z]"; + break; + case 'W': + str_ = "[^_0-9A-Za-z]"; + break; + } + + if (str_) + { + // Some systems have strlen in namespace std. + using namespace std; + + str_len_ = strlen(str_); + } + else + { + str_len_ = 0; + } + + return str_; + } + + template + static const char *unicode_escape(state_type &state_) + { + const char *str_ = nullptr; + + state_.increment(); + + if (state_.eos()) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following \\p"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + if (*state_._curr != '{') + { + std::ostringstream ss_; + + ss_ << "Missing '{' following \\p at index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + state_.increment(); + + if (state_.eos()) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following \\p{"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + switch (*state_._curr) + { + case 'C': + state_.increment(); + + if (state_.eos()) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following \\p{C"; + state_.error(ss_); + throw runtime_error(ss_.str()); 
+ } + + switch (*state_._curr) + { + case '}': + str_ = "[\\p{Cc}\\p{Cf}\\p{Co}\\p{Cs}]"; + break; + case 'c': + str_ = other_control(); + state_.increment(); + break; + case 'f': + str_ = other_format(); + state_.increment(); + break; +// case 'n': +// break; + case 'o': + str_ = other_private(); + state_.increment(); + break; + case 's': + str_ = other_surrogate(); + state_.increment(); + break; + default: + { + std::ostringstream ss_; + + ss_ << "Syntax error following \\p{C at index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + break; + case 'L': + state_.increment(); + + if (state_.eos()) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following \\p{L"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + switch (*state_._curr) + { + case '}': + str_ = "[\\p{Ll}\\p{Lm}\\p{Lo}\\p{Lt}\\p{Lu}]"; + break; + case 'C': + str_ = "[\\p{Ll}\\p{Lt}\\p{Lu}]"; + state_.increment(); + break; + case 'l': + str_ = letter_lowercase(); + state_.increment(); + break; + case 'm': + str_ = letter_modifier(); + state_.increment(); + break; + case 'o': + str_ = letter_other(); + state_.increment(); + break; + case 't': + str_ = letter_titlecase(); + state_.increment(); + break; + case 'u': + str_ = letter_uppercase(); + state_.increment(); + break; + default: + { + std::ostringstream ss_; + + ss_ << "Syntax error following \\p{L at index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + break; + case 'M': + state_.increment(); + + if (state_.eos()) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following \\p{M"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + switch (*state_._curr) + { + case '}': + str_ = "[\\p{Mc}\\p{Me}\\p{Mn}]"; + break; + case 'c': + str_ = mark_combining(); + state_.increment(); + break; + case 'e': + 
str_ = mark_enclosing(); + state_.increment(); + break; + case 'n': + str_ = mark_nonspacing(); + state_.increment(); + break; + default: + { + std::ostringstream ss_; + + ss_ << "Syntax error following \\p{M at index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + break; + case 'N': + state_.increment(); + + if (state_.eos()) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following \\p{N"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + switch (*state_._curr) + { + case '}': + str_ = "[\\p{Nd}\\p{Nl}\\p{No}]"; + break; + case 'd': + str_ = number_decimal(); + state_.increment(); + break; + case 'l': + str_ = number_letter(); + state_.increment(); + break; + case 'o': + str_ = number_other(); + state_.increment(); + break; + default: + { + std::ostringstream ss_; + + ss_ << "Syntax error following \\p{N at index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + break; + case 'P': + state_.increment(); + + if (state_.eos()) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following \\p{P"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + switch (*state_._curr) + { + case '}': + str_ = "[\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}" + "\\p{Ps}]"; + break; + case 'c': + str_ = punctuation_connector(); + state_.increment(); + break; + case 'd': + str_ = punctuation_dash(); + state_.increment(); + break; + case 'e': + str_ = punctuation_close(); + state_.increment(); + break; + case 'f': + str_ = punctuation_final(); + state_.increment(); + break; + case 'i': + str_ = punctuation_initial(); + state_.increment(); + break; + case 'o': + str_ = punctuation_other(); + state_.increment(); + break; + case 's': + str_ = punctuation_open(); + state_.increment(); + break; + default: + { + std::ostringstream ss_; + + ss_ << 
"Syntax error following \\p{P at index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + break; + case 'S': + state_.increment(); + + if (state_.eos()) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following \\p{S"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + switch (*state_._curr) + { + case '}': + str_ = "[\\p{Sc}\\p{Sk}\\p{Sm}\\p{So}]"; + break; + case 'c': + str_ = symbol_currency(); + state_.increment(); + break; + case 'k': + str_ = symbol_modifier(); + state_.increment(); + break; + case 'm': + str_ = symbol_math(); + state_.increment(); + break; + case 'o': + str_ = symbol_other(); + state_.increment(); + break; + default: + { + std::ostringstream ss_; + + ss_ << "Syntax error following \\p{S at index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + break; + case 'Z': + state_.increment(); + + if (state_.eos()) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following \\p{Z"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + switch (*state_._curr) + { + case '}': + str_ = "[\\p{Zl}\\p{Zp}\\p{Zs}]"; + break; + case 'l': + str_ = separator_line(); + state_.increment(); + break; + case 'p': + str_ = separator_paragraph(); + state_.increment(); + break; + case 's': + str_ = separator_space(); + state_.increment(); + break; + default: + { + std::ostringstream ss_; + + ss_ << "Syntax error following \\p{Z at index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + break; + default: + { + std::ostringstream ss_; + + ss_ << "Syntax error following \\p{ at index " << + state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + if (*state_._curr != '}') + { + std::ostringstream ss_; + + ss_ << "Missing '}' at index " << state_.index(); + 
state_.error(ss_); + throw runtime_error(ss_.str()); + } + + return str_; + } + + static const char *other_control() + { + return "[\\x0-\\x1f\\x7f-\\x9f]"; + } + + static const char *other_format() + { + return "[\\xad\\x600-\\x605\\x61c\\x6dd\\x70f\\x180e\\x200b-\\x200f" + "\\x202a-\\x202e\\x2060-\\x2064\\x2066-\\x206f\\xfeff" + "\\xfff9-\\xfffb\\x110bd\\x1bca0-\\x1bca3\\x1d173-\\x1d17a\\xe0001" + "\\xe0020-\\xe007f]"; + } + + static const char *other_private() + { + return "[\\xe000\\xf8ff\\xf0000\\xffffd\\x100000\\x10fffd]"; + } + + static const char *other_surrogate() + { + return "[\\xd800\\xdb7f\\xdb80\\xdbff\\xdc00\\xdfff]"; + } + + static const char *letter_lowercase() + { + return "[\\x61-\\x7a\\xb5\\xdf-\\xf6\\xf8-\\xff\\x101\\x103\\x105\\x107" + "\\x109\\x10b\\x10d\\x10f\\x111\\x113\\x115\\x117\\x119\\x11b\\x11d" + "\\x11f\\x121\\x123\\x125\\x127\\x129\\x12b\\x12d\\x12f\\x131\\x133" + "\\x135\\x137\\x138\\x13a\\x13c\\x13e\\x140\\x142\\x144\\x146" + "\\x148\\x149\\x14b\\x14d\\x14f\\x151\\x153\\x155\\x157\\x159\\x15b" + "\\x15d\\x15f\\x161\\x163\\x165\\x167\\x169\\x16b\\x16d\\x16f\\x171" + "\\x173\\x175\\x177\\x17a\\x17c\\x17e-\\x180\\x183\\x185\\x188" + "\\x18c\\x18d\\x192\\x195\\x199-\\x19b\\x19e\\x1a1\\x1a3\\x1a5" + "\\x1a8\\x1aa\\x1ab\\x1ad\\x1b0\\x1b4\\x1b6\\x1b9\\x1ba" + "\\x1bd-\\x1bf\\x1c6\\x1c9\\x1cc\\x1ce\\x1d0\\x1d2\\x1d4\\x1d6" + "\\x1d8\\x1da\\x1dc\\x1dd\\x1df\\x1e1\\x1e3\\x1e5\\x1e7\\x1e9\\x1eb" + "\\x1ed\\x1ef\\x1f0\\x1f3\\x1f5\\x1f9\\x1fb\\x1fd\\x1ff\\x201\\x203" + "\\x205\\x207\\x209\\x20b\\x20d\\x20f\\x211\\x213\\x215\\x217\\x219" + "\\x21b\\x21d\\x21f\\x221\\x223\\x225\\x227\\x229\\x22b\\x22d\\x22f" + "\\x231\\x233-\\x239\\x23c\\x23f\\x240\\x242\\x247\\x249\\x24b" + "\\x24d\\x24f-\\x293\\x295-\\x2af\\x371\\x373\\x377\\x37b-\\x37d" + "\\x390\\x3ac-\\x3ce\\x3d0\\x3d1\\x3d5-\\x3d7\\x3d9\\x3db\\x3dd" + "\\x3df\\x3e1\\x3e3\\x3e5\\x3e7\\x3e9\\x3eb\\x3ed\\x3ef-\\x3f3" + "\\x3f5\\x3f8\\x3fb\\x3fc\\x430-\\x45f\\x461\\x463\\x465\\x467" + 
"\\x469\\x46b\\x46d\\x46f\\x471\\x473\\x475\\x477\\x479\\x47b\\x47d" + "\\x47f\\x481\\x48b\\x48d\\x48f\\x491\\x493\\x495\\x497\\x499\\x49b" + "\\x49d\\x49f\\x4a1\\x4a3\\x4a5\\x4a7\\x4a9\\x4ab\\x4ad\\x4af\\x4b1" + "\\x4b3\\x4b5\\x4b7\\x4b9\\x4bb\\x4bd\\x4bf\\x4c2\\x4c4\\x4c6\\x4c8" + "\\x4ca\\x4cc\\x4ce\\x4cf\\x4d1\\x4d3\\x4d5\\x4d7\\x4d9\\x4db\\x4dd" + "\\x4df\\x4e1\\x4e3\\x4e5\\x4e7\\x4e9\\x4eb\\x4ed\\x4ef\\x4f1\\x4f3" + "\\x4f5\\x4f7\\x4f9\\x4fb\\x4fd\\x4ff\\x501\\x503\\x505\\x507\\x509" + "\\x50b\\x50d\\x50f\\x511\\x513\\x515\\x517\\x519\\x51b\\x51d\\x51f" + "\\x521\\x523\\x525\\x527\\x529\\x52b\\x52d\\x52f\\x561-\\x587" + "\\x13f8-\\x13fd\\x1d00-\\x1d2b\\x1d6b-\\x1d77\\x1d79-\\x1d9a" + "\\x1e01\\x1e03\\x1e05\\x1e07\\x1e09\\x1e0b\\x1e0d\\x1e0f\\x1e11" + "\\x1e13\\x1e15\\x1e17\\x1e19\\x1e1b\\x1e1d\\x1e1f\\x1e21\\x1e23" + "\\x1e25\\x1e27\\x1e29\\x1e2b\\x1e2d\\x1e2f\\x1e31\\x1e33\\x1e35" + "\\x1e37\\x1e39\\x1e3b\\x1e3d\\x1e3f\\x1e41\\x1e43\\x1e45\\x1e47" + "\\x1e49\\x1e4b\\x1e4d\\x1e4f\\x1e51\\x1e53\\x1e55\\x1e57\\x1e59" + "\\x1e5b\\x1e5d\\x1e5f\\x1e61\\x1e63\\x1e65\\x1e67\\x1e69\\x1e6b" + "\\x1e6d\\x1e6f\\x1e71\\x1e73\\x1e75\\x1e77\\x1e79\\x1e7b\\x1e7d" + "\\x1e7f\\x1e81\\x1e83\\x1e85\\x1e87\\x1e89\\x1e8b\\x1e8d\\x1e8f" + "\\x1e91\\x1e93\\x1e95-\\x1e9d\\x1e9f\\x1ea1\\x1ea3\\x1ea5\\x1ea7" + "\\x1ea9\\x1eab\\x1ead\\x1eaf\\x1eb1\\x1eb3\\x1eb5\\x1eb7\\x1eb9" + "\\x1ebb\\x1ebd\\x1ebf\\x1ec1\\x1ec3\\x1ec5\\x1ec7\\x1ec9\\x1ecb" + "\\x1ecd\\x1ecf\\x1ed1\\x1ed3\\x1ed5\\x1ed7\\x1ed9\\x1edb\\x1edd" + "\\x1edf\\x1ee1\\x1ee3\\x1ee5\\x1ee7\\x1ee9\\x1eeb\\x1eed\\x1eef" + "\\x1ef1\\x1ef3\\x1ef5\\x1ef7\\x1ef9\\x1efb\\x1efd\\x1eff-\\x1f07" + "\\x1f10-\\x1f15\\x1f20-\\x1f27\\x1f30-\\x1f37\\x1f40-\\x1f45" + "\\x1f50-\\x1f57\\x1f60-\\x1f67\\x1f70-\\x1f7d\\x1f80-\\x1f87" + "\\x1f90-\\x1f97\\x1fa0-\\x1fa7\\x1fb0-\\x1fb4\\x1fb6\\x1fb7\\x1fbe" + "\\x1fc2-\\x1fc4\\x1fc6\\x1fc7\\x1fd0-\\x1fd3\\x1fd6\\x1fd7" + "\\x1fe0-\\x1fe7\\x1ff2-\\x1ff4\\x1ff6\\x1ff7\\x210a\\x210e\\x210f" + 
"\\x2113\\x212f\\x2134\\x2139\\x213c\\x213d\\x2146-\\x2149\\x214e" + "\\x2184\\x2c30-\\x2c5e\\x2c61\\x2c65\\x2c66\\x2c68\\x2c6a\\x2c6c" + "\\x2c71\\x2c73\\x2c74\\x2c76-\\x2c7b\\x2c81\\x2c83\\x2c85\\x2c87" + "\\x2c89\\x2c8b\\x2c8d\\x2c8f\\x2c91\\x2c93\\x2c95\\x2c97\\x2c99" + "\\x2c9b\\x2c9d\\x2c9f\\x2ca1\\x2ca3\\x2ca5\\x2ca7\\x2ca9\\x2cab" + "\\x2cad\\x2caf\\x2cb1\\x2cb3\\x2cb5\\x2cb7\\x2cb9\\x2cbb\\x2cbd" + "\\x2cbf\\x2cc1\\x2cc3\\x2cc5\\x2cc7\\x2cc9\\x2ccb\\x2ccd\\x2ccf" + "\\x2cd1\\x2cd3\\x2cd5\\x2cd7\\x2cd9\\x2cdb\\x2cdd\\x2cdf\\x2ce1" + "\\x2ce3\\x2ce4\\x2cec\\x2cee\\x2cf3\\x2d00-\\x2d25\\x2d27\\x2d2d" + "\\xa641\\xa643\\xa645\\xa647\\xa649\\xa64b\\xa64d\\xa64f\\xa651" + "\\xa653\\xa655\\xa657\\xa659\\xa65b\\xa65d\\xa65f\\xa661\\xa663" + "\\xa665\\xa667\\xa669\\xa66b\\xa66d\\xa681\\xa683\\xa685\\xa687" + "\\xa689\\xa68b\\xa68d\\xa68f\\xa691\\xa693\\xa695\\xa697\\xa699" + "\\xa69b\\xa723\\xa725\\xa727\\xa729\\xa72b\\xa72d\\xa72f-\\xa731" + "\\xa733\\xa735\\xa737\\xa739\\xa73b\\xa73d\\xa73f\\xa741\\xa743" + "\\xa745\\xa747\\xa749\\xa74b\\xa74d\\xa74f\\xa751\\xa753\\xa755" + "\\xa757\\xa759\\xa75b\\xa75d\\xa75f\\xa761\\xa763\\xa765\\xa767" + "\\xa769\\xa76b\\xa76d\\xa76f\\xa771-\\xa778\\xa77a\\xa77c\\xa77f" + "\\xa781\\xa783\\xa785\\xa787\\xa78c\\xa78e\\xa791\\xa793-\\xa795" + "\\xa797\\xa799\\xa79b\\xa79d\\xa79f\\xa7a1\\xa7a3\\xa7a5\\xa7a7" + "\\xa7a9\\xa7b5\\xa7b7\\xa7fa\\xab30-\\xab5a\\xab60-\\xab65" + "\\xab70-\\xabbf\\xfb00-\\xfb06\\xfb13-\\xfb17\\xff41-\\xff5a" + "\\x10428-\\x1044f\\x10cc0-\\x10cf2\\x118c0-\\x118df" + "\\x1d41a-\\x1d433\\x1d44e-\\x1d454\\x1d456-\\x1d467" + "\\x1d482-\\x1d49b\\x1d4b6-\\x1d4b9\\x1d4bb\\x1d4bd-\\x1d4c3" + "\\x1d4c5-\\x1d4cf\\x1d4ea-\\x1d503\\x1d51e-\\x1d537" + "\\x1d552-\\x1d56b\\x1d586-\\x1d59f\\x1d5ba-\\x1d5d3" + "\\x1d5ee-\\x1d607\\x1d622-\\x1d63b\\x1d656-\\x1d66f" + "\\x1d68a-\\x1d6a5\\x1d6c2-\\x1d6da\\x1d6dc-\\x1d6e1" + "\\x1d6fc-\\x1d714\\x1d716-\\x1d71b\\x1d736-\\x1d74e" + 
"\\x1d750-\\x1d755\\x1d770-\\x1d788\\x1d78a-\\x1d78f" + "\\x1d7aa-\\x1d7c2\\x1d7c4-\\x1d7c9\\x1d7cb]"; + } + + static const char *letter_modifier() + { + return "[\\x2b0-\\x2c1\\x2c6-\\x2d1\\x2e0-\\x2e4\\x2ec\\x2ee\\x374" + "\\x37a\\x559\\x640\\x6e5\\x6e6\\x7f4\\x7f5\\x7fa\\x81a\\x824\\x828" + "\\x971\\xe46\\xec6\\x10fc\\x17d7\\x1843\\x1aa7\\x1c78-\\x1c7d" + "\\x1d2c-\\x1d6a\\x1d78\\x1d9b-\\x1dbf\\x2071\\x207f\\x2090-\\x209c" + "\\x2c7c\\x2c7d\\x2d6f\\x2e2f\\x3005\\x3031-\\x3035\\x303b" + "\\x309d\\x309e\\x30fc-\\x30fe\\xa015\\xa4f8-\\xa4fd\\xa60c\\xa67f" + "\\xa69c\\xa69d\\xa717-\\xa71f\\xa770\\xa788\\xa7f8\\xa7f9\\xa9cf" + "\\xa9e6\\xaa70\\xaadd\\xaaf3\\xaaf4\\xab5c-\\xab5f\\xff70" + "\\xff9e\\xff9f\\x16b40-\\x16b43\\x16f93-\\x16f9f]"; + } + + static const char *letter_other() + { + return "[\\xaa\\xba\\x1bb\\x1c0-\\x1c3\\x294\\x5d0-\\x5ea\\x5f0-\\x5f2" + "\\x620-\\x63f\\x641-\\x64a\\x66e\\x66f\\x671-\\x6d3\\x6d5" + "\\x6ee\\x6ef\\x6fa-\\x6fc\\x6ff\\x710\\x712-\\x72f\\x74d-\\x7a5" + "\\x7b1\\x7ca-\\x7ea\\x800-\\x815\\x840-\\x858\\x8a0-\\x8b4" + "\\x904-\\x939\\x93d\\x950\\x958-\\x961\\x972-\\x980\\x985-\\x98c" + "\\x98f\\x990\\x993-\\x9a8\\x9aa-\\x9b0\\x9b2\\x9b6-\\x9b9\\x9bd" + "\\x9ce\\x9dc\\x9dd\\x9df-\\x9e1\\x9f0\\x9f1\\xa05-\\xa0a" + "\\xa0f\\xa10\\xa13-\\xa28\\xa2a-\\xa30\\xa32\\xa33\\xa35\\xa36" + "\\xa38\\xa39\\xa59-\\xa5c\\xa5e\\xa72-\\xa74\\xa85-\\xa8d" + "\\xa8f-\\xa91\\xa93-\\xaa8\\xaaa-\\xab0\\xab2\\xab3\\xab5-\\xab9" + "\\xabd\\xad0\\xae0\\xae1\\xaf9\\xb05-\\xb0c\\xb0f\\xb10" + "\\xb13-\\xb28\\xb2a-\\xb30\\xb32\\xb33\\xb35-\\xb39\\xb3d" + "\\xb5c\\xb5d\\xb5f-\\xb61\\xb71\\xb83\\xb85-\\xb8a\\xb8e-\\xb90" + "\\xb92-\\xb95\\xb99\\xb9a\\xb9c\\xb9e\\xb9f\\xba3\\xba4" + "\\xba8-\\xbaa\\xbae-\\xbb9\\xbd0\\xc05-\\xc0c\\xc0e-\\xc10" + "\\xc12-\\xc28\\xc2a-\\xc39\\xc3d\\xc58-\\xc5a\\xc60\\xc61" + "\\xc85-\\xc8c\\xc8e-\\xc90\\xc92-\\xca8\\xcaa-\\xcb3\\xcb5-\\xcb9" + "\\xcbd\\xcde\\xce0\\xce1\\xcf1\\xcf2\\xd05-\\xd0c\\xd0e-\\xd10" + 
"\\xd12-\\xd3a\\xd3d\\xd4e\\xd5f-\\xd61\\xd7a-\\xd7f\\xd85-\\xd96" + "\\xd9a-\\xdb1\\xdb3-\\xdbb\\xdbd\\xdc0-\\xdc6\\xe01-\\xe30" + "\\xe32\\xe33\\xe40-\\xe45\\xe81\\xe82\\xe84\\xe87\\xe88\\xe8a" + "\\xe8d\\xe94-\\xe97\\xe99-\\xe9f\\xea1-\\xea3\\xea5\\xea7" + "\\xeaa\\xeab\\xead-\\xeb0\\xeb2\\xeb3\\xebd\\xec0-\\xec4" + "\\xedc-\\xedf\\xf00\\xf40-\\xf47\\xf49-\\xf6c\\xf88-\\xf8c" + "\\x1000-\\x102a\\x103f\\x1050-\\x1055\\x105a-\\x105d\\x1061" + "\\x1065\\x1066\\x106e-\\x1070\\x1075-\\x1081\\x108e\\x10d0-\\x10fa" + "\\x10fd-\\x1248\\x124a-\\x124d\\x1250-\\x1256\\x1258" + "\\x125a-\\x125d\\x1260-\\x1288\\x128a-\\x128d\\x1290-\\x12b0" + "\\x12b2-\\x12b5\\x12b8-\\x12be\\x12c0\\x12c2-\\x12c5" + "\\x12c8-\\x12d6\\x12d8-\\x1310\\x1312-\\x1315\\x1318-\\x135a" + "\\x1380-\\x138f\\x1401-\\x166c\\x166f-\\x167f\\x1681-\\x169a" + "\\x16a0-\\x16ea\\x16f1-\\x16f8\\x1700-\\x170c\\x170e-\\x1711" + "\\x1720-\\x1731\\x1740-\\x1751\\x1760-\\x176c\\x176e-\\x1770" + "\\x1780-\\x17b3\\x17dc\\x1820-\\x1842\\x1844-\\x1877" + "\\x1880-\\x18a8\\x18aa\\x18b0-\\x18f5\\x1900-\\x191e" + "\\x1950-\\x196d\\x1970-\\x1974\\x1980-\\x19ab\\x19b0-\\x19c9" + "\\x1a00-\\x1a16\\x1a20-\\x1a54\\x1b05-\\x1b33\\x1b45-\\x1b4b" + "\\x1b83-\\x1ba0\\x1bae\\x1baf\\x1bba-\\x1be5\\x1c00-\\x1c23" + "\\x1c4d-\\x1c4f\\x1c5a-\\x1c77\\x1ce9-\\x1cec\\x1cee-\\x1cf1" + "\\x1cf5\\x1cf6\\x2135-\\x2138\\x2d30-\\x2d67\\x2d80-\\x2d96" + "\\x2da0-\\x2da6\\x2da8-\\x2dae\\x2db0-\\x2db6\\x2db8-\\x2dbe" + "\\x2dc0-\\x2dc6\\x2dc8-\\x2dce\\x2dd0-\\x2dd6\\x2dd8-\\x2dde" + "\\x3006\\x303c\\x3041-\\x3096\\x309f\\x30a1-\\x30fa\\x30ff" + "\\x3105-\\x312d\\x3131-\\x318e\\x31a0-\\x31ba\\x31f0-\\x31ff" + "\\x3400\\x4db5\\x4e00\\x9fd5\\xa000-\\xa014\\xa016-\\xa48c" + "\\xa4d0-\\xa4f7\\xa500-\\xa60b\\xa610-\\xa61f\\xa62a\\xa62b\\xa66e" + "\\xa6a0-\\xa6e5\\xa78f\\xa7f7\\xa7fb-\\xa801\\xa803-\\xa805" + "\\xa807-\\xa80a\\xa80c-\\xa822\\xa840-\\xa873\\xa882-\\xa8b3" + "\\xa8f2-\\xa8f7\\xa8fb\\xa8fd\\xa90a-\\xa925\\xa930-\\xa946" + 
"\\xa960-\\xa97c\\xa984-\\xa9b2\\xa9e0-\\xa9e4\\xa9e7-\\xa9ef" + "\\xa9fa-\\xa9fe\\xaa00-\\xaa28\\xaa40-\\xaa42\\xaa44-\\xaa4b" + "\\xaa60-\\xaa6f\\xaa71-\\xaa76\\xaa7a\\xaa7e-\\xaaaf\\xaab1" + "\\xaab5\\xaab6\\xaab9-\\xaabd\\xaac0\\xaac2\\xaadb\\xaadc" + "\\xaae0-\\xaaea\\xaaf2\\xab01-\\xab06\\xab09-\\xab0e" + "\\xab11-\\xab16\\xab20-\\xab26\\xab28-\\xab2e\\xabc0-\\xabe2" + "\\xac00\\xd7a3\\xd7b0-\\xd7c6\\xd7cb-\\xd7fb\\xf900-\\xfa6d" + "\\xfa70-\\xfad9\\xfb1d\\xfb1f-\\xfb28\\xfb2a-\\xfb36" + "\\xfb38-\\xfb3c\\xfb3e\\xfb40\\xfb41\\xfb43\\xfb44\\xfb46-\\xfbb1" + "\\xfbd3-\\xfd3d\\xfd50-\\xfd8f\\xfd92-\\xfdc7\\xfdf0-\\xfdfb" + "\\xfe70-\\xfe74\\xfe76-\\xfefc\\xff66-\\xff6f\\xff71-\\xff9d" + "\\xffa0-\\xffbe\\xffc2-\\xffc7\\xffca-\\xffcf\\xffd2-\\xffd7" + "\\xffda-\\xffdc\\x10000-\\x1000b\\x1000d-\\x10026\\x10028-\\x1003a" + "\\x1003c\\x1003d\\x1003f-\\x1004d\\x10050-\\x1005d" + "\\x10080-\\x100fa\\x10280-\\x1029c\\x102a0-\\x102d0" + "\\x10300-\\x1031f\\x10330-\\x10340\\x10342-\\x10349" + "\\x10350-\\x10375\\x10380-\\x1039d\\x103a0-\\x103c3" + "\\x103c8-\\x103cf\\x10450-\\x1049d\\x10500-\\x10527" + "\\x10530-\\x10563\\x10600-\\x10736\\x10740-\\x10755" + "\\x10760-\\x10767\\x10800-\\x10805\\x10808\\x1080a-\\x10835" + "\\x10837\\x10838\\x1083c\\x1083f-\\x10855\\x10860-\\x10876" + "\\x10880-\\x1089e\\x108e0-\\x108f2\\x108f4\\x108f5" + "\\x10900-\\x10915\\x10920-\\x10939\\x10980-\\x109b7" + "\\x109be\\x109bf\\x10a00\\x10a10-\\x10a13\\x10a15-\\x10a17" + "\\x10a19-\\x10a33\\x10a60-\\x10a7c\\x10a80-\\x10a9c" + "\\x10ac0-\\x10ac7\\x10ac9-\\x10ae4\\x10b00-\\x10b35" + "\\x10b40-\\x10b55\\x10b60-\\x10b72\\x10b80-\\x10b91" + "\\x10c00-\\x10c48\\x11003-\\x11037\\x11083-\\x110af" + "\\x110d0-\\x110e8\\x11103-\\x11126\\x11150-\\x11172\\x11176" + "\\x11183-\\x111b2\\x111c1-\\x111c4\\x111da\\x111dc" + "\\x11200-\\x11211\\x11213-\\x1122b\\x11280-\\x11286\\x11288" + "\\x1128a-\\x1128d\\x1128f-\\x1129d\\x1129f-\\x112a8" + "\\x112b0-\\x112de\\x11305-\\x1130c\\x1130f\\x11310" + 
"\\x11313-\\x11328\\x1132a-\\x11330\\x11332\\x11333" + "\\x11335-\\x11339\\x1133d\\x11350\\x1135d-\\x11361" + "\\x11480-\\x114af\\x114c4\\x114c5\\x114c7\\x11580-\\x115ae" + "\\x115d8-\\x115db\\x11600-\\x1162f\\x11644\\x11680-\\x116aa" + "\\x11700-\\x11719\\x118ff\\x11ac0-\\x11af8\\x12000-\\x12399" + "\\x12480-\\x12543\\x13000-\\x1342e\\x14400-\\x14646" + "\\x16800-\\x16a38\\x16a40-\\x16a5e\\x16ad0-\\x16aed" + "\\x16b00-\\x16b2f\\x16b63-\\x16b77\\x16b7d-\\x16b8f" + "\\x16f00-\\x16f44\\x16f50\\x1b000\\x1b001\\x1bc00-\\x1bc6a" + "\\x1bc70-\\x1bc7c\\x1bc80-\\x1bc88\\x1bc90-\\x1bc99" + "\\x1e800-\\x1e8c4\\x1ee00-\\x1ee03\\x1ee05-\\x1ee1f" + "\\x1ee21\\x1ee22\\x1ee24\\x1ee27\\x1ee29-\\x1ee32\\x1ee34-\\x1ee37" + "\\x1ee39\\x1ee3b\\x1ee42\\x1ee47\\x1ee49\\x1ee4b\\x1ee4d-\\x1ee4f" + "\\x1ee51\\x1ee52\\x1ee54\\x1ee57\\x1ee59\\x1ee5b\\x1ee5d\\x1ee5f" + "\\x1ee61\\x1ee62\\x1ee64\\x1ee67-\\x1ee6a\\x1ee6c-\\x1ee72" + "\\x1ee74-\\x1ee77\\x1ee79-\\x1ee7c\\x1ee7e\\x1ee80-\\x1ee89" + "\\x1ee8b-\\x1ee9b\\x1eea1-\\x1eea3\\x1eea5-\\x1eea9" + "\\x1eeab-\\x1eebb\\x20000\\x2a6d6\\x2a700\\x2b734\\x2b740\\x2b81d" + "\\x2b820\\x2cea1\\x2f800-\\x2fa1d]"; + } + + static const char *letter_titlecase() + { + return "[\\x1c5\\x1c8\\x1cb\\x1f2\\x1f88-\\x1f8f\\x1f98-\\x1f9f" + "\\x1fa8-\\x1faf\\x1fbc\\x1fcc\\x1ffc]"; + } + + static const char *letter_uppercase() + { + return "[\\x41-\\x5a\\xc0-\\xd6\\xd8-\\xde\\x100\\x102\\x104\\x106" + "\\x108\\x10a\\x10c\\x10e\\x110\\x112\\x114\\x116\\x118\\x11a\\x11c" + "\\x11e\\x120\\x122\\x124\\x126\\x128\\x12a\\x12c\\x12e\\x130\\x132" + "\\x134\\x136\\x139\\x13b\\x13d\\x13f\\x141\\x143\\x145\\x147\\x14a" + "\\x14c\\x14e\\x150\\x152\\x154\\x156\\x158\\x15a\\x15c\\x15e\\x160" + "\\x162\\x164\\x166\\x168\\x16a\\x16c\\x16e\\x170\\x172\\x174\\x176" + "\\x178\\x179\\x17b\\x17d\\x181\\x182\\x184\\x186\\x187" + "\\x189-\\x18b\\x18e-\\x191\\x193\\x194\\x196-\\x198\\x19c\\x19d" + "\\x19f\\x1a0\\x1a2\\x1a4\\x1a6\\x1a7\\x1a9\\x1ac\\x1ae\\x1af" + 
"\\x1b1-\\x1b3\\x1b5\\x1b7\\x1b8\\x1bc\\x1c4\\x1c7\\x1ca\\x1cd" + "\\x1cf\\x1d1\\x1d3\\x1d5\\x1d7\\x1d9\\x1db\\x1de\\x1e0\\x1e2\\x1e4" + "\\x1e6\\x1e8\\x1ea\\x1ec\\x1ee\\x1f1\\x1f4\\x1f6-\\x1f8\\x1fa" + "\\x1fc\\x1fe\\x200\\x202\\x204\\x206\\x208\\x20a\\x20c\\x20e\\x210" + "\\x212\\x214\\x216\\x218\\x21a\\x21c\\x21e\\x220\\x222\\x224\\x226" + "\\x228\\x22a\\x22c\\x22e\\x230\\x232\\x23a\\x23b\\x23d\\x23e\\x241" + "\\x243-\\x246\\x248\\x24a\\x24c\\x24e\\x370\\x372\\x376\\x37f" + "\\x386\\x388-\\x38a\\x38c\\x38e\\x38f\\x391-\\x3a1\\x3a3-\\x3ab" + "\\x3cf\\x3d2-\\x3d4\\x3d8\\x3da\\x3dc\\x3de\\x3e0\\x3e2\\x3e4" + "\\x3e6\\x3e8\\x3ea\\x3ec\\x3ee\\x3f4\\x3f7\\x3f9\\x3fa" + "\\x3fd-\\x42f\\x460\\x462\\x464\\x466\\x468\\x46a\\x46c\\x46e" + "\\x470\\x472\\x474\\x476\\x478\\x47a\\x47c\\x47e\\x480\\x48a\\x48c" + "\\x48e\\x490\\x492\\x494\\x496\\x498\\x49a\\x49c\\x49e\\x4a0\\x4a2" + "\\x4a4\\x4a6\\x4a8\\x4aa\\x4ac\\x4ae\\x4b0\\x4b2\\x4b4\\x4b6\\x4b8" + "\\x4ba\\x4bc\\x4be\\x4c0\\x4c1\\x4c3\\x4c5\\x4c7\\x4c9\\x4cb\\x4cd" + "\\x4d0\\x4d2\\x4d4\\x4d6\\x4d8\\x4da\\x4dc\\x4de\\x4e0\\x4e2\\x4e4" + "\\x4e6\\x4e8\\x4ea\\x4ec\\x4ee\\x4f0\\x4f2\\x4f4\\x4f6\\x4f8\\x4fa" + "\\x4fc\\x4fe\\x500\\x502\\x504\\x506\\x508\\x50a\\x50c\\x50e\\x510" + "\\x512\\x514\\x516\\x518\\x51a\\x51c\\x51e\\x520\\x522\\x524\\x526" + "\\x528\\x52a\\x52c\\x52e\\x531-\\x556\\x10a0-\\x10c5\\x10c7\\x10cd" + "\\x13a0-\\x13f5\\x1e00\\x1e02\\x1e04\\x1e06\\x1e08\\x1e0a\\x1e0c" + "\\x1e0e\\x1e10\\x1e12\\x1e14\\x1e16\\x1e18\\x1e1a\\x1e1c\\x1e1e" + "\\x1e20\\x1e22\\x1e24\\x1e26\\x1e28\\x1e2a\\x1e2c\\x1e2e\\x1e30" + "\\x1e32\\x1e34\\x1e36\\x1e38\\x1e3a\\x1e3c\\x1e3e\\x1e40\\x1e42" + "\\x1e44\\x1e46\\x1e48\\x1e4a\\x1e4c\\x1e4e\\x1e50\\x1e52\\x1e54" + "\\x1e56\\x1e58\\x1e5a\\x1e5c\\x1e5e\\x1e60\\x1e62\\x1e64\\x1e66" + "\\x1e68\\x1e6a\\x1e6c\\x1e6e\\x1e70\\x1e72\\x1e74\\x1e76\\x1e78" + "\\x1e7a\\x1e7c\\x1e7e\\x1e80\\x1e82\\x1e84\\x1e86\\x1e88\\x1e8a" + "\\x1e8c\\x1e8e\\x1e90\\x1e92\\x1e94\\x1e9e\\x1ea0\\x1ea2\\x1ea4" + 
"\\x1ea6\\x1ea8\\x1eaa\\x1eac\\x1eae\\x1eb0\\x1eb2\\x1eb4\\x1eb6" + "\\x1eb8\\x1eba\\x1ebc\\x1ebe\\x1ec0\\x1ec2\\x1ec4\\x1ec6\\x1ec8" + "\\x1eca\\x1ecc\\x1ece\\x1ed0\\x1ed2\\x1ed4\\x1ed6\\x1ed8\\x1eda" + "\\x1edc\\x1ede\\x1ee0\\x1ee2\\x1ee4\\x1ee6\\x1ee8\\x1eea\\x1eec" + "\\x1eee\\x1ef0\\x1ef2\\x1ef4\\x1ef6\\x1ef8\\x1efa\\x1efc\\x1efe" + "\\x1f08-\\x1f0f\\x1f18-\\x1f1d\\x1f28-\\x1f2f\\x1f38-\\x1f3f" + "\\x1f48-\\x1f4d\\x1f59\\x1f5b\\x1f5d\\x1f5f\\x1f68-\\x1f6f" + "\\x1fb8-\\x1fbb\\x1fc8-\\x1fcb\\x1fd8-\\x1fdb\\x1fe8-\\x1fec" + "\\x1ff8-\\x1ffb\\x2102\\x2107\\x210b-\\x210d\\x2110-\\x2112\\x2115" + "\\x2119-\\x211d\\x2124\\x2126\\x2128\\x212a-\\x212d\\x2130-\\x2133" + "\\x213e\\x213f\\x2145\\x2183\\x2c00-\\x2c2e\\x2c60\\x2c62-\\x2c64" + "\\x2c67\\x2c69\\x2c6b\\x2c6d-\\x2c70\\x2c72\\x2c75\\x2c7e-\\x2c80" + "\\x2c82\\x2c84\\x2c86\\x2c88\\x2c8a\\x2c8c\\x2c8e\\x2c90\\x2c92" + "\\x2c94\\x2c96\\x2c98\\x2c9a\\x2c9c\\x2c9e\\x2ca0\\x2ca2\\x2ca4" + "\\x2ca6\\x2ca8\\x2caa\\x2cac\\x2cae\\x2cb0\\x2cb2\\x2cb4\\x2cb6" + "\\x2cb8\\x2cba\\x2cbc\\x2cbe\\x2cc0\\x2cc2\\x2cc4\\x2cc6\\x2cc8" + "\\x2cca\\x2ccc\\x2cce\\x2cd0\\x2cd2\\x2cd4\\x2cd6\\x2cd8\\x2cda" + "\\x2cdc\\x2cde\\x2ce0\\x2ce2\\x2ceb\\x2ced\\x2cf2\\xa640\\xa642" + "\\xa644\\xa646\\xa648\\xa64a\\xa64c\\xa64e\\xa650\\xa652\\xa654" + "\\xa656\\xa658\\xa65a\\xa65c\\xa65e\\xa660\\xa662\\xa664\\xa666" + "\\xa668\\xa66a\\xa66c\\xa680\\xa682\\xa684\\xa686\\xa688\\xa68a" + "\\xa68c\\xa68e\\xa690\\xa692\\xa694\\xa696\\xa698\\xa69a\\xa722" + "\\xa724\\xa726\\xa728\\xa72a\\xa72c\\xa72e\\xa732\\xa734\\xa736" + "\\xa738\\xa73a\\xa73c\\xa73e\\xa740\\xa742\\xa744\\xa746\\xa748" + "\\xa74a\\xa74c\\xa74e\\xa750\\xa752\\xa754\\xa756\\xa758\\xa75a" + "\\xa75c\\xa75e\\xa760\\xa762\\xa764\\xa766\\xa768\\xa76a\\xa76c" + "\\xa76e\\xa779\\xa77b\\xa77d\\xa77e\\xa780\\xa782\\xa784\\xa786" + "\\xa78b\\xa78d\\xa790\\xa792\\xa796\\xa798\\xa79a\\xa79c\\xa79e" + "\\xa7a0\\xa7a2\\xa7a4\\xa7a6\\xa7a8\\xa7aa-\\xa7ad\\xa7b0-\\xa7b4" + 
"\\xa7b6\\xff21-\\xff3a\\x10400-\\x10427\\x10c80-\\x10cb2" + "\\x118a0-\\x118bf\\x1d400-\\x1d419\\x1d434-\\x1d44d" + "\\x1d468-\\x1d481\\x1d49c\\x1d49e\\x1d49f\\x1d4a2\\x1d4a5\\x1d4a6" + "\\x1d4a9-\\x1d4ac\\x1d4ae-\\x1d4b5\\x1d4d0-\\x1d4e9" + "\\x1d504\\x1d505\\x1d507-\\x1d50a\\x1d50d-\\x1d514" + "\\x1d516-\\x1d51c\\x1d538\\x1d539\\x1d53b-\\x1d53e" + "\\x1d540-\\x1d544\\x1d546\\x1d54a-\\x1d550\\x1d56c-\\x1d585" + "\\x1d5a0-\\x1d5b9\\x1d5d4-\\x1d5ed\\x1d608-\\x1d621" + "\\x1d63c-\\x1d655\\x1d670-\\x1d689\\x1d6a8-\\x1d6c0" + "\\x1d6e2-\\x1d6fa\\x1d71c-\\x1d734\\x1d756-\\x1d76e" + "\\x1d790-\\x1d7a8\\x1d7ca]"; + } + + static const char *mark_combining() + { + return "[\\x903\\x93b\\x93e-\\x940\\x949-\\x94c\\x94e\\x94f\\x982\\x983" + "\\x9be-\\x9c0\\x9c7\\x9c8\\x9cb\\x9cc\\x9d7\\xa03\\xa3e-\\xa40" + "\\xa83\\xabe-\\xac0\\xac9\\xacb\\xacc\\xb02\\xb03\\xb3e\\xb40" + "\\xb47\\xb48\\xb4b\\xb4c\\xb57\\xbbe\\xbbf\\xbc1\\xbc2" + "\\xbc6-\\xbc8\\xbca-\\xbcc\\xbd7\\xc01-\\xc03\\xc41-\\xc44" + "\\xc82\\xc83\\xcbe\\xcc0-\\xcc4\\xcc7\\xcc8\\xcca\\xccb" + "\\xcd5\\xcd6\\xd02\\xd03\\xd3e-\\xd40\\xd46-\\xd48\\xd4a-\\xd4c" + "\\xd57\\xd82\\xd83\\xdcf-\\xdd1\\xdd8-\\xddf\\xdf2\\xdf3" + "\\xf3e\\xf3f\\xf7f\\x102b\\x102c\\x1031\\x1038\\x103b\\x103c" + "\\x1056\\x1057\\x1062-\\x1064\\x1067-\\x106d\\x1083\\x1084" + "\\x1087-\\x108c\\x108f\\x109a-\\x109c\\x17b6\\x17be-\\x17c5" + "\\x17c7\\x17c8\\x1923-\\x1926\\x1929-\\x192b\\x1930\\x1931" + "\\x1933-\\x1938\\x1a19\\x1a1a\\x1a55\\x1a57\\x1a61\\x1a63\\x1a64" + "\\x1a6d-\\x1a72\\x1b04\\x1b35\\x1b3b\\x1b3d-\\x1b41\\x1b43\\x1b44" + "\\x1b82\\x1ba1\\x1ba6\\x1ba7\\x1baa\\x1be7\\x1bea-\\x1bec\\x1bee" + "\\x1bf2\\x1bf3\\x1c24-\\x1c2b\\x1c34\\x1c35\\x1ce1\\x1cf2\\x1cf3" + "\\x302e\\x302f\\xa823\\xa824\\xa827\\xa880\\xa881\\xa8b4-\\xa8c3" + "\\xa952\\xa953\\xa983\\xa9b4\\xa9b5\\xa9ba\\xa9bb\\xa9bd-\\xa9c0" + "\\xaa2f\\xaa30\\xaa33\\xaa34\\xaa4d\\xaa7b\\xaa7d\\xaaeb" + "\\xaaee\\xaaef\\xaaf5\\xabe3\\xabe4\\xabe6\\xabe7\\xabe9\\xabea" + 
"\\xabec\\x11000\\x11002\\x11082\\x110b0-\\x110b2\\x110b7\\x110b8" + "\\x1112c\\x11182\\x111b3-\\x111b5\\x111bf\\x111c0\\x1122c-\\x1122e" + "\\x11232\\x11233\\x11235\\x112e0-\\x112e2\\x11302\\x11303" + "\\x1133e\\x1133f\\x11341-\\x11344\\x11347\\x11348\\x1134b-\\x1134d" + "\\x11357\\x11362\\x11363\\x114b0-\\x114b2\\x114b9\\x114bb-\\x114be" + "\\x114c1\\x115af-\\x115b1\\x115b8-\\x115bb\\x115be" + "\\x11630-\\x11632\\x1163b\\x1163c\\x1163e\\x116ac\\x116ae\\x116af" + "\\x116b6\\x11720\\x11721\\x11726\\x16f51-\\x16f7e\\x1d165\\x1d166" + "\\x1d16d-\\x1d172]"; + } + + static const char *mark_enclosing() + { + return "[\\x488\\x489\\x1abe\\x20dd-\\x20e0\\x20e2-\\x20e4" + "\\xa670-\\xa672]"; + } + + static const char *mark_nonspacing() + { + return "[\\x300-\\x36f\\x483-\\x487\\x591-\\x5bd\\x5bf\\x5c1\\x5c2" + "\\x5c4\\x5c5\\x5c7\\x610-\\x61a\\x64b-\\x65f\\x670\\x6d6-\\x6dc" + "\\x6df-\\x6e4\\x6e7\\x6e8\\x6ea-\\x6ed\\x711\\x730-\\x74a" + "\\x7a6-\\x7b0\\x7eb-\\x7f3\\x816-\\x819\\x81b-\\x823\\x825-\\x827" + "\\x829-\\x82d\\x859-\\x85b\\x8e3-\\x902\\x93a\\x93c\\x941-\\x948" + "\\x94d\\x951-\\x957\\x962\\x963\\x981\\x9bc\\x9c1-\\x9c4\\x9cd" + "\\x9e2\\x9e3\\xa01\\xa02\\xa3c\\xa41\\xa42\\xa47\\xa48" + "\\xa4b-\\xa4d\\xa51\\xa70\\xa71\\xa75\\xa81\\xa82\\xabc" + "\\xac1-\\xac5\\xac7\\xac8\\xacd\\xae2\\xae3\\xb01\\xb3c\\xb3f" + "\\xb41-\\xb44\\xb4d\\xb56\\xb62\\xb63\\xb82\\xbc0\\xbcd\\xc00" + "\\xc3e-\\xc40\\xc46-\\xc48\\xc4a-\\xc4d\\xc55\\xc56\\xc62\\xc63" + "\\xc81\\xcbc\\xcbf\\xcc6\\xccc\\xccd\\xce2\\xce3\\xd01" + "\\xd41-\\xd44\\xd4d\\xd62\\xd63\\xdca\\xdd2-\\xdd4\\xdd6\\xe31" + "\\xe34-\\xe3a\\xe47-\\xe4e\\xeb1\\xeb4-\\xeb9\\xebb\\xebc" + "\\xec8-\\xecd\\xf18\\xf19\\xf35\\xf37\\xf39\\xf71-\\xf7e" + "\\xf80-\\xf84\\xf86\\xf87\\xf8d-\\xf97\\xf99-\\xfbc\\xfc6" + "\\x102d-\\x1030\\x1032-\\x1037\\x1039\\x103a\\x103d\\x103e" + "\\x1058\\x1059\\x105e-\\x1060\\x1071-\\x1074\\x1082\\x1085\\x1086" + "\\x108d\\x109d\\x135d-\\x135f\\x1712-\\x1714\\x1732-\\x1734" + 
"\\x1752\\x1753\\x1772\\x1773\\x17b4\\x17b5\\x17b7-\\x17bd\\x17c6" + "\\x17c9-\\x17d3\\x17dd\\x180b-\\x180d\\x18a9\\x1920-\\x1922" + "\\x1927\\x1928\\x1932\\x1939-\\x193b\\x1a17\\x1a18\\x1a1b\\x1a56" + "\\x1a58-\\x1a5e\\x1a60\\x1a62\\x1a65-\\x1a6c\\x1a73-\\x1a7c\\x1a7f" + "\\x1ab0-\\x1abd\\x1b00-\\x1b03\\x1b34\\x1b36-\\x1b3a\\x1b3c\\x1b42" + "\\x1b6b-\\x1b73\\x1b80\\x1b81\\x1ba2-\\x1ba5\\x1ba8\\x1ba9" + "\\x1bab-\\x1bad\\x1be6\\x1be8\\x1be9\\x1bed\\x1bef-\\x1bf1" + "\\x1c2c-\\x1c33\\x1c36\\x1c37\\x1cd0-\\x1cd2\\x1cd4-\\x1ce0" + "\\x1ce2-\\x1ce8\\x1ced\\x1cf4\\x1cf8\\x1cf9\\x1dc0-\\x1df5" + "\\x1dfc-\\x1dff\\x20d0-\\x20dc\\x20e1\\x20e5-\\x20f0" + "\\x2cef-\\x2cf1\\x2d7f\\x2de0-\\x2dff\\x302a-\\x302d\\x3099\\x309a" + "\\xa66f\\xa674-\\xa67d\\xa69e\\xa69f\\xa6f0\\xa6f1\\xa802\\xa806" + "\\xa80b\\xa825\\xa826\\xa8c4\\xa8e0-\\xa8f1\\xa926-\\xa92d" + "\\xa947-\\xa951\\xa980-\\xa982\\xa9b3\\xa9b6-\\xa9b9\\xa9bc\\xa9e5" + "\\xaa29-\\xaa2e\\xaa31\\xaa32\\xaa35\\xaa36\\xaa43\\xaa4c\\xaa7c" + "\\xaab0\\xaab2-\\xaab4\\xaab7\\xaab8\\xaabe\\xaabf\\xaac1" + "\\xaaec\\xaaed\\xaaf6\\xabe5\\xabe8\\xabed\\xfb1e\\xfe00-\\xfe0f" + "\\xfe20-\\xfe2f\\x101fd\\x102e0\\x10376-\\x1037a\\x10a01-\\x10a03" + "\\x10a05\\x10a06\\x10a0c-\\x10a0f\\x10a38-\\x10a3a\\x10a3f" + "\\x10ae5\\x10ae6\\x11001\\x11038-\\x11046\\x1107f-\\x11081" + "\\x110b3-\\x110b6\\x110b9\\x110ba\\x11100-\\x11102" + "\\x11127-\\x1112b\\x1112d-\\x11134\\x11173\\x11180\\x11181" + "\\x111b6-\\x111be\\x111ca-\\x111cc\\x1122f-\\x11231\\x11234" + "\\x11236\\x11237\\x112df\\x112e3-\\x112ea\\x11300\\x11301\\x1133c" + "\\x11340\\x11366-\\x1136c\\x11370-\\x11374\\x114b3-\\x114b8" + "\\x114ba\\x114bf\\x114c0\\x114c2\\x114c3\\x115b2-\\x115b5" + "\\x115bc\\x115bd\\x115bf\\x115c0\\x115dc\\x115dd\\x11633-\\x1163a" + "\\x1163d\\x1163f\\x11640\\x116ab\\x116ad\\x116b0-\\x116b5\\x116b7" + "\\x1171d-\\x1171f\\x11722-\\x11725\\x11727-\\x1172b" + "\\x16af0-\\x16af4\\x16b30-\\x16b36\\x16f8f-\\x16f92" + 
"\\x1bc9d\\x1bc9e\\x1d167-\\x1d169\\x1d17b-\\x1d182" + "\\x1d185-\\x1d18b\\x1d1aa-\\x1d1ad\\x1d242-\\x1d244" + "\\x1da00-\\x1da36\\x1da3b-\\x1da6c\\x1da75\\x1da84" + "\\x1da9b-\\x1da9f\\x1daa1-\\x1daaf\\x1e8d0-\\x1e8d6" + "\\xe0100-\\xe01ef]"; + } + + static const char *number_decimal() + { + return "[\\x30-\\x39\\x660-\\x669\\x6f0-\\x6f9\\x7c0-\\x7c9" + "\\x966-\\x96f\\x9e6-\\x9ef\\xa66-\\xa6f\\xae6-\\xaef\\xb66-\\xb6f" + "\\xbe6-\\xbef\\xc66-\\xc6f\\xce6-\\xcef\\xd66-\\xd6f\\xde6-\\xdef" + "\\xe50-\\xe59\\xed0-\\xed9\\xf20-\\xf29\\x1040-\\x1049" + "\\x1090-\\x1099\\x17e0-\\x17e9\\x1810-\\x1819\\x1946-\\x194f" + "\\x19d0-\\x19d9\\x1a80-\\x1a89\\x1a90-\\x1a99\\x1b50-\\x1b59" + "\\x1bb0-\\x1bb9\\x1c40-\\x1c49\\x1c50-\\x1c59\\xa620-\\xa629" + "\\xa8d0-\\xa8d9\\xa900-\\xa909\\xa9d0-\\xa9d9\\xa9f0-\\xa9f9" + "\\xaa50-\\xaa59\\xabf0-\\xabf9\\xff10-\\xff19\\x104a0-\\x104a9" + "\\x11066-\\x1106f\\x110f0-\\x110f9\\x11136-\\x1113f" + "\\x111d0-\\x111d9\\x112f0-\\x112f9\\x114d0-\\x114d9" + "\\x11650-\\x11659\\x116c0-\\x116c9\\x11730-\\x11739" + "\\x118e0-\\x118e9\\x16a60-\\x16a69\\x16b50-\\x16b59" + "\\x1d7ce-\\x1d7ff]"; + } + + static const char *number_letter() + { + return "[\\x16ee-\\x16f0\\x2160-\\x2182\\x2185-\\x2188\\x3007" + "\\x3021-\\x3029\\x3038-\\x303a\\xa6e6-\\xa6ef\\x10140-\\x10174" + "\\x10341\\x1034a\\x103d1-\\x103d5\\x12400-\\x1246e]"; + } + + static const char *number_other() + { + return "[\\xb2\\xb3\\xb9\\xbc-\\xbe\\x9f4-\\x9f9\\xb72-\\xb77" + "\\xbf0-\\xbf2\\xc78-\\xc7e\\xd70-\\xd75\\xf2a-\\xf33" + "\\x1369-\\x137c\\x17f0-\\x17f9\\x19da\\x2070\\x2074-\\x2079" + "\\x2080-\\x2089\\x2150-\\x215f\\x2189\\x2460-\\x249b" + "\\x24ea-\\x24ff\\x2776-\\x2793\\x2cfd\\x3192-\\x3195" + "\\x3220-\\x3229\\x3248-\\x324f\\x3251-\\x325f\\x3280-\\x3289" + "\\x32b1-\\x32bf\\xa830-\\xa835\\x10107-\\x10133\\x10175-\\x10178" + "\\x1018a\\x1018b\\x102e1-\\x102fb\\x10320-\\x10323" + "\\x10858-\\x1085f\\x10879-\\x1087f\\x108a7-\\x108af" + 
"\\x108fb-\\x108ff\\x10916-\\x1091b\\x109bc\\x109bd" + "\\x109c0-\\x109cf\\x109d2-\\x109ff\\x10a40-\\x10a47" + "\\x10a7d\\x10a7e\\x10a9d-\\x10a9f\\x10aeb-\\x10aef" + "\\x10b58-\\x10b5f\\x10b78-\\x10b7f\\x10ba9-\\x10baf" + "\\x10cfa-\\x10cff\\x10e60-\\x10e7e\\x11052-\\x11065" + "\\x111e1-\\x111f4\\x1173a\\x1173b\\x118ea-\\x118f2" + "\\x16b5b-\\x16b61\\x1d360-\\x1d371\\x1e8c7-\\x1e8cf" + "\\x1f100-\\x1f10c]"; + } + + static const char *punctuation_connector() + { + return "[\\x5f\\x203f\\x2040\\x2054\\xfe33\\xfe34\\xfe4d-\\xfe4f" + "\\xff3f]"; + } + + static const char *punctuation_dash() + { + return "[\\x2d\\x58a\\x5be\\x1400\\x1806\\x2010-\\x2015\\x2e17\\x2e1a" + "\\x2e3a\\x2e3b\\x2e40\\x301c\\x3030\\x30a0\\xfe31\\xfe32\\xfe58" + "\\xfe63\\xff0d]"; + } + + static const char *punctuation_close() + { + return "[\\x29\\x5d\\x7d\\xf3b\\xf3d\\x169c\\x2046\\x207e\\x208e\\x2309" + "\\x230b\\x232a\\x2769\\x276b\\x276d\\x276f\\x2771\\x2773\\x2775" + "\\x27c6\\x27e7\\x27e9\\x27eb\\x27ed\\x27ef\\x2984\\x2986\\x2988" + "\\x298a\\x298c\\x298e\\x2990\\x2992\\x2994\\x2996\\x2998\\x29d9" + "\\x29db\\x29fd\\x2e23\\x2e25\\x2e27\\x2e29\\x3009\\x300b\\x300d" + "\\x300f\\x3011\\x3015\\x3017\\x3019\\x301b\\x301e\\x301f\\xfd3e" + "\\xfe18\\xfe36\\xfe38\\xfe3a\\xfe3c\\xfe3e\\xfe40\\xfe42\\xfe44" + "\\xfe48\\xfe5a\\xfe5c\\xfe5e\\xff09\\xff3d\\xff5d\\xff60\\xff63]"; + } + + static const char *punctuation_final() + { + return "[\\xbb\\x2019\\x201d\\x203a\\x2e03\\x2e05\\x2e0a\\x2e0d\\x2e1d" + "\\x2e21]"; + } + + static const char *punctuation_initial() + { + return "[\\xab\\x2018\\x201b\\x201c\\x201f\\x2039\\x2e02\\x2e04\\x2e09" + "\\x2e0c\\x2e1c\\x2e20]"; + } + + static const char *punctuation_other() + { + return "[\\x21-\\x23\\x25-\\x27\\x2a\\x2c\\x2e\\x2f\\x3a\\x3b\\x3f\\x40" + "\\x5c\\xa1\\xa7\\xb6\\xb7\\xbf\\x37e\\x387\\x55a-\\x55f\\x589" + "\\x5c0\\x5c3\\x5c6\\x5f3\\x5f4\\x609\\x60a\\x60c\\x60d\\x61b" + "\\x61e\\x61f\\x66a-\\x66d\\x6d4\\x700-\\x70d\\x7f7-\\x7f9" + 
"\\x830-\\x83e\\x85e\\x964\\x965\\x970\\xaf0\\xdf4\\xe4f" + "\\xe5a\\xe5b\\xf04-\\xf12\\xf14\\xf85\\xfd0-\\xfd4\\xfd9\\xfda" + "\\x104a-\\x104f\\x10fb\\x1360-\\x1368\\x166d\\x166e\\x16eb-\\x16ed" + "\\x1735\\x1736\\x17d4-\\x17d6\\x17d8-\\x17da\\x1800-\\x1805" + "\\x1807-\\x180a\\x1944\\x1945\\x1a1e\\x1a1f\\x1aa0-\\x1aa6" + "\\x1aa8-\\x1aad\\x1b5a-\\x1b60\\x1bfc-\\x1bff\\x1c3b-\\x1c3f" + "\\x1c7e\\x1c7f\\x1cc0-\\x1cc7\\x1cd3\\x2016\\x2017\\x2020-\\x2027" + "\\x2030-\\x2038\\x203b-\\x203e\\x2041-\\x2043\\x2047-\\x2051" + "\\x2053\\x2055-\\x205e\\x2cf9-\\x2cfc\\x2cfe\\x2cff\\x2d70" + "\\x2e00\\x2e01\\x2e06-\\x2e08\\x2e0b\\x2e0e-\\x2e16\\x2e18\\x2e19" + "\\x2e1b\\x2e1e\\x2e1f\\x2e2a-\\x2e2e\\x2e30-\\x2e39\\x2e3c-\\x2e3f" + "\\x2e41\\x3001-\\x3003\\x303d\\x30fb\\xa4fe\\xa4ff\\xa60d-\\xa60f" + "\\xa673\\xa67e\\xa6f2-\\xa6f7\\xa874-\\xa877\\xa8ce\\xa8cf" + "\\xa8f8-\\xa8fa\\xa8fc\\xa92e\\xa92f\\xa95f\\xa9c1-\\xa9cd" + "\\xa9de\\xa9df\\xaa5c-\\xaa5f\\xaade\\xaadf\\xaaf0\\xaaf1\\xabeb" + "\\xfe10-\\xfe16\\xfe19\\xfe30\\xfe45\\xfe46\\xfe49-\\xfe4c" + "\\xfe50-\\xfe52\\xfe54-\\xfe57\\xfe5f-\\xfe61\\xfe68\\xfe6a\\xfe6b" + "\\xff01-\\xff03\\xff05-\\xff07\\xff0a\\xff0c\\xff0e\\xff0f" + "\\xff1a\\xff1b\\xff1f\\xff20\\xff3c\\xff61\\xff64\\xff65" + "\\x10100-\\x10102\\x1039f\\x103d0\\x1056f\\x10857\\x1091f\\x1093f" + "\\x10a50-\\x10a58\\x10a7f\\x10af0-\\x10af6\\x10b39-\\x10b3f" + "\\x10b99-\\x10b9c\\x11047-\\x1104d\\x110bb\\x110bc" + "\\x110be-\\x110c1\\x11140-\\x11143\\x11174\\x11175" + "\\x111c5-\\x111c9\\x111cd\\x111db\\x111dd-\\x111df" + "\\x11238-\\x1123d\\x112a9\\x114c6\\x115c1-\\x115d7" + "\\x11641-\\x11643\\x1173c-\\x1173e\\x12470-\\x12474" + "\\x16a6e\\x16a6f\\x16af5\\x16b37-\\x16b3b\\x16b44\\x1bc9f" + "\\x1da87-\\x1da8b]"; + } + + static const char *punctuation_open() + { + return "[\\x28\\x5b\\x7b\\xf3a\\xf3c\\x169b\\x201a\\x201e\\x2045\\x207d" + "\\x208d\\x2308\\x230a\\x2329\\x2768\\x276a\\x276c\\x276e\\x2770" + 
"\\x2772\\x2774\\x27c5\\x27e6\\x27e8\\x27ea\\x27ec\\x27ee\\x2983" + "\\x2985\\x2987\\x2989\\x298b\\x298d\\x298f\\x2991\\x2993\\x2995" + "\\x2997\\x29d8\\x29da\\x29fc\\x2e22\\x2e24\\x2e26\\x2e28\\x2e42" + "\\x3008\\x300a\\x300c\\x300e\\x3010\\x3014\\x3016\\x3018\\x301a" + "\\x301d\\xfd3f\\xfe17\\xfe35\\xfe37\\xfe39\\xfe3b\\xfe3d\\xfe3f" + "\\xfe41\\xfe43\\xfe47\\xfe59\\xfe5b\\xfe5d\\xff08\\xff3b\\xff5b" + "\\xff5f\\xff62]"; + } + + static const char *symbol_currency() + { + return "[\\x24\\xa2-\\xa5\\x58f\\x60b\\x9f2\\x9f3\\x9fb\\xaf1\\xbf9" + "\\xe3f\\x17db\\x20a0-\\x20be\\xa838\\xfdfc\\xfe69\\xff04" + "\\xffe0\\xffe1\\xffe5\\xffe6]"; + } + + static const char *symbol_modifier() + { + return "[\\x5e\\x60\\xa8\\xaf\\xb4\\xb8\\x2c2-\\x2c5\\x2d2-\\x2df" + "\\x2e5-\\x2eb\\x2ed\\x2ef-\\x2ff\\x375\\x384\\x385\\x1fbd" + "\\x1fbf-\\x1fc1\\x1fcd-\\x1fcf\\x1fdd-\\x1fdf\\x1fed-\\x1fef" + "\\x1ffd\\x1ffe\\x309b\\x309c\\xa700-\\xa716\\xa720\\xa721" + "\\xa789\\xa78a\\xab5b\\xfbb2-\\xfbc1\\xff3e\\xff40\\xffe3" + "\\x1f3fb-\\x1f3ff]"; + } + + static const char *symbol_math() + { + return "[\\x2b\\x3c-\\x3e\\x7c\\x7e\\xac\\xb1\\xd7\\xf7\\x3f6" + "\\x606-\\x608\\x2044\\x2052\\x207a-\\x207c\\x208a-\\x208c\\x2118" + "\\x2140-\\x2144\\x214b\\x2190-\\x2194\\x219a\\x219b\\x21a0\\x21a3" + "\\x21a6\\x21ae\\x21ce\\x21cf\\x21d2\\x21d4\\x21f4-\\x22ff" + "\\x2320\\x2321\\x237c\\x239b-\\x23b3\\x23dc-\\x23e1\\x25b7\\x25c1" + "\\x25f8-\\x25ff\\x266f\\x27c0-\\x27c4\\x27c7-\\x27e5" + "\\x27f0-\\x27ff\\x2900-\\x2982\\x2999-\\x29d7\\x29dc-\\x29fb" + "\\x29fe-\\x2aff\\x2b30-\\x2b44\\x2b47-\\x2b4c\\xfb29\\xfe62" + "\\xfe64-\\xfe66\\xff0b\\xff1c-\\xff1e\\xff5c\\xff5e\\xffe2" + "\\xffe9-\\xffec\\x1d6c1\\x1d6db\\x1d6fb\\x1d715\\x1d735\\x1d74f" + "\\x1d76f\\x1d789\\x1d7a9\\x1d7c3\\x1eef0\\x1eef1]"; + } + + static const char *symbol_other() + { + return "[\\xa6\\xa9\\xae\\xb0\\x482\\x58d\\x58e\\x60e\\x60f\\x6de\\x6e9" + "\\x6fd\\x6fe\\x7f6\\x9fa\\xb70\\xbf3-\\xbf8\\xbfa\\xc7f\\xd79" + 
"\\xf01-\\xf03\\xf13\\xf15-\\xf17\\xf1a-\\xf1f\\xf34\\xf36\\xf38" + "\\xfbe-\\xfc5\\xfc7-\\xfcc\\xfce\\xfcf\\xfd5-\\xfd8\\x109e\\x109f" + "\\x1390-\\x1399\\x1940\\x19de-\\x19ff\\x1b61-\\x1b6a" + "\\x1b74-\\x1b7c\\x2100\\x2101\\x2103-\\x2106\\x2108\\x2109\\x2114" + "\\x2116\\x2117\\x211e-\\x2123\\x2125\\x2127\\x2129\\x212e" + "\\x213a\\x213b\\x214a\\x214c\\x214d\\x214f\\x218a\\x218b" + "\\x2195-\\x2199\\x219c-\\x219f\\x21a1\\x21a2\\x21a4\\x21a5" + "\\x21a7-\\x21ad\\x21af-\\x21cd\\x21d0\\x21d1\\x21d3\\x21d5-\\x21f3" + "\\x2300-\\x2307\\x230c-\\x231f\\x2322-\\x2328\\x232b-\\x237b" + "\\x237d-\\x239a\\x23b4-\\x23db\\x23e2-\\x23fa\\x2400-\\x2426" + "\\x2440-\\x244a\\x249c-\\x24e9\\x2500-\\x25b6\\x25b8-\\x25c0" + "\\x25c2-\\x25f7\\x2600-\\x266e\\x2670-\\x2767\\x2794-\\x27bf" + "\\x2800-\\x28ff\\x2b00-\\x2b2f\\x2b45\\x2b46\\x2b4d-\\x2b73" + "\\x2b76-\\x2b95\\x2b98-\\x2bb9\\x2bbd-\\x2bc8\\x2bca-\\x2bd1" + "\\x2bec-\\x2bef\\x2ce5-\\x2cea\\x2e80-\\x2e99\\x2e9b-\\x2ef3" + "\\x2f00-\\x2fd5\\x2ff0-\\x2ffb\\x3004\\x3012\\x3013\\x3020" + "\\x3036\\x3037\\x303e\\x303f\\x3190\\x3191\\x3196-\\x319f" + "\\x31c0-\\x31e3\\x3200-\\x321e\\x322a-\\x3247\\x3250" + "\\x3260-\\x327f\\x328a-\\x32b0\\x32c0-\\x32fe\\x3300-\\x33ff" + "\\x4dc0-\\x4dff\\xa490-\\xa4c6\\xa828-\\xa82b\\xa836\\xa837\\xa839" + "\\xaa77-\\xaa79\\xfdfd\\xffe4\\xffe8\\xffed\\xffee\\xfffc\\xfffd" + "\\x10137-\\x1013f\\x10179-\\x10189\\x1018c\\x10190-\\x1019b" + "\\x101a0\\x101d0-\\x101fc\\x10877\\x10878\\x10ac8\\x1173f" + "\\x16b3c-\\x16b3f\\x16b45\\x1bc9c\\x1d000-\\x1d0f5" + "\\x1d100-\\x1d126\\x1d129-\\x1d164\\x1d16a-\\x1d16c" + "\\x1d183\\x1d184\\x1d18c-\\x1d1a9\\x1d1ae-\\x1d1e8" + "\\x1d200-\\x1d241\\x1d245\\x1d300-\\x1d356\\x1d800-\\x1d9ff" + "\\x1da37-\\x1da3a\\x1da6d-\\x1da74\\x1da76-\\x1da83" + "\\x1da85\\x1da86\\x1f000-\\x1f02b\\x1f030-\\x1f093" + "\\x1f0a0-\\x1f0ae\\x1f0b1-\\x1f0bf\\x1f0c1-\\x1f0cf" + "\\x1f0d1-\\x1f0f5\\x1f110-\\x1f12e\\x1f130-\\x1f16b" + "\\x1f170-\\x1f19a\\x1f1e6-\\x1f202\\x1f210-\\x1f23a" + 
"\\x1f240-\\x1f248\\x1f250\\x1f251\\x1f300-\\x1f3fa" + "\\x1f400-\\x1f579\\x1f57b-\\x1f5a3\\x1f5a5-\\x1f6d0" + "\\x1f6e0-\\x1f6ec\\x1f6f0-\\x1f6f3\\x1f700-\\x1f773" + "\\x1f780-\\x1f7d4\\x1f800-\\x1f80b\\x1f810-\\x1f847" + "\\x1f850-\\x1f859\\x1f860-\\x1f887\\x1f890-\\x1f8ad" + "\\x1f910-\\x1f918\\x1f980-\\x1f984\\x1f9c0]"; + } + + static const char *separator_line() + { + return "[\\x2028]"; + } + + static const char *separator_paragraph() + { + return "[\\x2029]"; + } + + static const char *separator_space() + { + return "[\\x20\\xa0\\x1680\\x2000-\\x200a\\x202f\\x205f\\x3000]"; + } + + template + static input_char_type decode_octal(state_type &state_) + { + std::size_t oct_ = 0; + auto ch_ = *state_._curr; + unsigned short count_ = 3; + bool eos_ = false; + + for (;;) + { + oct_ *= 8; + oct_ += ch_ - '0'; + --count_; + state_.increment(); + eos_ = state_.eos(); + + if (!count_ || eos_) break; + + ch_ = *state_._curr; + + // Don't consume invalid chars! + if (ch_ < '0' || ch_ > '7') + { + break; + } + } + + if (oct_ > static_cast(char_traits::max_val())) + { + std::ostringstream ss_; + + ss_ << "Escape \\" << std::oct << oct_ << + " is too big for the state machine char type " + "preceding index " << std::dec << state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + return static_cast(oct_); + } + + template + static input_char_type decode_control_char(state_type &state_) + { + // Skip over 'c' + state_.increment(); + + typename state_type::char_type ch_ = 0; + bool eos_ = state_.next(ch_); + + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following \\c"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + else + { + if (ch_ >= 'a' && ch_ <= 'z') + { + ch_ -= 'a' - 1; + } + else if (ch_ >= 'A' && ch_ <= 'Z') + { + ch_ -= 'A' - 1; + } + else if (ch_ == '@') + { + // Apparently... 
+ ch_ = 0; + } + else + { + std::ostringstream ss_; + + ss_ << "Invalid control char at index " << + state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + + return ch_; + } + + template + static input_char_type decode_hex(state_type &state_) + { + // Skip over 'x' + state_.increment(); + + typename state_type::char_type ch_ = 0; + bool eos_ = state_.next(ch_); + + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following \\x"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + if (!((ch_ >= '0' && ch_ <= '9') || (ch_ >= 'a' && ch_ <= 'f') || + (ch_ >= 'A' && ch_ <= 'F'))) + { + std::ostringstream ss_; + + ss_ << "Illegal char following \\x at index " << + state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + std::size_t hex_ = 0; + + do + { + hex_ *= 16; + + if (ch_ >= '0' && ch_ <= '9') + { + hex_ += ch_ - '0'; + } + else if (ch_ >= 'a' && ch_ <= 'f') + { + hex_ += 10 + (ch_ - 'a'); + } + else + { + hex_ += 10 + (ch_ - 'A'); + } + + eos_ = state_.eos(); + + if (!eos_) + { + ch_ = *state_._curr; + + // Don't consume invalid chars! 
+ if (((ch_ >= '0' && ch_ <= '9') || + (ch_ >= 'a' && ch_ <= 'f') || (ch_ >= 'A' && ch_ <= 'F'))) + { + state_.increment(); + } + else + { + eos_ = true; + } + } + } while (!eos_); + + if (hex_ > static_cast(char_traits::max_val())) + { + std::ostringstream ss_; + + ss_ << "Escape \\x" << std::hex << hex_ << + " is too big for the state machine char type " << + "preceding index " << + std::dec << state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + return static_cast(hex_); + } + + template + static void charset_range(const bool chset_, state_type &state_, + bool &eos_, typename state_type::char_type &ch_, + const input_char_type prev_, string_token &chars_) + { + if (chset_) + { + std::ostringstream ss_; + + ss_ << "Charset cannot form start of range preceding " + "index " << state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + eos_ = state_.next(ch_); + + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " following '-'"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + input_char_type curr_ = 0; + + if (ch_ == '\\') + { + std::size_t str_len_ = 0; + + if (escape_sequence(state_, curr_, str_len_)) + { + std::ostringstream ss_; + + ss_ << "Charset cannot form end of range preceding index " + << state_.index(); + state_.error(ss_); + throw runtime_error(ss_.str()); + } + } + else if (ch_ == '[' && !state_.eos() && *state_._curr == ':') + { + std::ostringstream ss_; + + ss_ << "POSIX char class cannot form end of range at " + "index " << state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + else + { + curr_ = ch_; + } + + eos_ = state_.next(ch_); + + // Covers preceding if and else + if (eos_) + { + std::ostringstream ss_; + + // Pointless returning index if at end of string + state_.unexpected_end(ss_); + ss_ << " (missing ']')"; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + 
+ // Use index_type as char is generally signed + // and we want to ignore signedness. + auto start_ = static_cast(prev_); + auto end_ = static_cast(curr_); + + // Semanic check + if (end_ < start_) + { + std::ostringstream ss_; + + ss_ << "Max less than Min in charset range preceding index " << + state_.index() - 1; + state_.error(ss_); + throw runtime_error(ss_.str()); + } + + // Even though ranges are used now, we still need to consider + // each character if icase is set. + if (state_._flags & icase) + { + range range_(start_, end_); + string_token folded_; + + chars_.insert(range_); + fold(range_, state_._locale, folded_, + size()); + + if (!folded_.empty()) + { + chars_.insert(folded_); + } + } + else + { + chars_.insert(range(prev_, curr_)); + } + } +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_state.hpp b/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_state.hpp new file mode 100644 index 00000000..1d41ea5c --- /dev/null +++ b/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_state.hpp @@ -0,0 +1,136 @@ +// tokeniser_state.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_RE_TOKENISER_STATE_HPP +#define LEXERTL_RE_TOKENISER_STATE_HPP + +#include "../../char_traits.hpp" +#include "../../enums.hpp" +#include +#include "../../narrow.hpp" +#include + +namespace lexertl +{ +namespace detail +{ +template +struct basic_re_tokeniser_state +{ + using char_type = ch_type; + using index_type = typename basic_char_traits::index_type; + + const char_type * const _start; + const char_type * const _end; + const char_type *_curr; + id_type _id; + std::size_t _flags; + std::stack _flags_stack; + std::locale _locale; + const char_type *_macro_name; + long _paren_count; + bool _in_string; + id_type _nl_id; + + basic_re_tokeniser_state(const char_type *start_, + const char_type * const end_, id_type id_, const std::size_t flags_, + const std::locale locale_, const char_type *macro_name_) : + _start(start_), + _end(end_), + _curr(start_), + _id(id_), + _flags(flags_), + _flags_stack(), + _locale(locale_), + _macro_name(macro_name_), + _paren_count(0), + _in_string(false), + _nl_id(static_cast(~0)) + { + } + + basic_re_tokeniser_state(const basic_re_tokeniser_state &rhs_) + { + assign(rhs_); + } + + // prevent VC++ 7.1 warning: + const basic_re_tokeniser_state &operator = + (const basic_re_tokeniser_state &rhs_) + { + return assign(rhs_); + } + + basic_re_tokeniser_state &assign(const basic_re_tokeniser_state &rhs_) + { + _start = rhs_._start; + _end = rhs_._end; + _curr = rhs_._curr; + _id = rhs_._id; + _flags = rhs_._flags; + _flags_stack = rhs_._flags_stack; + _locale = rhs_._locale; + _macro_name = rhs_._macro_name; + _paren_count = rhs_._paren_count; + _in_string = rhs_._in_string; + _nl_id = rhs_._nl_id; + return *this; + } + + inline bool next(char_type &ch_) + { + if (_curr >= _end) + { + ch_ = 0; + return true; + } + else + { + ch_ = *_curr; + increment(); + return false; + } + } + + inline void increment() + { + ++_curr; + } + + inline 
std::size_t index() + { + return _curr - _start; + } + + inline bool eos() + { + return _curr >= _end; + } + + inline void unexpected_end(std::ostringstream &ss_) + { + ss_ << "Unexpected end of regex"; + } + + inline void error(std::ostringstream &ss_) + { + ss_ << " in "; + + if (_macro_name) + { + ss_ << "MACRO '"; + narrow(_macro_name, ss_); + ss_ << "'."; + } + else + { + ss_ << "rule id " << _id << '.'; + } + } +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/end_node.hpp b/YACReaderLibrary/lexertl/parser/tree/end_node.hpp new file mode 100644 index 00000000..c485fca5 --- /dev/null +++ b/YACReaderLibrary/lexertl/parser/tree/end_node.hpp @@ -0,0 +1,111 @@ +// end_node.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_END_NODE_HPP +#define LEXERTL_END_NODE_HPP + +#include "node.hpp" + +namespace lexertl +{ +namespace detail +{ +template +class basic_end_node : public basic_node +{ +public: + using node = basic_node; + using bool_stack = typename node::bool_stack; + using const_node_stack = typename node::const_node_stack; + using node_ptr_vector = typename node::node_ptr_vector; + using node_stack = typename node::node_stack; + using node_type = typename node::node_type; + using node_vector = typename node::node_vector; + + basic_end_node(const id_type id_, const id_type user_id_, + const id_type next_dfa_, const id_type push_dfa_, + const bool pop_dfa_) : + node(false), + _id(id_), + _user_id(user_id_), + _next_dfa(next_dfa_), + _push_dfa(push_dfa_), + _pop_dfa(pop_dfa_), + _followpos() + { + node::_firstpos.push_back(this); + node::_lastpos.push_back(this); + } + + virtual ~basic_end_node() override + { + } + + virtual node_type what_type() const override + { + return node::END; + } + + virtual bool traverse(const_node_stack &/*node_stack_*/, + 
bool_stack &/*perform_op_stack_*/) const override + { + return false; + } + + virtual const node_vector &followpos() const override + { + // _followpos is always empty..! + return _followpos; + } + + virtual bool end_state() const override + { + return true; + } + + virtual id_type id() const override + { + return _id; + } + + virtual id_type user_id() const override + { + return _user_id; + } + + virtual id_type next_dfa() const override + { + return _next_dfa; + } + + virtual id_type push_dfa() const override + { + return _push_dfa; + } + + virtual bool pop_dfa() const override + { + return _pop_dfa; + } + +private: + id_type _id; + id_type _user_id; + id_type _next_dfa; + id_type _push_dfa; + bool _pop_dfa; + node_vector _followpos; + + virtual void copy_node(node_ptr_vector &/*node_ptr_vector_*/, + node_stack &/*new_node_stack_*/, bool_stack &/*perform_op_stack_*/, + bool &/*down_*/) const override + { + // Nothing to do, as end_nodes are not copied. + } +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/iteration_node.hpp b/YACReaderLibrary/lexertl/parser/tree/iteration_node.hpp new file mode 100644 index 00000000..41baba7e --- /dev/null +++ b/YACReaderLibrary/lexertl/parser/tree/iteration_node.hpp @@ -0,0 +1,96 @@ +// iteration_node.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_ITERATION_NODE_HPP +#define LEXERTL_ITERATION_NODE_HPP + +#include "node.hpp" + +namespace lexertl +{ +namespace detail +{ +template +class basic_iteration_node : public basic_node +{ +public: + using node = basic_node; + using bool_stack = typename node::bool_stack; + using const_node_stack = typename node::const_node_stack; + using node_ptr_vector = typename node::node_ptr_vector; + using node_stack = typename node::node_stack; + using node_type = typename node::node_type; + using node_vector = typename node::node_vector; + + basic_iteration_node(observer_ptr next_, const bool greedy_) : + node(true), + _next(next_), + _greedy(greedy_) + { + _next->append_firstpos(node::_firstpos); + _next->append_lastpos(node::_lastpos); + + for (observer_ptr node_ : node::_lastpos) + { + node_->append_followpos(node::_firstpos); + } + + for (observer_ptr node_ : node::_firstpos) + { + node_->greedy(greedy_); + } + } + + virtual ~basic_iteration_node() override + { + } + + virtual node_type what_type() const override + { + return node::ITERATION; + } + + virtual bool traverse(const_node_stack &node_stack_, + bool_stack &perform_op_stack_) const override + { + perform_op_stack_.push(true); + node_stack_.push(_next); + return true; + } + +private: + observer_ptr _next; + bool _greedy; + + virtual void copy_node(node_ptr_vector &node_ptr_vector_, + node_stack &new_node_stack_, bool_stack &perform_op_stack_, + bool &down_) const override + { + if (perform_op_stack_.top()) + { + observer_ptr ptr_ = new_node_stack_.top(); + + node_ptr_vector_.emplace_back + (std::make_unique(ptr_, _greedy)); + new_node_stack_.top() = node_ptr_vector_.back().get(); + } + else + { + down_ = true; + } + + perform_op_stack_.pop(); + } + + // No copy construction. + basic_iteration_node(const basic_iteration_node &) = delete; + // No assignment. 
+ const basic_iteration_node &operator = + (const basic_iteration_node &) = delete; +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/leaf_node.hpp b/YACReaderLibrary/lexertl/parser/tree/leaf_node.hpp new file mode 100644 index 00000000..ef1b485e --- /dev/null +++ b/YACReaderLibrary/lexertl/parser/tree/leaf_node.hpp @@ -0,0 +1,110 @@ +// leaf_node.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_LEAF_NODE_HPP +#define LEXERTL_LEAF_NODE_HPP + +#include "../../enums.hpp" // null_token +#include "node.hpp" + +namespace lexertl +{ +namespace detail +{ +template +class basic_leaf_node : public basic_node +{ +public: + using node = basic_node; + using bool_stack = typename node::bool_stack; + using const_node_stack = typename node::const_node_stack; + using node_ptr_vector = typename node::node_ptr_vector; + using node_stack = typename node::node_stack; + using node_type = typename node::node_type; + using node_vector = typename node::node_vector; + + basic_leaf_node(const id_type token_, const bool greedy_) : + node(token_ == node::null_token()), + _token(token_), + _set_greedy(!greedy_), + _greedy(greedy_), + _followpos() + { + if (!node::_nullable) + { + node::_firstpos.push_back(this); + node::_lastpos.push_back(this); + } + } + + virtual ~basic_leaf_node() override + { + } + + virtual void append_followpos(const node_vector &followpos_) override + { + _followpos.insert(_followpos.end(), + followpos_.begin(), followpos_.end()); + } + + virtual node_type what_type() const override + { + return node::LEAF; + } + + virtual bool traverse(const_node_stack &/*node_stack_*/, + bool_stack &/*perform_op_stack_*/) const override + { + return false; + } + + virtual id_type token() const override + { + return _token; + } + + virtual void greedy(const bool greedy_) 
override + { + if (!_set_greedy) + { + _greedy = greedy_; + _set_greedy = true; + } + } + + virtual bool greedy() const override + { + return _greedy; + } + + virtual const node_vector &followpos() const override + { + return _followpos; + } + + virtual node_vector &followpos() override + { + return _followpos; + } + +private: + id_type _token; + bool _set_greedy; + bool _greedy; + node_vector _followpos; + + virtual void copy_node(node_ptr_vector &node_ptr_vector_, + node_stack &new_node_stack_, bool_stack &/*perform_op_stack_*/, + bool &/*down_*/) const override + { + node_ptr_vector_.emplace_back(std::make_unique + (_token, _greedy)); + new_node_stack_.push(node_ptr_vector_.back().get()); + } +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/node.hpp b/YACReaderLibrary/lexertl/parser/tree/node.hpp new file mode 100644 index 00000000..cee7729a --- /dev/null +++ b/YACReaderLibrary/lexertl/parser/tree/node.hpp @@ -0,0 +1,242 @@ +// node.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_NODE_HPP +#define LEXERTL_NODE_HPP + +#include +#include +#include "../../observer_ptr.hpp" +#include "../../runtime_error.hpp" +#include +#include + +namespace lexertl +{ +namespace detail +{ +template +class basic_node +{ +public: + enum node_type {LEAF, SEQUENCE, SELECTION, ITERATION, END}; + + using bool_stack = std::stack; + using node_stack = std::stack>; + using const_node_stack = std::stack>; + using node_vector = std::vector>; + using node_ptr_vector = std::vector>; + + basic_node() : + _nullable(false), + _firstpos(), + _lastpos() + { + } + + basic_node(const bool nullable_) : + _nullable(nullable_), + _firstpos(), + _lastpos() + { + } + + virtual ~basic_node() + { + } + + static id_type null_token() + { + return static_cast(~0); + } + + bool nullable() const + { + return _nullable; + } + + void append_firstpos(node_vector &firstpos_) const + { + firstpos_.insert(firstpos_.end(), + _firstpos.begin(), _firstpos.end()); + } + + void append_lastpos(node_vector &lastpos_) const + { + lastpos_.insert(lastpos_.end(), + _lastpos.begin(), _lastpos.end()); + } + + virtual void append_followpos(const node_vector &/*followpos_*/) + { + throw runtime_error("Internal error node::append_followpos()."); + } + + observer_ptr copy(node_ptr_vector &node_ptr_vector_) const + { + observer_ptr new_root_ = nullptr; + const_node_stack node_stack_; + bool_stack perform_op_stack_; + bool down_ = true; + node_stack new_node_stack_; + + node_stack_.push(this); + + while (!node_stack_.empty()) + { + while (down_) + { + down_ = node_stack_.top()->traverse(node_stack_, + perform_op_stack_); + } + + while (!down_ && !node_stack_.empty()) + { + observer_ptr top_ = node_stack_.top(); + + top_->copy_node(node_ptr_vector_, new_node_stack_, + perform_op_stack_, down_); + + if (!down_) node_stack_.pop(); + } + } + + assert(new_node_stack_.size() == 1); + new_root_ = 
new_node_stack_.top(); + new_node_stack_.pop(); + return new_root_; + } + + virtual node_type what_type() const = 0; + + virtual bool traverse(const_node_stack &node_stack_, + bool_stack &perform_op_stack_) const = 0; + + node_vector &firstpos() + { + return _firstpos; + } + + const node_vector &firstpos() const + { + return _firstpos; + } + + // _lastpos modified externally, so not const & + node_vector &lastpos() + { + return _lastpos; + } + + virtual bool end_state() const + { + return false; + } + + virtual id_type id() const + { + throw runtime_error("Internal error node::id()."); +#ifdef __SUNPRO_CC + // Stop bogus Solaris compiler warning + return id_type(); +#endif + } + + virtual id_type user_id() const + { + throw runtime_error("Internal error node::user_id()."); +#ifdef __SUNPRO_CC + // Stop bogus Solaris compiler warning + return id_type(); +#endif + } + + virtual id_type next_dfa() const + { + throw runtime_error("Internal error node::next_dfa()."); +#ifdef __SUNPRO_CC + // Stop bogus Solaris compiler warning + return id_type(); +#endif + } + + virtual id_type push_dfa() const + { + throw runtime_error("Internal error node::push_dfa()."); +#ifdef __SUNPRO_CC + // Stop bogus Solaris compiler warning + return id_type(); +#endif + } + + virtual bool pop_dfa() const + { + throw runtime_error("Internal error node::pop_dfa()."); +#ifdef __SUNPRO_CC + // Stop bogus Solaris compiler warning + return false; +#endif + } + + virtual id_type token() const + { + throw runtime_error("Internal error node::token()."); +#ifdef __SUNPRO_CC + // Stop bogus Solaris compiler warning + return id_type(); +#endif + } + + virtual void greedy(const bool /*greedy_*/) + { + throw runtime_error("Internal error node::greedy(bool)."); + } + + virtual bool greedy() const + { + throw runtime_error("Internal error node::greedy()."); +#ifdef __SUNPRO_CC + // Stop bogus Solaris compiler warning + return false; +#endif + } + + virtual const node_vector &followpos() const + { + throw 
runtime_error("Internal error node::followpos()."); +#ifdef __SUNPRO_CC + // Stop bogus Solaris compiler warning + return firstpos; +#endif + } + + virtual node_vector &followpos() + { + throw runtime_error("Internal error node::followpos()."); +#ifdef __SUNPRO_CC + // Stop bogus Solaris compiler warning + return firstpos; +#endif + } + +protected: + const bool _nullable; + node_vector _firstpos; + node_vector _lastpos; + + virtual void copy_node(node_ptr_vector &node_ptr_vector_, + node_stack &new_node_stack_, bool_stack &perform_op_stack_, + bool &down_) const = 0; + +private: + // No copy construction. + basic_node(const basic_node &) = delete; + // No assignment. + const basic_node &operator =(const basic_node &) = delete; +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/selection_node.hpp b/YACReaderLibrary/lexertl/parser/tree/selection_node.hpp new file mode 100644 index 00000000..603bbc68 --- /dev/null +++ b/YACReaderLibrary/lexertl/parser/tree/selection_node.hpp @@ -0,0 +1,104 @@ +// selection_node.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_SELECTION_NODE_HPP +#define LEXERTL_SELECTION_NODE_HPP + +#include "node.hpp" + +namespace lexertl +{ +namespace detail +{ +template +class basic_selection_node : public basic_node +{ +public: + using node = basic_node; + using bool_stack = typename node::bool_stack; + using const_node_stack = typename node::const_node_stack; + using node_ptr_vector = typename node::node_ptr_vector; + using node_stack = typename node::node_stack; + using node_type = typename node::node_type; + + basic_selection_node(observer_ptr left_, observer_ptr right_) : + node(left_->nullable() || right_->nullable()), + _left(left_), + _right(right_) + { + _left->append_firstpos(node::_firstpos); + _right->append_firstpos(node::_firstpos); + _left->append_lastpos(node::_lastpos); + _right->append_lastpos(node::_lastpos); + } + + virtual ~basic_selection_node() override + { + } + + virtual node_type what_type() const override + { + return node::SELECTION; + } + + virtual bool traverse(const_node_stack &node_stack_, + bool_stack &perform_op_stack_) const override + { + perform_op_stack_.push(true); + + switch (_right->what_type()) + { + case node::SEQUENCE: + case node::SELECTION: + case node::ITERATION: + perform_op_stack_.push(false); + break; + default: + break; + } + + node_stack_.push(_right); + node_stack_.push(_left); + return true; + } + +private: + observer_ptr _left; + observer_ptr _right; + + virtual void copy_node(node_ptr_vector &node_ptr_vector_, + node_stack &new_node_stack_, bool_stack &perform_op_stack_, + bool &down_) const override + { + if (perform_op_stack_.top()) + { + observer_ptr rhs_ = new_node_stack_.top(); + + new_node_stack_.pop(); + + observer_ptr lhs_ = new_node_stack_.top(); + + node_ptr_vector_.emplace_back + (std::make_unique(lhs_, rhs_)); + new_node_stack_.top() = node_ptr_vector_.back().get(); + } + else + { + down_ = true; + } + + 
perform_op_stack_.pop(); + } + + // No copy construction. + basic_selection_node(const basic_selection_node &) = delete; + // No assignment. + const basic_selection_node &operator = + (const basic_selection_node &) = delete; +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/sequence_node.hpp b/YACReaderLibrary/lexertl/parser/tree/sequence_node.hpp new file mode 100644 index 00000000..22276735 --- /dev/null +++ b/YACReaderLibrary/lexertl/parser/tree/sequence_node.hpp @@ -0,0 +1,121 @@ +// sequence_node.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_SEQUENCE_NODE_HPP +#define LEXERTL_SEQUENCE_NODE_HPP + +#include "node.hpp" + +namespace lexertl +{ +namespace detail +{ +template +class basic_sequence_node : public basic_node +{ +public: + using node = basic_node; + using bool_stack = typename node::bool_stack; + using const_node_stack = typename node::const_node_stack; + using node_ptr_vector = typename node::node_ptr_vector; + using node_stack = typename node::node_stack; + using node_type = typename node::node_type; + using node_vector = typename node::node_vector; + + basic_sequence_node(observer_ptr left_, observer_ptr right_) : + node(left_->nullable() && right_->nullable()), + _left(left_), + _right(right_) + { + _left->append_firstpos(node::_firstpos); + + if (_left->nullable()) + { + _right->append_firstpos(node::_firstpos); + } + + if (_right->nullable()) + { + _left->append_lastpos(node::_lastpos); + } + + _right->append_lastpos(node::_lastpos); + + node_vector &lastpos_ = _left->lastpos(); + const node_vector &firstpos_ = _right->firstpos(); + + for (observer_ptr node_ : lastpos_) + { + node_->append_followpos(firstpos_); + } + } + + virtual ~basic_sequence_node() override + { + } + + virtual node_type what_type() const override + { + return 
node::SEQUENCE; + } + + virtual bool traverse(const_node_stack &node_stack_, + bool_stack &perform_op_stack_) const override + { + perform_op_stack_.push(true); + + switch (_right->what_type()) + { + case node::SEQUENCE: + case node::SELECTION: + case node::ITERATION: + perform_op_stack_.push(false); + break; + default: + break; + } + + node_stack_.push(_right); + node_stack_.push(_left); + return true; + } + +private: + observer_ptr _left; + observer_ptr _right; + + virtual void copy_node(node_ptr_vector &node_ptr_vector_, + node_stack &new_node_stack_, bool_stack &perform_op_stack_, + bool &down_) const override + { + if (perform_op_stack_.top()) + { + observer_ptr rhs_ = new_node_stack_.top(); + + new_node_stack_.pop(); + + observer_ptr lhs_ = new_node_stack_.top(); + + node_ptr_vector_.emplace_back + (std::make_unique(lhs_, rhs_)); + new_node_stack_.top() = node_ptr_vector_.back().get(); + } + else + { + down_ = true; + } + + perform_op_stack_.pop(); + } + + // No copy construction. + basic_sequence_node(const basic_sequence_node &) = delete; + // No assignment. + const basic_sequence_node &operator =(const basic_sequence_node &) = delete; +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/partition/charset.hpp b/YACReaderLibrary/lexertl/partition/charset.hpp new file mode 100644 index 00000000..2bfbd335 --- /dev/null +++ b/YACReaderLibrary/lexertl/partition/charset.hpp @@ -0,0 +1,72 @@ +// charset.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef LEXERTL_CHARSET_HPP +#define LEXERTL_CHARSET_HPP + +#include +#include +#include +#include "../string_token.hpp" + +namespace lexertl +{ +namespace detail +{ +template +struct basic_charset +{ + using token = basic_string_token; + using index_set = std::set; + + token _token; + index_set _index_set; + + basic_charset() : + _token(), + _index_set() + { + } + + basic_charset(const token &token_, const id_type index_) : + _token(token_), + _index_set() + { + _index_set.insert(index_); + } + + bool empty() const + { + return _token.empty() && _index_set.empty(); + } + + void intersect(basic_charset &rhs_, basic_charset &overlap_) + { + _token.intersect(rhs_._token, overlap_._token); + + if (!overlap_._token.empty()) + { + std::merge(_index_set.begin(), _index_set.end(), + rhs_._index_set.begin(), rhs_._index_set.end(), + std::inserter(overlap_._index_set, + overlap_._index_set.end())); + + if (_token.empty()) + { + _index_set.clear(); + } + + if (rhs_._token.empty()) + { + rhs_._index_set.clear(); + } + } + } +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/partition/equivset.hpp b/YACReaderLibrary/lexertl/partition/equivset.hpp new file mode 100644 index 00000000..af709b03 --- /dev/null +++ b/YACReaderLibrary/lexertl/partition/equivset.hpp @@ -0,0 +1,135 @@ +// equivset.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_EQUIVSET_HPP +#define LEXERTL_EQUIVSET_HPP + +#include +#include "../parser/tree/node.hpp" +#include + +namespace lexertl +{ +namespace detail +{ +template +struct basic_equivset +{ + using index_set = std::set; + using index_vector = std::vector; + using node = basic_node; + using node_vector = std::vector>; + + index_vector _index_vector; + id_type _id; + bool _greedy; + node_vector _followpos; + + basic_equivset() : + _index_vector(), + _id(0), + _greedy(true), + _followpos() + { + } + + basic_equivset(const index_set &index_set_, const id_type id_, + const bool greedy_, const node_vector &followpos_) : + _index_vector(index_set_.begin(), index_set_.end()), + _id(id_), + _greedy(greedy_), + _followpos(followpos_) + { + } + + bool empty() const + { + return _index_vector.empty() && _followpos.empty(); + } + + void intersect(basic_equivset &rhs_, basic_equivset &overlap_) + { + intersect_indexes(rhs_._index_vector, overlap_._index_vector); + + if (!overlap_._index_vector.empty()) + { + // Note that the LHS takes priority in order to + // respect rule ordering priority in the lex spec. 
+ overlap_._id = _id; + overlap_._greedy = _greedy; + overlap_._followpos = _followpos; + + auto overlap_begin_ = overlap_._followpos.cbegin(); + auto overlap_end_ = overlap_._followpos.cend(); + + for (observer_ptr node_ : rhs_._followpos) + { + if (std::find(overlap_begin_, overlap_end_, node_) == + overlap_end_) + { + overlap_._followpos.push_back(node_); + overlap_begin_ = overlap_._followpos.begin(); + overlap_end_ = overlap_._followpos.end(); + } + } + + if (_index_vector.empty()) + { + _followpos.clear(); + } + + if (rhs_._index_vector.empty()) + { + rhs_._followpos.clear(); + } + } + } + +private: + void intersect_indexes(index_vector &rhs_, index_vector &overlap_) + { + std::set_intersection(_index_vector.begin(), _index_vector.end(), + rhs_.begin(), rhs_.end(), std::back_inserter(overlap_)); + + if (!overlap_.empty()) + { + remove(overlap_, _index_vector); + remove(overlap_, rhs_); + } + } + + void remove(const index_vector &source_, index_vector &dest_) + { + auto inter_ = source_.begin(); + auto inter_end_ = source_.end(); + auto reader_ = std::find(dest_.begin(), dest_.end(), *inter_); + auto writer_ = reader_; + auto dest_end_ = dest_.end(); + + while (writer_ != dest_end_ && inter_ != inter_end_) + { + if (*reader_ == *inter_) + { + ++inter_; + ++reader_; + } + else + { + *writer_++ = *reader_++; + } + } + + while (reader_ != dest_end_) + { + *writer_++ = *reader_++; + } + + dest_.resize(dest_.size() - source_.size()); + } +}; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/rules.hpp b/YACReaderLibrary/lexertl/rules.hpp new file mode 100644 index 00000000..c5b29e0b --- /dev/null +++ b/YACReaderLibrary/lexertl/rules.hpp @@ -0,0 +1,1018 @@ +// rules.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_RULES_HPP +#define LEXERTL_RULES_HPP + +#include "enums.hpp" +#include +#include +#include "narrow.hpp" +#include "observer_ptr.hpp" +#include "parser/tokeniser/re_tokeniser.hpp" +#include "runtime_error.hpp" +#include +#include +#include +#include + +namespace lexertl +{ +template +class basic_rules +{ +public: + using bool_vector = std::vector; + using bool_vector_vector = std::vector; + using char_type = ch_type; + using rules_char_type = r_ch_type; + using id_type = id_ty; + using id_vector = std::vector; + using id_vector_vector = std::vector; + using re_state = detail::basic_re_tokeniser_state; + using string = std::basic_string; + using string_token = basic_string_token; + using string_vector = std::vector; + using string_set = std::set; + using string_pair = std::pair; + using string_id_type_map = std::map; + using string_id_type_pair = std::pair; + using token = detail::basic_re_token; + using token_vector = std::vector; + using token_vector_vector = std::vector; + using token_vector_vector_vector = std::vector; + using macro_map = std::map; + using macro_pair = std::pair; + using tokeniser = + detail::basic_re_tokeniser; + + // If you get a compile error here you have + // failed to define an unsigned id type. 
+ static_assert(std::is_unsigned::value, "Your id type is signed"); + +#ifdef _WIN32 + basic_rules(const std::size_t flags_ = dot_not_cr_lf) : +#else + basic_rules(const std::size_t flags_ = dot_not_newline) : +#endif + _statemap(), + _macro_map(), + _regexes(), + _features(), + _ids(), + _user_ids(), + _next_dfas(), + _pushes(), + _pops(), + _flags(flags_), + _locale(), + _lexer_state_names() + { + push_state(initial()); + } + + void clear() + { + _statemap.clear(); + _macro_map.clear(); + _regexes.clear(); + _features.clear(); + _ids.clear(); + _user_ids.clear(); + _next_dfas.clear(); + _pushes.clear(); + _pops.clear(); +#ifdef _WIN32 + _flags = dot_not_cr_lf; +#else + _flags = dot_not_newline; +#endif + _locale = std::locale(); + _lexer_state_names.clear(); + push_state(initial()); + } + + void clear(const id_type dfa_) + { + if (_regexes.size() > dfa_) + { + _regexes[dfa_].clear(); + _features[dfa_] = 0; + _ids[dfa_].clear(); + _user_ids[dfa_].clear(); + _next_dfas[dfa_].clear(); + _pushes[dfa_].clear(); + _pops[dfa_].clear(); + } + } + + void flags(const std::size_t flags_) + { + _flags = flags_; + } + + std::size_t flags() const + { + return _flags; + } + + static id_type skip() + { + return static_cast(~1); + } + + id_type eoi() const + { + return 0; + } + + static id_type npos() + { + return static_cast(~0); + } + + std::locale imbue(const std::locale &locale_) + { + std::locale loc_ = _locale; + + _locale = locale_; + return loc_; + } + + const std::locale &locale() const + { + return _locale; + } + + const rules_char_type *state(const id_type index_) const + { + if (index_ == 0) + { + return initial(); + } + else + { + const id_type i_ = index_ - 1; + + if (_lexer_state_names.size() > i_) + { + return _lexer_state_names[i_].c_str(); + } + else + { + return 0; + } + } + } + + id_type state(const rules_char_type *name_) const + { + typename string_id_type_map::const_iterator iter_ = + _statemap.find(name_); + + if (iter_ == _statemap.end()) + { + return 
npos(); + } + else + { + return iter_->second; + } + } + + id_type push_state(const rules_char_type *name_) + { + validate(name_); + + if (_statemap.insert(string_id_type_pair(name_, + static_cast(_statemap.size()))).second) + { + _regexes.push_back(token_vector_vector()); + _features.push_back(0); + _ids.push_back(id_vector()); + _user_ids.push_back(id_vector()); + _next_dfas.push_back(id_vector()); + _pushes.push_back(id_vector()); + _pops.push_back(bool_vector()); + + if (string(name_) != initial()) + { + _lexer_state_names.push_back(name_); + } + } + else + { + return _statemap.find(name_)->second; + } + + if (_next_dfas.size() > npos()) + { + // Overflow + throw runtime_error("The data type you have chosen cannot hold " + "this many lexer start states."); + } + + // Initial is not stored, so no need to - 1. + return static_cast(_lexer_state_names.size()); + } + + void insert_macro(const rules_char_type *name_, + const rules_char_type *regex_) + { + insert_macro(name_, string(regex_)); + } + + void insert_macro(const rules_char_type *name_, + const rules_char_type *regex_start_, + const rules_char_type *regex_end_) + { + insert_macro(name_, string(regex_start_, regex_end_)); + } + + void insert_macro(const rules_char_type *name_, const string ®ex_) + { + validate(name_); + + typename macro_map::const_iterator iter_ = _macro_map.find(name_); + + if (iter_ == _macro_map.end()) + { + auto pair_ = _macro_map.insert(macro_pair(name_, token_vector())); + + tokenise(regex_, pair_.first->second, npos(), name_); + } + else + { + std::ostringstream ss_; + + ss_ << "Attempt to redefine MACRO '"; + narrow(name_, ss_); + ss_ << "'."; + throw runtime_error(ss_.str()); + } + } + + // Add rule to INITIAL + void push(const rules_char_type *regex_, const id_type id_, + const id_type user_id_ = npos()) + { + push(string(regex_), id_, user_id_); + } + + void push(const rules_char_type *regex_start_, + const rules_char_type *regex_end_, + const id_type id_, const id_type user_id_ = 
npos()) + { + push(string(regex_start_, regex_end_), id_, user_id_); + } + + void push(const string ®ex_, const id_type id_, + const id_type user_id_ = npos()) + { + check_for_invalid_id(id_); + _regexes.front().push_back(token_vector()); + tokenise(regex_, _regexes.front().back(), id_, 0); + + if (regex_[0] == '^') + { + _features.front() |= bol_bit; + } + + if (regex_.size() > 0 && regex_[regex_.size() - 1] == '$') + { + _features.front() |= eol_bit; + } + + if (id_ == skip()) + { + _features.front() |= skip_bit; + } + else if (id_ == eoi()) + { + _features.front() |= again_bit; + } + + _ids.front().push_back(id_); + _user_ids.front().push_back(user_id_); + _next_dfas.front().push_back(0); + _pushes.front().push_back(npos()); + _pops.front().push_back(false); + } + + // Add rule with no id + void push(const rules_char_type *curr_dfa_, + const rules_char_type *regex_, const rules_char_type *new_dfa_) + { + push(curr_dfa_, string(regex_), new_dfa_); + } + + void push(const rules_char_type *curr_dfa_, + const rules_char_type *regex_start_, const rules_char_type *regex_end_, + const rules_char_type *new_dfa_) + { + push(curr_dfa_, string(regex_start_, regex_end_), new_dfa_); + } + + void push(const rules_char_type *curr_dfa_, const string ®ex_, + const rules_char_type *new_dfa_) + { + push(curr_dfa_, regex_, eoi(), new_dfa_, false); + } + + // Add rule with id + void push(const rules_char_type *curr_dfa_, + const rules_char_type *regex_, const id_type id_, + const rules_char_type *new_dfa_, const id_type user_id_ = npos()) + { + push(curr_dfa_, string(regex_), id_, new_dfa_, user_id_); + } + + void push(const rules_char_type *curr_dfa_, + const rules_char_type *regex_start_, + const rules_char_type *regex_end_, const id_type id_, + const rules_char_type *new_dfa_, const id_type user_id_ = npos()) + { + push(curr_dfa_, string(regex_start_, regex_end_), + id_, new_dfa_, user_id_); + } + + void push(const rules_char_type *curr_dfa_, const string ®ex_, + const id_type 
id_, const rules_char_type *new_dfa_, + const id_type user_id_ = npos()) + { + push(curr_dfa_, regex_, id_, new_dfa_, true, user_id_); + } + + void reverse() + { + for (auto &state_ : _regexes) + { + for (auto ®ex_ : state_) + { + reverse(regex_); + } + } + + for (auto &pair_ : _macro_map) + { + reverse(pair_.second); + } + } + + const string_id_type_map &statemap() const + { + return _statemap; + } + + const token_vector_vector_vector ®exes() const + { + return _regexes; + } + + const id_vector &features() const + { + return _features; + } + + const id_vector_vector &ids() const + { + return _ids; + } + + const id_vector_vector &user_ids() const + { + return _user_ids; + } + + const id_vector_vector &next_dfas() const + { + return _next_dfas; + } + + const id_vector_vector &pushes() const + { + return _pushes; + } + + const bool_vector_vector &pops() const + { + return _pops; + } + + bool empty() const + { + bool empty_ = true; + + for (const auto ®ex_ : _regexes) + { + if (!regex_.empty()) + { + empty_ = false; + break; + } + } + + return empty_; + } + + static const rules_char_type *initial() + { + static const rules_char_type initial_ [] = + { 'I', 'N', 'I', 'T', 'I', 'A', 'L', 0 }; + + return initial_; + } + + static const rules_char_type *dot() + { + static const rules_char_type dot_ [] = { '.', 0 }; + + return dot_; + } + + static const rules_char_type *all_states() + { + static const rules_char_type star_ [] = { '*', 0 }; + + return star_; + } + +private: + string_id_type_map _statemap; + macro_map _macro_map; + token_vector_vector_vector _regexes; + id_vector _features; + id_vector_vector _ids; + id_vector_vector _user_ids; + id_vector_vector _next_dfas; + id_vector_vector _pushes; + bool_vector_vector _pops; + std::size_t _flags; + std::locale _locale; + string_vector _lexer_state_names; + + void tokenise(const string ®ex_, token_vector &tokens_, + const id_type id_, const rules_char_type *name_) + { + re_state state_(regex_.c_str(), regex_.c_str() + 
regex_.size(), id_, + _flags, _locale, name_); + string macro_; + rules_char_type diff_ = 0; + + tokens_.push_back(token()); + + do + { + observer_ptr lhs_ = &tokens_.back(); + token rhs_; + + tokeniser::next(*lhs_, state_, rhs_); + + if (rhs_._type != detail::DIFF && + lhs_->precedence(rhs_._type) == ' ') + { + std::ostringstream ss_; + + ss_ << "A syntax error occurred: '" << + lhs_->precedence_string() << + "' against '" << rhs_.precedence_string() << + "' preceding index " << state_.index() << + " in "; + + if (name_ != 0) + { + ss_ << "macro "; + narrow(name_, ss_); + } + else + { + ss_ << "rule id " << state_._id; + } + + ss_ << '.'; + throw runtime_error(ss_.str()); + } + + if (rhs_._type == detail::MACRO) + { + typename macro_map::const_iterator iter_ = + _macro_map.find(rhs_._extra); + + macro_ = rhs_._extra; + + if (iter_ == _macro_map.end()) + { + const rules_char_type *rhs_name_ = rhs_._extra.c_str(); + std::ostringstream ss_; + + ss_ << "Unknown MACRO name '"; + narrow(rhs_name_, ss_); + ss_ << "'."; + throw runtime_error(ss_.str()); + } + else + { + const bool multiple_ = iter_->second.size() > 3; + + if (diff_) + { + if (multiple_) + { + std::ostringstream ss_; + + ss_ << "Single CHARSET must follow {-} or {+} at " + "index " << state_.index() - 1 << " in "; + + if (name_ != 0) + { + ss_ << "macro "; + narrow(name_, ss_); + } + else + { + ss_ << "rule id " << state_._id; + } + + ss_ << '.'; + throw runtime_error(ss_.str()); + } + else + { + rhs_ = iter_->second[1]; + } + } + + // Any macro with more than one charset (or quantifiers) + // requires bracketing. + if (multiple_) + { + token open_; + + open_._type = detail::OPENPAREN; + open_._str.insert('('); + tokens_.push_back(open_); + } + + // Don't need to store token if it is diff. 
+ if (!diff_) + { + // Don't insert BEGIN or END tokens + tokens_.insert(tokens_.end(), iter_->second.begin() + 1, + iter_->second.end() - 1); + lhs_ = &tokens_.back(); + } + + if (multiple_) + { + token close_; + + close_._type = detail::CLOSEPAREN; + close_._str.insert(')'); + tokens_.push_back(close_); + } + } + } + else if (rhs_._type == detail::DIFF) + { + if (!macro_.empty()) + { + typename macro_map::const_iterator iter_ = + _macro_map.find(macro_); + + if (iter_->second.size() > 3) + { + std::ostringstream ss_; + + ss_ << "Single CHARSET must precede {-} or {+} at " + "index " << state_.index() - 1 << " in "; + + if (name_ != 0) + { + ss_ << "macro "; + narrow(name_, ss_); + } + else + { + ss_ << "rule id " << state_._id; + } + + ss_ << '.'; + throw runtime_error(ss_.str()); + } + } + + diff_ = rhs_._extra[0]; + macro_.clear(); + continue; + } + else if (!diff_) + { + tokens_.push_back(rhs_); + lhs_ = &tokens_.back(); + macro_.clear(); + } + + // diff_ may have been set by previous conditional. 
+ if (diff_) + { + if (rhs_._type != detail::CHARSET) + { + std::ostringstream ss_; + + ss_ << "CHARSET must follow {-} or {+} at index " << + state_.index() - 1 << " in "; + + if (name_ != 0) + { + ss_ << "macro "; + narrow(name_, ss_); + } + else + { + ss_ << "rule id " << state_._id; + } + + ss_ << '.'; + throw runtime_error(ss_.str()); + } + + switch (diff_) + { + case '-': + lhs_->_str.remove(rhs_._str); + + if (lhs_->_str.empty()) + { + std::ostringstream ss_; + + ss_ << "Empty charset created by {-} at index " << + state_.index() - 1 << " in "; + + if (name_ != 0) + { + ss_ << "macro "; + narrow(name_, ss_); + } + else + { + ss_ << "rule id " << state_._id; + } + + ss_ << '.'; + throw runtime_error(ss_.str()); + } + + break; + case '+': + lhs_->_str.insert(rhs_._str); + break; + } + + diff_ = 0; + } + } while (tokens_.back()._type != detail::END); + + if (tokens_.size() == 2) + { + std::ostringstream ss_; + + ss_ << "Empty regex in "; + + if (name_ != 0) + { + ss_ << "macro "; + narrow(name_, ss_); + } + else + { + ss_ << "rule id " << state_._id; + } + + ss_ << " is not allowed."; + throw runtime_error(ss_.str()); + } + } + + void reverse(token_vector &vector_) + { + token_vector new_vector_(vector_.size(), token()); + auto iter_ = vector_.rbegin(); + auto end_ = vector_.rend(); + auto dest_ = new_vector_.begin(); + std::stack stack_; + + for (; iter_ != end_; ++iter_, ++dest_) + { + switch (iter_->_type) + { + case detail::BEGIN: + iter_->swap(*dest_); + dest_->_type = detail::END; + break; + case detail::BOL: + iter_->swap(*dest_); + dest_->_type = detail::EOL; + break; + case detail::EOL: + iter_->swap(*dest_); + dest_->_type = detail::BOL; + break; + case detail::OPENPAREN: + iter_->swap(*dest_); + dest_->_type = detail::CLOSEPAREN; + + if (stack_.top() != end_) + { + ++dest_; + dest_->swap(*stack_.top()); + } + + stack_.pop(); + break; + case detail::CLOSEPAREN: + iter_->swap(*dest_); + dest_->_type = detail::OPENPAREN; + stack_.push(end_); + break; + 
case detail::OPT: + case detail::AOPT: + case detail::ZEROORMORE: + case detail::AZEROORMORE: + case detail::ONEORMORE: + case detail::AONEORMORE: + case detail::REPEATN: + case detail::AREPEATN: + { + auto temp_ = iter_ + 1; + + if (temp_->_type == detail::CLOSEPAREN) + { + stack_.push(iter_); + ++iter_; + iter_->swap(*dest_); + dest_->_type = detail::OPENPAREN; + } + else + { + dest_->swap(*temp_); + ++dest_; + dest_->swap(*iter_); + ++iter_; + } + + break; + } + case detail::END: + iter_->swap(*dest_); + dest_->_type = detail::BEGIN; + break; + default: + // detail::OR + // detail::CHARSET + iter_->swap(*dest_); + break; + } + } + + new_vector_.swap(vector_); + } + + void push(const rules_char_type *curr_dfa_, const string ®ex_, + const id_type id_, const rules_char_type *new_dfa_, + const bool check_, const id_type user_id_ = npos()) + { + const bool star_ = *curr_dfa_ == '*' && *(curr_dfa_ + 1) == 0; + const bool dot_ = *new_dfa_ == '.' && *(new_dfa_ + 1) == 0; + const bool push_ = *new_dfa_ == '>'; + const rules_char_type *push_dfa_ = nullptr; + const bool pop_ = *new_dfa_ == '<'; + + if (push_ || pop_) + { + ++new_dfa_; + } + + if (check_) + { + check_for_invalid_id(id_); + } + + if (!dot_ && !pop_) + { + const rules_char_type *temp_ = new_dfa_; + + while (*temp_ && *temp_ != ':') + { + ++temp_; + } + + if (*temp_) push_dfa_ = temp_ + 1; + + validate(new_dfa_, *temp_ ? 
temp_ : 0); + + if (push_dfa_) + { + validate(push_dfa_); + } + } + + // npos means pop here + id_type new_dfa_id_ = npos(); + id_type push_dfa_id_ = npos(); + typename string_id_type_map::const_iterator iter_; + auto end_ = _statemap.cend(); + id_vector next_dfas_; + + if (!dot_ && !pop_) + { + if (push_dfa_) + { + iter_ = _statemap.find(string(new_dfa_, push_dfa_ - 1)); + } + else + { + iter_ = _statemap.find(new_dfa_); + } + + if (iter_ == end_) + { + std::ostringstream ss_; + + ss_ << "Unknown state name '"; + narrow(new_dfa_, ss_); + ss_ << "'."; + throw runtime_error(ss_.str()); + } + + new_dfa_id_ = iter_->second; + + if (push_dfa_) + { + iter_ = _statemap.find(push_dfa_); + + if (iter_ == end_) + { + std::ostringstream ss_; + + ss_ << "Unknown state name '"; + narrow(push_dfa_, ss_); + ss_ << "'."; + throw runtime_error(ss_.str()); + } + + push_dfa_id_ = iter_->second; + } + } + + if (star_) + { + const std::size_t size_ = _statemap.size(); + + for (id_type i_ = 0; i_ < size_; ++i_) + { + next_dfas_.push_back(i_); + } + } + else + { + const rules_char_type *start_ = curr_dfa_; + string next_dfa_; + + while (*curr_dfa_) + { + while (*curr_dfa_ && *curr_dfa_ != ',') + { + ++curr_dfa_; + } + + next_dfa_.assign(start_, curr_dfa_); + + if (*curr_dfa_) + { + ++curr_dfa_; + start_ = curr_dfa_; + } + + validate(next_dfa_.c_str()); + iter_ = _statemap.find(next_dfa_.c_str()); + + if (iter_ == end_) + { + std::ostringstream ss_; + + ss_ << "Unknown state name '"; + curr_dfa_ = next_dfa_.c_str(); + narrow(curr_dfa_, ss_); + ss_ << "'."; + throw runtime_error(ss_.str()); + } + + next_dfas_.push_back(iter_->second); + } + } + + for (std::size_t i_ = 0, size_ = next_dfas_.size(); + i_ < size_; ++i_) + { + const id_type curr_ = next_dfas_[i_]; + + _regexes[curr_].push_back(token_vector()); + tokenise(regex_, _regexes[curr_].back(), id_, 0); + + if (regex_[0] == '^') + { + _features[curr_] |= bol_bit; + } + + if (regex_[regex_.size() - 1] == '$') + { + _features[curr_] |= 
eol_bit; + } + + if (id_ == skip()) + { + _features[curr_] |= skip_bit; + } + else if (id_ == eoi()) + { + _features[curr_] |= again_bit; + } + + if (push_ || pop_) + { + _features[curr_] |= recursive_bit; + } + + _ids[curr_].push_back(id_); + _user_ids[curr_].push_back(user_id_); + _next_dfas[curr_].push_back(dot_ ? curr_ : new_dfa_id_); + _pushes[curr_].push_back(push_ ? (push_dfa_ ? + push_dfa_id_ : curr_) : npos()); + _pops[curr_].push_back(pop_); + } + } + + void validate(const rules_char_type *name_, + const rules_char_type *end_ = nullptr) const + { + const rules_char_type *start_ = name_; + + if (*name_ != '_' && !(*name_ >= 'A' && *name_ <= 'Z') && + !(*name_ >= 'a' && *name_ <= 'z')) + { + std::ostringstream ss_; + + ss_ << "Invalid name '"; + narrow(name_, ss_); + ss_ << "'."; + throw runtime_error(ss_.str()); + } + else if (*name_) + { + ++name_; + } + + while (*name_ && name_ != end_) + { + if (*name_ != '_' && *name_ != '-' && + !(*name_ >= 'A' && *name_ <= 'Z') && + !(*name_ >= 'a' && *name_ <= 'z') && + !(*name_ >= '0' && *name_ <= '9')) + { + std::ostringstream ss_; + + ss_ << "Invalid name '"; + name_ = start_; + narrow(name_, ss_); + ss_ << "'."; + throw runtime_error(ss_.str()); + } + + ++name_; + } + } + + void check_for_invalid_id(const id_type id_) const + { + if (id_ == eoi()) + { + throw runtime_error("Cannot resuse the id for eoi."); + } + + if (id_ == npos()) + { + throw runtime_error("The id npos is reserved for the " + "UNKNOWN token."); + } + } +}; + +using rules = basic_rules; +using wrules = basic_rules; +using u32rules = basic_rules; +} + +#endif diff --git a/YACReaderLibrary/lexertl/runtime_error.hpp b/YACReaderLibrary/lexertl/runtime_error.hpp new file mode 100644 index 00000000..7c240118 --- /dev/null +++ b/YACReaderLibrary/lexertl/runtime_error.hpp @@ -0,0 +1,23 @@ +// runtime_error.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_RUNTIME_ERROR_HPP +#define LEXERTL_RUNTIME_ERROR_HPP + +#include + +namespace lexertl +{ +// Exception type thrown by the library; it only forwards the message to +// std::runtime_error. +class runtime_error : public std::runtime_error +{ +public: + runtime_error(const std::string &what_arg_) : + std::runtime_error(what_arg_) + { + } +}; +} + +#endif diff --git a/YACReaderLibrary/lexertl/serialise.hpp b/YACReaderLibrary/lexertl/serialise.hpp new file mode 100644 index 00000000..931f519b --- /dev/null +++ b/YACReaderLibrary/lexertl/serialise.hpp @@ -0,0 +1,28 @@ +// serialise.hpp +// Copyright (c) 2007-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_SERIALISE_HPP +#define LEXERTL_SERIALISE_HPP + +#include "state_machine.hpp" +#include + +namespace lexertl +{ +// IMPORTANT! This won't work if you don't enable RTTI! +// Passes each field of the state machine's internals to ar_ through the +// archive's operator &. +template +void serialise(basic_state_machine &sm_, Archive &ar_) +{ + detail::basic_internals &internals_ = sm_.data(); + + ar_ & internals_._eoi; + // _lookup and _dfa are used as containers elsewhere in the library + // (.size(), operator[]), so they are archived directly instead of + // being dereferenced with unary *. + ar_ & internals_._lookup; + ar_ & internals_._dfa_alphabet; + ar_ & internals_._features; + ar_ & internals_._dfa; +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/sm_to_csm.hpp b/YACReaderLibrary/lexertl/sm_to_csm.hpp new file mode 100644 index 00000000..5ffe69fe --- /dev/null +++ b/YACReaderLibrary/lexertl/sm_to_csm.hpp @@ -0,0 +1,53 @@ +// sm_to_csm.hpp +// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_SM_TO_CSM_HPP +#define LEXERTL_SM_TO_CSM_HPP + +#include "enums.hpp" +#include "observer_ptr.hpp" +#include + +namespace lexertl +{ +// Appends one entry to csm_ for every DFA in sm_ whose alphabet size is +// non-zero. For each such DFA a string_token is built per transition +// column from the lookup table and handed to csm_.append() together with +// the source internals and the DFA index. +template +void sm_to_csm(const sm &sm_, char_state_machine &csm_) +{ + using id_type = typename sm::traits::id_type; + using internals = typename sm::internals; + using string_token = typename char_state_machine::state::string_token; + using index_type = typename string_token::index_type; + using string_token_vector = + typename char_state_machine::string_token_vector; + const internals &internals_ = sm_.data(); + const std::size_t dfas_ = internals_._dfa.size(); + + for (id_type i_ = 0; i_ < dfas_; ++i_) + { + // A zero alphabet entry is skipped without appending anything. + if (internals_._dfa_alphabet[i_] == 0) continue; + + // Number of transition columns: the row width minus the leading + // transitions_index bookkeeping columns. + const std::size_t alphabet_ = internals_._dfa_alphabet[i_] - + transitions_index; + string_token_vector token_vector_(alphabet_, string_token()); + observer_ptr ptr_ = &internals_._lookup[i_].front(); + + // Only the first 256 lookup entries are scanned; each entry that + // refers to a transition column adds character c_ to that + // column's token. + for (std::size_t c_ = 0; c_ < 256; ++c_, ++ptr_) + { + if (*ptr_ >= transitions_index) + { + string_token &token_ = token_vector_ + [*ptr_ - transitions_index]; + + token_.insert(typename string_token::range + (index_type(c_), index_type(c_))); + } + } + + csm_.append(token_vector_, internals_, i_); + } +} +} + +#endif diff --git a/YACReaderLibrary/lexertl/sm_traits.hpp b/YACReaderLibrary/lexertl/sm_traits.hpp new file mode 100644 index 00000000..161b29c9 --- /dev/null +++ b/YACReaderLibrary/lexertl/sm_traits.hpp @@ -0,0 +1,44 @@ +// sm_traits.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef LEXERTL_SM_TRAITS_HPP +#define LEXERTL_SM_TRAITS_HPP + +namespace lexertl +{ +template +struct basic_sm_traits +{ + enum {char_24_bit = sizeof(ch_type) > 2, compressed = comp, lookup = look, + is_dfa = dfa_nfa}; + using input_char_type = ch_type; + using char_type = ch_type; + using id_type = sm_type; + + static id_type npos() + { + return static_cast(~0); + } +}; + +template +struct basic_sm_traits +{ + enum {char_24_bit = sizeof(ch_type) > 2, compressed = true, lookup = look, + is_dfa = dfa_nfa}; + using input_char_type = ch_type; + using char_type = unsigned char; + using id_type = sm_type; + + static id_type npos() + { + return static_cast(~0); + } +}; +} + +#endif diff --git a/YACReaderLibrary/lexertl/state_machine.hpp b/YACReaderLibrary/lexertl/state_machine.hpp new file mode 100644 index 00000000..76e7bc31 --- /dev/null +++ b/YACReaderLibrary/lexertl/state_machine.hpp @@ -0,0 +1,521 @@ +// state_machine.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_STATE_MACHINE_HPP +#define LEXERTL_STATE_MACHINE_HPP + +// memcmp() +#include +#include "internals.hpp" +#include +#include "observer_ptr.hpp" +#include +#include "sm_traits.hpp" +#include "string_token.hpp" + +namespace lexertl +{ +template +class basic_state_machine +{ +public: + using id_type = id_ty; + using traits = + basic_sm_traits 1), true, true>; + using internals = detail::basic_internals; + + // If you get a compile error here you have + // failed to define an unsigned id type. 
+ static_assert(std::is_unsigned::value, "Your id type is signed"); + + basic_state_machine() : + _internals() + { + } + + void clear() + { + _internals.clear(); + } + + internals &data() + { + return _internals; + } + + const internals &data() const + { + return _internals; + } + + bool empty() const + { + return _internals.empty(); + } + + id_type eoi() const + { + return _internals._eoi; + } + + void minimise() + { + const id_type dfas_ = static_cast(_internals._dfa.size()); + + for (id_type i_ = 0; i_ < dfas_; ++i_) + { + const id_type dfa_alphabet_ = _internals._dfa_alphabet[i_]; + id_type_vector &dfa_ = _internals._dfa[i_]; + + if (dfa_alphabet_ != 0) + { + std::size_t size_ = 0; + + do + { + size_ = dfa_.size(); + minimise_dfa(dfa_alphabet_, dfa_, size_); + } while (dfa_.size() != size_); + } + } + } + + static id_type npos() + { + return static_cast(~0); + } + + static id_type skip() + { + return static_cast(~1); + } + + void swap(basic_state_machine &rhs_) + { + _internals.swap(rhs_._internals); + } + +private: + using id_type_vector = typename internals::id_type_vector; + using index_set = std::set; + internals _internals; + + void minimise_dfa(const id_type dfa_alphabet_, + id_type_vector &dfa_, std::size_t size_) + { + observer_ptr first_ = &dfa_.front(); + observer_ptr end_ = first_ + size_; + id_type index_ = 1; + id_type new_index_ = 1; + id_type_vector lookup_(size_ / dfa_alphabet_, npos()); + observer_ptr lookup_ptr_ = &lookup_.front(); + index_set index_set_; + const id_type bol_index_ = dfa_.front(); + + *lookup_ptr_ = 0; + // Only one 'jam' state, so skip it. + first_ += dfa_alphabet_; + + for (; first_ < end_; first_ += dfa_alphabet_, ++index_) + { + observer_ptr second_ = first_ + dfa_alphabet_; + + for (id_type curr_index_ = index_ + 1; second_ < end_; + ++curr_index_, second_ += dfa_alphabet_) + { + if (index_set_.find(curr_index_) != index_set_.end()) + { + continue; + } + + // Some systems have memcmp in namespace std. 
+ using namespace std; + + if (memcmp(first_, second_, sizeof(id_type) * + dfa_alphabet_) == 0) + { + index_set_.insert(curr_index_); + lookup_ptr_[curr_index_] = new_index_; + } + } + + if (lookup_ptr_[index_] == npos()) + { + lookup_ptr_[index_] = new_index_; + ++new_index_; + } + } + + if (!index_set_.empty()) + { + observer_ptr front_ = &dfa_.front(); + id_type_vector new_dfa_(front_, front_ + dfa_alphabet_); + auto set_end_ = index_set_.cend(); + observer_ptr ptr_ = front_ + dfa_alphabet_; + observer_ptr new_ptr_ = nullptr; + + new_dfa_.resize(size_ - index_set_.size() * dfa_alphabet_, 0); + new_ptr_ = &new_dfa_.front() + dfa_alphabet_; + size_ /= dfa_alphabet_; + + if (bol_index_) + { + new_dfa_.front() = lookup_ptr_[bol_index_]; + } + + for (index_ = 1; index_ < size_; ++index_) + { + if (index_set_.find(index_) != set_end_) + { + ptr_ += dfa_alphabet_; + continue; + } + + new_ptr_[end_state_index] = ptr_[end_state_index]; + new_ptr_[id_index] = ptr_[id_index]; + new_ptr_[user_id_index] = ptr_[user_id_index]; + new_ptr_[push_dfa_index] = ptr_[push_dfa_index]; + new_ptr_[next_dfa_index] = ptr_[next_dfa_index]; + new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]]; + new_ptr_ += transitions_index; + ptr_ += transitions_index; + + for (id_type i_ = transitions_index; i_ < dfa_alphabet_; ++i_) + { + *new_ptr_++ = lookup_ptr_[*ptr_++]; + } + } + + dfa_.swap(new_dfa_); + } + } +}; + +using state_machine = basic_state_machine; +using wstate_machine = basic_state_machine; +using u32state_machine = basic_state_machine; + +template +struct basic_char_state_machine +{ + using id_type = id_ty; + using traits = basic_sm_traits; + using internals = detail::basic_internals; + using id_type_vector = typename internals::id_type_vector; + + struct state + { + using string_token = basic_string_token; + using id_type_string_token_map = std::map; + using id_type_string_token_pair = std::pair; + enum push_pop_dfa {neither, push_dfa, pop_dfa}; + + bool _end_state; + push_pop_dfa 
_push_pop_dfa; + id_type _id; + id_type _user_id; + id_type _push_dfa; + id_type _next_dfa; + id_type _eol_index; + id_type_string_token_map _transitions; + + state() : + _end_state(false), + _push_pop_dfa(neither), + _id(0), + _user_id(traits::npos()), + _push_dfa(traits::npos()), + _next_dfa(0), + _eol_index(traits::npos()), + _transitions() + { + } + + bool operator ==(const state rhs_) const + { + return _end_state == rhs_._end_state && + _push_pop_dfa == rhs_._push_pop_dfa && + _id == rhs_._id && + _user_id == rhs_._user_id && + _push_dfa == rhs_._push_dfa && + _next_dfa == rhs_._next_dfa && + _eol_index == rhs_._eol_index && + _transitions == rhs_._transitions; + } + }; + + using string_token = typename state::string_token; + using state_vector = std::vector; + using string_token_vector = std::vector; + using id_type_string_token_pair = + typename state::id_type_string_token_pair; + + struct dfa + { + id_type _bol_index; + state_vector _states; + + dfa(const std::size_t size_) : + _bol_index(traits::npos()), + _states(state_vector(size_)) + { + } + + std::size_t size() const + { + return _states.size(); + } + + void swap(dfa &rhs_) + { + std::swap(_bol_index, rhs_._bol_index); + _states.swap(rhs_._states); + } + }; + + static_assert(std::is_move_assignable::value && + std::is_move_constructible::value, "dfa is not movable."); + using dfa_vector = std::vector; + + static_assert(std::is_unsigned::value, "Your id type is signed"); + dfa_vector _sm_vector; + + basic_char_state_machine() : + _sm_vector() + { + } + + void append(const string_token_vector &token_vector_, + const internals &internals_, const id_type dfa_index_) + { + const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_index_]; + const std::size_t alphabet_ = dfa_alphabet_ - transitions_index; + const id_type_vector &source_dfa_ = internals_._dfa[dfa_index_]; + observer_ptr ptr_ = &source_dfa_.front(); + const std::size_t size_ = (source_dfa_.size() - dfa_alphabet_) / + dfa_alphabet_; + 
typename state::id_type_string_token_map::iterator trans_iter_; + + _sm_vector.push_back(dfa(size_)); + + dfa &dest_dfa_ = _sm_vector.back(); + + if (*ptr_) + { + dest_dfa_._bol_index = *ptr_ - 1; + } + + ptr_ += dfa_alphabet_; + + for (id_type i_ = 0; i_ < size_; ++i_) + { + state &state_ = dest_dfa_._states[i_]; + + state_._end_state = ptr_[end_state_index] != 0; + + if (ptr_[push_dfa_index] != npos()) + { + state_._push_pop_dfa = state::push_dfa; + } + else if (ptr_[end_state_index] & pop_dfa_bit) + { + state_._push_pop_dfa = state::pop_dfa; + } + + state_._id = ptr_[id_index]; + state_._user_id = ptr_[user_id_index]; + state_._push_dfa = ptr_[push_dfa_index]; + state_._next_dfa = ptr_[next_dfa_index]; + + if (ptr_[eol_index]) + { + state_._eol_index = ptr_[eol_index] - 1; + } + + ptr_ += transitions_index; + + for (id_type col_index_ = 0; col_index_ < alphabet_; + ++col_index_, ++ptr_) + { + const id_type next_ = *ptr_; + + if (next_ > 0) + { + trans_iter_ = state_._transitions.find(next_ - 1); + + if (trans_iter_ == state_._transitions.end()) + { + trans_iter_ = state_._transitions.insert + (id_type_string_token_pair(static_cast + (next_ - 1), token_vector_[col_index_])).first; + } + else + { + trans_iter_->second.insert(token_vector_[col_index_]); + } + } + } + } + } + + void clear() + { + _sm_vector.clear(); + } + + bool empty() const + { + return _sm_vector.empty(); + } + + void minimise() + { + const id_type dfas_ = static_cast(_sm_vector.size()); + + for (id_type i_ = 0; i_ < dfas_; ++i_) + { + observer_ptr dfa_ = &_sm_vector[i_]; + + if (dfa_->size() > 0) + { + std::size_t size_ = 0; + + do + { + size_ = dfa_->size(); + minimise_dfa(*dfa_, size_); + } while (dfa_->size() != size_); + } + } + } + + static id_type npos() + { + return traits::npos(); + } + + id_type size() const + { + return static_cast(_sm_vector.size()); + } + + static id_type skip() + { + return ~static_cast(1); + } + + void swap(basic_char_state_machine &csm_) + { + 
_sm_vector.swap(csm_._sm_vector); + } + +private: + using index_set = std::set; + + // Merges states of dfa_ that compare equal (state::operator==). + // Later duplicates are recorded in index_set_, lookup_ maps every old + // state index to its new index, and the surviving states are rebuilt + // into a fresh dfa that is swapped into dfa_. + // size_ is the number of states to examine. + void minimise_dfa(dfa &dfa_, std::size_t size_) + { + observer_ptr first_ = &dfa_._states.front(); + observer_ptr end_ = first_ + size_; + id_type index_ = 0; + id_type new_index_ = 0; + id_type_vector lookup_(size_, npos()); + observer_ptr lookup_ptr_ = &lookup_.front(); + index_set index_set_; + + for (; first_ != end_; ++first_, ++index_) + { + observer_ptr second_ = first_ + 1; + + for (id_type curr_index_ = index_ + 1; second_ != end_; + ++curr_index_, ++second_) + { + // Already known to duplicate an earlier state. + if (index_set_.find(curr_index_) != index_set_.end()) + { + continue; + } + + if (*first_ == *second_) + { + index_set_.insert(curr_index_); + lookup_ptr_[curr_index_] = new_index_; + } + } + + // Not a duplicate of anything earlier: it keeps a slot of its own. + if (lookup_ptr_[index_] == npos()) + { + lookup_ptr_[index_] = new_index_; + ++new_index_; + } + } + + if (!index_set_.empty()) + { + observer_ptr front_ = &dfa_._states.front(); + dfa new_dfa_(new_index_); + auto set_end_ = index_set_.cend(); + observer_ptr ptr_ = front_; + observer_ptr new_ptr_ = &new_dfa_._states.front(); + + if (dfa_._bol_index != npos()) + { + new_dfa_._bol_index = lookup_ptr_[dfa_._bol_index]; + } + + for (index_ = 0; index_ < size_; ++index_) + { + if (index_set_.find(index_) != set_end_) + { + ++ptr_; + continue; + } + + // Copy every per-state attribute. _id is taken from _id (it was + // previously overwritten with the _end_state flag), and the + // push/pop fields are copied too because operator== compares + // them and the new state would otherwise keep its defaults. + new_ptr_->_end_state = ptr_->_end_state; + new_ptr_->_id = ptr_->_id; + new_ptr_->_push_pop_dfa = ptr_->_push_pop_dfa; + new_ptr_->_push_dfa = ptr_->_push_dfa; + new_ptr_->_user_id = ptr_->_user_id; + new_ptr_->_next_dfa = ptr_->_next_dfa; + + if (ptr_->_eol_index != npos()) + { + new_ptr_->_eol_index = lookup_ptr_[ptr_->_eol_index]; + } + + auto iter_ = ptr_->_transitions.cbegin(); + auto end_ = ptr_->_transitions.cend(); + typename state::id_type_string_token_map::iterator find_; + + // Re-target each transition through lookup_; transitions that now + // lead to the same state have their tokens merged. + for (; iter_ != end_; ++iter_) + { + find_ = new_ptr_->_transitions.find + (lookup_ptr_[iter_->first]); + + if (find_ == new_ptr_->_transitions.end()) + { + new_ptr_->_transitions.insert + (id_type_string_token_pair + (lookup_ptr_[iter_->first], iter_->second)); + } + else + { + 
find_->second.insert(iter_->second); + } + } + + ++ptr_; + ++new_ptr_; + } + + dfa_.swap(new_dfa_); + } + } +}; + +using char_state_machine = basic_char_state_machine; +using wchar_state_machine = basic_char_state_machine; +using u32char_state_machine = basic_char_state_machine; +} + +#endif diff --git a/YACReaderLibrary/lexertl/stream_shared_iterator.hpp b/YACReaderLibrary/lexertl/stream_shared_iterator.hpp new file mode 100644 index 00000000..7946390f --- /dev/null +++ b/YACReaderLibrary/lexertl/stream_shared_iterator.hpp @@ -0,0 +1,352 @@ +// stream_shared_iterator.hpp +// Copyright (c) 2010-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#ifndef LEXERTL_STREAM_SHARED_ITERATOR_HPP +#define LEXERTL_STREAM_SHARED_ITERATOR_HPP + +#include +// memcpy +#include +#include +#include +#include "runtime_error.hpp" +#include + +namespace lexertl +{ +template +class basic_stream_shared_iterator +{ +public: + using istream = std::basic_istream; + using iterator_category = std::forward_iterator_tag; + using difference_type = std::size_t; + using value_type = char_type; + using pointer = char_type *; + using reference = char_type &; + + basic_stream_shared_iterator() : + _master(false), + _live(false), + _index(shared::npos()), + _shared(nullptr) + { + } + + basic_stream_shared_iterator(istream &stream_, + const std::size_t buff_size_ = 1024, + const std::size_t increment_ = 1024) : + _master(true), + _live(false), + _index(shared::npos()), + // For exception safety don't call new yet + _shared(nullptr) + { + // Safe to call potentially throwing new now. + _shared = new shared(stream_, buff_size_, increment_); + ++_shared->_ref_count; + _shared->_clients.push_back(this); + } + + basic_stream_shared_iterator(const basic_stream_shared_iterator &rhs_) : + _master(false), + _live(false), + _index(rhs_._master ? 
rhs_._shared->lowest() : rhs_._index), + _shared(rhs_._shared) + { + if (_shared) + { + // New copy of an iterator. + // The assumption is that any copy must be live + // even if the rhs is not (otherwise we will never + // have a record of the start of the current range!) + ++_shared->_ref_count; + _shared->_clients.push_back(this); + _live = true; + } + } + + ~basic_stream_shared_iterator() + { + if (_shared) + { + --_shared->_ref_count; + _shared->erase(this); + + if (_shared->_ref_count == 0) + { + delete _shared; + _shared = nullptr; + } + } + } + + basic_stream_shared_iterator &operator = + (const basic_stream_shared_iterator &rhs_) + { + if (this != &rhs_) + { + _master = false; + _index = rhs_._master ? rhs_._shared->lowest() : rhs_._index; + + if (!_live && !rhs_._live) + { + if (rhs_._shared) + { + ++rhs_._shared->_ref_count; + } + } + else if (!_live && rhs_._live) + { + rhs_._shared->_clients.push_back(this); + + if (!_shared) + { + ++rhs_._shared->_ref_count; + } + } + else if (_live && !rhs_._live) + { + _shared->erase(this); + + if (!rhs_._shared) + { + --_shared->_ref_count; + } + } + + _live = rhs_._live; + _shared = rhs_._shared; + } + + return *this; + } + + bool operator ==(const basic_stream_shared_iterator &rhs_) const + { + return _index == rhs_._index && + (_shared == rhs_._shared || + (_index == shared::npos() || rhs_._index == shared::npos()) && + (!_shared || !rhs_._shared)); + } + + bool operator !=(const basic_stream_shared_iterator &rhs_) const + { + return !(*this == rhs_); + } + + const char_type &operator *() + { + check_master(); + return _shared->_buffer[_index]; + } + + basic_stream_shared_iterator &operator ++() + { + check_master(); + ++_index; + update_state(); + return *this; + } + + basic_stream_shared_iterator operator ++(int) + { + basic_stream_shared_iterator iter_ = *this; + + check_master(); + ++_index; + update_state(); + return iter_; + } + +private: + class shared + { + public: + std::size_t _ref_count; + using 
char_vector = std::vector; + using iter_list = std::vector; + istream &_stream; + std::size_t _increment; + std::size_t _len; + char_vector _buffer; + iter_list _clients; + + // Reads the first buff_size_ characters of stream_ into the buffer. + // Initialisers are listed in member declaration order. + shared(istream &stream_, const std::size_t buff_size_, + const std::size_t increment_) : + _ref_count(0), + _stream(stream_), + _increment(increment_), + _len(0) + { + _buffer.resize(buff_size_); + _stream.read(&_buffer.front(), _buffer.size()); + _len = static_cast(_stream.gcount()); + } + + // Makes room in the buffer and reads more input from the stream. + // Returns true when at least one new character was read. + bool reload_buffer() + { + const std::size_t lowest_ = lowest(); + std::size_t read_ = 0; + + if (lowest_ == 0) + { + // Resize buffer + const std::size_t old_size_ = _buffer.size(); + const std::size_t new_size_ = old_size_ + _increment; + + _buffer.resize(new_size_); + _stream.read(&_buffer.front() + old_size_, _increment); + read_ = static_cast(_stream.gcount()); + + if (read_) + { + read_ += old_size_; + _len = read_; + } + } + else + { + // Some systems have memmove in namespace std + using namespace std; + const size_t start_ = _buffer.size() - lowest_; + const size_t len_ = _buffer.size() - start_; + + // The kept tail [lowest_, size) slides down to the front. Source + // and destination overlap whenever lowest_ < start_, so memmove + // (not memcpy) is required. Pointer arithmetic is used instead + // of _buffer[lowest_] so lowest_ == size() is not indexed. + memmove(&_buffer.front(), &_buffer.front() + lowest_, start_ * + sizeof(char_type)); + _stream.read(&_buffer.front() + start_, len_); + read_ = static_cast(_stream.gcount()); + // Every client index is shifted down by the discarded prefix. + subtract(lowest_); + + if (read_) + { + read_ += start_; + _len = read_; + } + else + { + _len = highest(); + } + } + + return read_ != 0; + } + + // Removes ptr_ from the client list if it is registered. + void erase(basic_stream_shared_iterator *ptr_) + { + auto iter_ = std::find(_clients.begin(), _clients.end(), ptr_); + + if (iter_ != _clients.end()) + _clients.erase(iter_); + } + + // Smallest index held by any client, or 0 when every client is at + // npos() (or there are none). + std::size_t lowest() const + { + std::size_t lowest_ = npos(); + auto iter_ = _clients.cbegin(); + auto end_ = _clients.cend(); + + for (; iter_ != end_; ++iter_) + { + const basic_stream_shared_iterator *ptr_ = *iter_; + + if (ptr_->_index < lowest_) + { + lowest_ = ptr_->_index; + } + } + + if (lowest_ == npos()) + { + lowest_ = 0; + } + + return lowest_; + } + + std::size_t highest() const + { + std::size_t highest_ = 0; 
+ auto iter_ = _clients.cbegin(); + auto end_ = _clients.cend(); + + for (; iter_ != end_; ++iter_) + { + const basic_stream_shared_iterator *ptr_ = *iter_; + + if (ptr_->_index != npos() && ptr_->_index > highest_) + { + highest_ = ptr_->_index; + } + } + + return highest_; + } + + void subtract(const std::size_t lowest_) + { + auto iter_ = _clients.begin(); + auto end_ = _clients.end(); + + for (; iter_ != end_; ++iter_) + { + basic_stream_shared_iterator *ptr_ = *iter_; + + if (ptr_->_index != npos()) + { + ptr_->_index -= lowest_; + } + } + } + + static std::size_t npos() + { + return ~static_cast(0); + } + + private: + shared &operator =(const shared &rhs_); + }; + + bool _master; + bool _live; + std::size_t _index; + shared *_shared; + + void check_master() + { + if (!_shared) + { + throw runtime_error("Cannot manipulate null (end) " + "stream_shared_iterators."); + } + + if (_master) + { + _master = false; + _live = true; + _index = _shared->lowest(); + } + } + + void update_state() + { + if (_index >= _shared->_len) + { + if (!_shared->reload_buffer()) + { + _shared->erase(this); + _index = shared::npos(); + _live = false; + } + } + } +}; + +using stream_shared_iterator = basic_stream_shared_iterator; +using wstream_shared_iterator = basic_stream_shared_iterator; +} + +#endif diff --git a/YACReaderLibrary/lexertl/string_token.hpp b/YACReaderLibrary/lexertl/string_token.hpp new file mode 100644 index 00000000..e108bd12 --- /dev/null +++ b/YACReaderLibrary/lexertl/string_token.hpp @@ -0,0 +1,439 @@ +// string_token.hpp +// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_STRING_TOKEN_HPP +#define LEXERTL_STRING_TOKEN_HPP + +#include "char_traits.hpp" +#include // Needed by GCC 4.4 +#include +#include +#include +#include + +namespace lexertl +{ +template +struct basic_string_token +{ + using char_type = ch_type; + using char_traits = basic_char_traits; + using index_type = typename char_traits::index_type; + using range = std::pair; + using range_vector = std::vector; + using string = std::basic_string; + using string_token = basic_string_token; + + range_vector _ranges; + + basic_string_token() : + _ranges() + { + } + + basic_string_token(char_type ch_) : + _ranges() + { + insert(range(ch_, ch_)); + } + + basic_string_token(char_type first_, char_type second_) : + _ranges() + { + insert(range(first_, second_)); + } + + void clear() + { + _ranges.clear(); + } + + bool empty() const + { + return _ranges.empty(); + } + + bool any() const + { + return _ranges.size() == 1 && _ranges.front().first == 0 && + _ranges.front().second == char_traits::max_val(); + } + + bool operator <(const basic_string_token &rhs_) const + { + return _ranges < rhs_._ranges; + } + + bool operator ==(const basic_string_token &rhs_) const + { + return _ranges == rhs_._ranges; + } + + bool negatable() const + { + std::size_t size_ = 0; + auto iter_ = _ranges.cbegin(); + auto end_ = _ranges.cend(); + + for (; iter_ != end_; ++iter_) + { + size_ += static_cast(iter_->second) + 1 - + static_cast(iter_->first); + } + + return size_ > static_cast(char_traits::max_val()) / 2; + } + + void swap(basic_string_token &rhs_) + { + _ranges.swap(rhs_._ranges); + } + + void insert(const basic_string_token &rhs_) + { + auto iter_ = rhs_._ranges.cbegin(); + auto end_ = rhs_._ranges.cend(); + + for (; iter_ != end_; ++iter_) + { + insert(*iter_); + } + } + + // Deliberately pass by value - may modify + typename range_vector::iterator insert(range rhs_) + { + bool insert_ = 
true; + auto iter_ = _ranges.begin(); + auto end_ = _ranges.end(); + auto erase_iter_ = end_; + + while (iter_ != end_) + { + // follows current item + if (rhs_.first > iter_->second) + { + if (rhs_.first == iter_->second + 1) + { + // Auto normalise + rhs_.first = iter_->first; + } + else + { + // No intersection, consider next + ++iter_; + continue; + } + } + // Precedes current item + else if (rhs_.second < iter_->first) + { + if (rhs_.second == iter_->first - 1) + { + // Auto normalise + rhs_.second = iter_->second; + } + else + { + // insert here + break; + } + } + else + { + // overlap (under) + if (rhs_.first < iter_->first) + { + if (rhs_.second < iter_->second) + { + rhs_.second = iter_->second; + } + } + // overlap (over) + else if (rhs_.second > iter_->second) + { + if (rhs_.first > iter_->first) + { + rhs_.first = iter_->first; + } + } + // subset + else + { + insert_ = false; + iter_ = _ranges.end(); + break; + } + } + + // Code minimisation: this always applies unless we have already + // exited the loop, or "continue" executed. 
+ if (erase_iter_ == end_) + { + erase_iter_ = iter_; + } + + ++iter_; + } + + if (erase_iter_ != end_) + { + if (insert_) + { + // Re-use obsolete location + *erase_iter_ = rhs_; + ++erase_iter_; + } + + iter_ = _ranges.erase(erase_iter_, iter_); + } + else if (insert_) + { + iter_ = _ranges.insert(iter_, rhs_); + } + + return iter_; + } + + void negate() + { + index_type next_ = 0; + const index_type max_ = char_traits::max_val(); + string_token temp_; + auto iter_ = _ranges.cbegin(); + auto end_ = _ranges.cend(); + bool finished_ = false; + + for (; iter_ != end_; ++iter_) + { + if (next_ < iter_->first) + { + temp_.insert(range(next_, + static_cast(iter_->first - 1))); + } + + if (iter_->second < max_) + { + next_ = iter_->second + 1; + } + else + { + finished_ = true; + break; + } + } + + if (!finished_) + { + temp_.insert(range(next_, max_)); + } + + swap(temp_); + } + + void intersect(basic_string_token &rhs_, basic_string_token &overlap_) + { + auto lhs_iter_ = _ranges.begin(); + auto lhs_end_ = _ranges.end(); + auto rhs_iter_ = rhs_._ranges.begin(); + auto rhs_end_ = rhs_._ranges.end(); + + while (lhs_iter_ != lhs_end_ && rhs_iter_ != rhs_end_) + { + if (rhs_iter_->first > lhs_iter_->second) + { + ++lhs_iter_; + } + else if (rhs_iter_->second < lhs_iter_->first) + { + ++rhs_iter_; + } + else + { + range range_; + + if (rhs_iter_->first > lhs_iter_->first) + { + range_.first = rhs_iter_->first; + } + else + { + range_.first = lhs_iter_->first; + } + + if (rhs_iter_->second < lhs_iter_->second) + { + range_.second = rhs_iter_->second; + } + else + { + range_.second = lhs_iter_->second; + } + + adjust(range_, *this, lhs_iter_, lhs_end_); + adjust(range_, rhs_, rhs_iter_, rhs_end_); + overlap_.insert(range_); + } + } + } + + void remove(basic_string_token &rhs_) + { + auto lhs_iter_ = _ranges.begin(); + auto lhs_end_ = _ranges.end(); + auto rhs_iter_ = rhs_._ranges.begin(); + auto rhs_end_ = rhs_._ranges.end(); + + while (lhs_iter_ != lhs_end_ && rhs_iter_ != 
rhs_end_) + { + if (rhs_iter_->first > lhs_iter_->second) + { + ++lhs_iter_; + } + else if (rhs_iter_->second < lhs_iter_->first) + { + ++rhs_iter_; + } + else + { + range range_; + + if (rhs_iter_->first > lhs_iter_->first) + { + range_.first = rhs_iter_->first; + } + else + { + range_.first = lhs_iter_->first; + } + + if (rhs_iter_->second < lhs_iter_->second) + { + range_.second = rhs_iter_->second; + } + else + { + range_.second = lhs_iter_->second; + } + + adjust(range_, *this, lhs_iter_, lhs_end_); + } + } + } + + static string escape_char(const typename char_traits::index_type ch_) + { + string out_; + + switch (ch_) + { + case '\0': + out_ += '\\'; + out_ += '0'; + break; + case '\a': + out_ += '\\'; + out_ += 'a'; + break; + case '\b': + out_ += '\\'; + out_ += 'b'; + break; + case 27: + out_ += '\\'; + out_ += 'x'; + out_ += '1'; + out_ += 'b'; + break; + case '\f': + out_ += '\\'; + out_ += 'f'; + break; + case '\n': + out_ += '\\'; + out_ += 'n'; + break; + case '\r': + out_ += '\\'; + out_ += 'r'; + break; + case '\t': + out_ += '\\'; + out_ += 't'; + break; + case '\v': + out_ += '\\'; + out_ += 'v'; + break; + case '\\': + out_ += '\\'; + out_ += '\\'; + break; + case '"': + out_ += '\\'; + out_ += '"'; + break; + case '\'': + out_ += '\\'; + out_ += '\''; + break; + default: + { + if (ch_ < 32 || ch_ > 126) + { + std::basic_stringstream ss_; + + out_ += '\\'; + out_ += 'x'; + ss_ << std::hex << + static_cast(ch_); + out_ += ss_.str(); + } + else + { + out_ += ch_; + } + + break; + } + } + + return out_; + } + +private: + void adjust(const range &range_, basic_string_token &token_, + typename range_vector::iterator &iter_, + typename range_vector::iterator &end_) + { + if (range_.first > iter_->first) + { + const index_type second_ = iter_->second; + + iter_->second = range_.first - 1; + + if (range_.second < second_) + { + range new_range_(static_cast(range_.second + 1), + second_); + + iter_ = token_.insert(new_range_); + end_ = 
token_._ranges.end(); + } + } + else if (range_.second < iter_->second) + { + iter_->first = range_.second + 1; + } + else + { + iter_ = token_._ranges.erase(iter_); + end_ = token_._ranges.end(); + } + } +}; +} + +#endif diff --git a/YACReaderLibrary/lexertl/utf_iterators.hpp b/YACReaderLibrary/lexertl/utf_iterators.hpp new file mode 100644 index 00000000..0bd64e7c --- /dev/null +++ b/YACReaderLibrary/lexertl/utf_iterators.hpp @@ -0,0 +1,508 @@ +// utf_iterators.hpp +// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) +// Inspired by http://utfcpp.sourceforge.net/ +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#ifndef LEXERTL_UTF_ITERATORS_HPP +#define LEXERTL_UTF_ITERATORS_HPP + +#include + +namespace lexertl +{ +template +class basic_utf8_in_iterator : + public std::iterator +{ +public: + using value_type = char_type; + using difference_type = + typename std::iterator_traits::difference_type; + using iterator_category = std::forward_iterator_tag; + + basic_utf8_in_iterator() : + _it(char_iterator()), + _end(char_iterator()), + _char(0) + { + } + + explicit basic_utf8_in_iterator(const char_iterator &it_, + const char_iterator &end_) : + _it(it_), + _end(it_), + _char(0) + { + if (it_ != end_) + { + next(); + } + } + + char_type operator *() const + { + return _char; + } + + bool operator ==(const basic_utf8_in_iterator &rhs_) const + { + return _it == rhs_._it; + } + + bool operator !=(const basic_utf8_in_iterator &rhs_) const + { + return _it != rhs_._it; + } + + basic_utf8_in_iterator &operator ++() + { + _it = _end; + next(); + return *this; + } + + basic_utf8_in_iterator operator ++(int) + { + basic_utf8_in_iterator temp_ = *this; + + _it = _end; + next(); + return temp_; + } + + basic_utf8_in_iterator operator +(const std::size_t count_) const + { + basic_utf8_in_iterator temp_ = *this; + + for (std::size_t i_ = 0; i_ < count_; 
++i_) + { + ++temp_; + } + + return temp_; + } + + basic_utf8_in_iterator operator -(const std::size_t count_) const + { + basic_utf8_in_iterator temp_ = *this; + + for (std::size_t i_ = 0; i_ < count_; ++i_) + { + temp_._end = temp_._it; + --temp_._it; + + while ((*temp_._it & 0xc0) == 0x80) --temp_._it; + } + + temp_.next(); + return temp_; + } + +private: + char_iterator _it; + char_iterator _end; + char_type _char; + + void next() + { + const char len_ = len(_it); + char_type ch_ = *_it & 0xff; + + switch (len_) + { + case 1: + _end = _it; + ++_end; + break; + case 2: + _end = _it; + ++_end; + + if ((*_end & 0xc0) != 0x80) break; + + ch_ = (ch_ << 6 & 0x7ff) | (*_end & 0x3f); + ++_end; + break; + case 3: + _end = _it; + ++_end; + + if ((*_end & 0xc0) != 0x80) break; + + ch_ = (ch_ << 12 & 0xffff) | ((*_end & 0xff) << 6 & 0xfff); + ++_end; + + if ((*_end & 0xc0) != 0x80) break; + + ch_ |= *_end & 0x3f; + ++_end; + break; + case 4: + _end = _it; + ++_end; + + if ((*_end & 0xc0) != 0x80) break; + + ch_ = (ch_ << 18 & 0x1fffff) | ((*_end & 0xff) << 12 & 0x3ffff); + ++_end; + + if ((*_end & 0xc0) != 0x80) break; + + ch_ |= (*_end & 0xff) << 6 & 0xfff; + ++_end; + + if ((*_end & 0xc0) != 0x80) break; + + ch_ |= *_end & 0x3f; + ++_end; + break; + } + + _char = ch_; + } + + char len(const char_iterator &it_) const + { + const unsigned char ch_ = *it_; + + return ch_ < 0x80 ? 1 : + ch_ >> 5 == 0x06 ? 2 : + ch_ >> 4 == 0x0e ? 3 : + ch_ >> 3 == 0x1e ? 
4 : + 1; + } +}; + +template +class basic_utf8_out_iterator : + public std::iterator +{ +public: + using value_type = char; + using difference_type = + typename std::iterator_traits::difference_type; + using iterator_category = std::forward_iterator_tag; + + basic_utf8_out_iterator() : + _count(0), + _index(0) + { + } + + explicit basic_utf8_out_iterator(const char_iterator &it_, + const char_iterator &end_) : + _it(it_), + _count(0), + _index(0) + { + if (it_ != end_) + { + next(); + } + } + + char operator *() const + { + return _bytes[_index]; + } + + bool operator ==(const basic_utf8_out_iterator &rhs_) const + { + return _it == rhs_._it; + } + + bool operator !=(const basic_utf8_out_iterator &rhs_) const + { + return _it != rhs_._it; + } + + basic_utf8_out_iterator &operator ++() + { + ++_index; + + if (_index >= _count) + { + ++_it; + next(); + } + + return *this; + } + + basic_utf8_out_iterator operator ++(int) + { + basic_utf8_out_iterator temp_ = *this; + + ++_index; + + if (_index >= _count) + { + ++_it; + next(); + } + + return temp_; + } + +private: + char_iterator _it; + char _bytes[4]; + unsigned char _count; + unsigned char _index; + + void next() + { + const std::size_t ch_ = *_it; + + _count = len(ch_); + _index = 0; + + switch (_count) + { + case 1: + _bytes[0] = static_cast(ch_); + break; + case 2: + _bytes[0] = static_cast((ch_ >> 6) | 0xc0); + _bytes[1] = (ch_ & 0x3f) | 0x80; + break; + case 3: + _bytes[0] = static_cast((ch_ >> 12) | 0xe0); + _bytes[1] = ((ch_ >> 6) & 0x3f) | 0x80; + _bytes[2] = (ch_ & 0x3f) | 0x80; + break; + case 4: + _bytes[0] = static_cast((ch_ >> 18) | 0xf0); + _bytes[1] = ((ch_ >> 12) & 0x3f) | 0x80; + _bytes[2] = ((ch_ >> 6) & 0x3f) | 0x80; + _bytes[3] = (ch_ & 0x3f) | 0x80; + break; + } + } + + char len(const std::size_t ch_) const + { + return ch_ < 0x80 ? 1 : + ch_ < 0x800 ? 2 : + ch_ < 0x10000 ? 
3 : + 4; + } +}; + +template +class basic_utf16_in_iterator : + public std::iterator +{ +public: + using value_type = char_type; + using difference_type = + typename std::iterator_traits::difference_type; + using iterator_category = std::forward_iterator_tag; + + basic_utf16_in_iterator() : + _it(char_iterator()), + _end(char_iterator()), + _char(0) + { + } + + explicit basic_utf16_in_iterator(const char_iterator &it_, + const char_iterator &end_) : + _it(it_), + _end(it_), + _char(0) + { + if (it_ != end_) + { + next(); + } + } + + char_type operator *() const + { + return _char; + } + + bool operator ==(const basic_utf16_in_iterator &rhs_) const + { + return _it == rhs_._it; + } + + bool operator !=(const basic_utf16_in_iterator &rhs_) const + { + return _it != rhs_._it; + } + + basic_utf16_in_iterator &operator ++() + { + _it = _end; + next(); + return *this; + } + + basic_utf16_in_iterator operator ++(int) + { + basic_utf16_in_iterator temp_ = *this; + + _it = _end; + next(); + return temp_; + } + + basic_utf16_in_iterator operator +(const std::size_t count_) const + { + basic_utf16_in_iterator temp_ = *this; + + for (std::size_t i_ = 0; i_ < count_; ++i_) + { + ++temp_; + } + + return temp_; + } + + basic_utf16_in_iterator operator -(const std::size_t count_) const + { + basic_utf16_in_iterator temp_ = *this; + + for (std::size_t i_ = 0; i_ < count_; ++i_) + { + temp_._end = temp_._it; + --temp_._it; + + if (*temp_._it >= 0xdc00 && *temp_._it <= 0xdfff) --temp_._it; + } + + temp_.next(); + return temp_; + } + +private: + char_iterator _it; + char_iterator _end; + char_type _char; + + void next() + { + char_type ch_ = *_it & 0xffff; + + _end = _it; + + if (ch_ >= 0xd800 && ch_ <= 0xdbff) + { + const char_type surrogate_ = *++_end & 0xffff; + + ch_ = (((ch_ - 0xd800) << 10) | (surrogate_ - 0xdc00)) + 0x10000; + } + + _char = ch_; + ++_end; + } +}; + +template +class basic_utf16_out_iterator : + public std::iterator +{ +public: + using value_type = wchar_t; + 
using difference_type = + typename std::iterator_traits::difference_type; + using iterator_category = std::forward_iterator_tag; + + basic_utf16_out_iterator() : + _count(0), + _index(0) + { + } + + explicit basic_utf16_out_iterator(const char_iterator &it_, + const char_iterator &end_) : + _it(it_), + _count(0), + _index(0) + { + if (it_ != end_) + { + next(); + } + } + + wchar_t operator *() const + { + return _chars[_index]; + } + + bool operator ==(const basic_utf16_out_iterator &rhs_) const + { + return _it == rhs_._it; + } + + bool operator !=(const basic_utf16_out_iterator &rhs_) const + { + return _it != rhs_._it; + } + + basic_utf16_out_iterator &operator ++() + { + ++_index; + + if (_index >= _count) + { + ++_it; + next(); + } + + return *this; + } + + basic_utf16_out_iterator operator ++(int) + { + basic_utf16_out_iterator temp_ = *this; + + ++_index; + + if (_index >= _count) + { + ++_it; + next(); + } + + return temp_; + } + +private: + char_iterator _it; + wchar_t _chars[2]; + unsigned char _count; + unsigned char _index; + + void next() + { + const std::size_t ch_ = *_it; + + _count = len(ch_); + _index = 0; + + switch (_count) + { + case 1: + _chars[0] = static_cast(ch_); + break; + case 2: + _chars[0] = static_cast((ch_ >> 10) + 0xdc00u - + (0x10000 >> 10)); + _chars[1] = static_cast((ch_ & 0x3ff) + 0xdc00u); + break; + } + } + + char len(const std::size_t ch_) const + { + return ch_ > 0xffff ? 
2 : 1; + } +}; +} + +#endif From 4a50d438d037fad28835fe2f2bd32b72c8bd37fb Mon Sep 17 00:00:00 2001 From: Iain Benson Date: Sun, 18 Nov 2018 21:25:36 +0000 Subject: [PATCH 02/32] Add query parser class --- YACReaderLibrary/YACReaderLibrary.pro | 8 +- YACReaderLibrary/db/query_parser.cpp | 237 ++++++++++++++++++++++++++ YACReaderLibrary/db/query_parser.h | 63 +++++++ 3 files changed, 305 insertions(+), 3 deletions(-) create mode 100644 YACReaderLibrary/db/query_parser.cpp create mode 100644 YACReaderLibrary/db/query_parser.h diff --git a/YACReaderLibrary/YACReaderLibrary.pro b/YACReaderLibrary/YACReaderLibrary.pro index 1ea60e8b..8f79b706 100644 --- a/YACReaderLibrary/YACReaderLibrary.pro +++ b/YACReaderLibrary/YACReaderLibrary.pro @@ -67,8 +67,8 @@ macx { QT += macextras gui-private } -unix:!macx { - CONFIG += c++11 +unix { + CONFIG += c++1z } #CONFIG += release @@ -147,6 +147,7 @@ HEADERS += comic_flow.h \ yacreader_comics_selection_helper.h \ yacreader_comic_info_helper.h \ db/reading_list.h \ + db/query_parser.h \ current_comic_view_helper.h \ lexertl/parser/tokeniser/re_token.hpp \ lexertl/parser/tokeniser/re_tokeniser.hpp \ @@ -254,7 +255,8 @@ SOURCES += comic_flow.cpp \ yacreader_comics_selection_helper.cpp \ yacreader_comic_info_helper.cpp\ db/reading_list.cpp \ - current_comic_view_helper.cpp + current_comic_view_helper.cpp \ + db/query_parser.cpp !CONFIG(no_opengl) { SOURCES += ../common/gl/yacreader_flow_gl.cpp diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp new file mode 100644 index 00000000..f5b1adee --- /dev/null +++ b/YACReaderLibrary/db/query_parser.cpp @@ -0,0 +1,237 @@ +#include "query_parser.h" + +#include +#include +#include +#include + + +const std::map> QueryParser::fieldNames { + {FieldType::numeric, {"numpages", "number", "count", "arcnumber", "arccount"}}, + {FieldType::text, {"title", "volume", "storyarc", "genere", "writer", "penciller", "inker", "colorist", "letterer", + "coverartist", "publisher", 
"format", "agerating", "synopsis", "characters", "notes"}}, + {FieldType::boolean, {"isbis", "color"}}, + {FieldType::date, {"date"} } }; + +int QueryParser::TreeNode::buildSqlString(std::string& sqlString, int bindPosition) const { + if (t == "token") { + ++bindPosition; + std::ostringstream oss; + if (children[0].t == "all") { + oss << "("; + for (const auto& field: fieldNames.at(FieldType::text)) { + oss << "UPPER(ci." << field << ") LIKE UPPER(:bindPosition" << bindPosition << ") OR "; + } + oss << "UPPER(c.fileName) LIKE UPPER(:bindPosition" << bindPosition << ")) "; + } else if (isIn(fieldType(children[0].t), FieldType::numeric, FieldType::boolean)) { + oss << "ci." << children[0].t << " = :bindPosition" << bindPosition << " "; + } else { + oss << "(UPPER(ci." << children[0].t << ") LIKE UPPER(:bindPosition" << bindPosition << ")) "; + } + sqlString += oss.str(); + } else if (t == "not") { + sqlString += "(NOT "; + bindPosition = children[0].buildSqlString(sqlString, bindPosition); + sqlString += ")"; + } else { + sqlString += "("; + bindPosition = children[0].buildSqlString(sqlString, bindPosition); + sqlString += " " + t + " "; + bindPosition = children[1].buildSqlString(sqlString, bindPosition); + sqlString += ")"; + } + + return bindPosition; + } + + int QueryParser::TreeNode::bindValues(QSqlQuery& selectQuery, int bindPosition) const { + if (t == "token") { + std::ostringstream oss; + oss << ":bindPosition" << ++bindPosition; + if (isIn(fieldType(children[0].t), FieldType::numeric, FieldType::boolean)) { + selectQuery.bindValue(oss.str().c_str(), std::stoi(children[1].t)); + } else { + selectQuery.bindValue(oss.str().c_str(), ("%%"+children[1].t+"%%").c_str()); + } + } else if (t == "not") { + bindPosition = children[0].bindValues(selectQuery, bindPosition); + } else { + bindPosition = children[0].bindValues(selectQuery, bindPosition); + bindPosition = children[1].bindValues(selectQuery, bindPosition); + } + + return bindPosition; + } + + 
+QueryParser::QueryParser(): lexScanner(0) { + + lexScanner.push("[()]", static_cast::type>(TokenType::opcode)); + lexScanner.push("@[^:]+:[^\\\")\\s]+", static_cast::type>(TokenType::atWord)); + lexScanner.push("[^\\\"()\\s]+", static_cast::type>(TokenType::word)); + lexScanner.push("\\\".*?\\\"", static_cast::type>(TokenType::quotedWord)); + lexScanner.push("\\s+", static_cast::type>(TokenType::space)); + + lexertl::generator::build(lexScanner, sm); +} + +QueryParser::TreeNode QueryParser::parse(const std::string& expr) { + tokenize(expr); + auto prog = orExpression(); + + if (!isEof()) { + throw std::invalid_argument("Extra characters at end of search"); + } + + return prog; +} + +std::string QueryParser::toLower(const std::string& string) { + std::string res(string); + std::transform(res.begin(), res.end(), res.begin(), ::tolower); + return res; +} + +std::string QueryParser::token(bool advance) { + if (isEof()) { + return ""; + } + auto res = (tokenType() == TokenType::quotedWord)?iter->substr(1,1):iter->str(); + if (advance) { + this->advance(); + } + return res; +} + +std::string QueryParser::lcaseToken(bool advance) { + if (isEof()) { + return ""; + } + auto res = (tokenType() == TokenType::quotedWord)?iter->substr(1,1):iter->str(); + if (advance) { + this->advance(); + } + return toLower(res); +} + +QueryParser::TokenType QueryParser::tokenType() { + if (isEof()) { + return TokenType::eof; + } + return TokenType(iter->id); +} + +bool QueryParser::isEof() const { + return iter == end; +} + +void QueryParser::advance() { + ++iter; + if (tokenType() == TokenType::space) advance(); +} + +QueryParser::FieldType QueryParser::fieldType(const std::string& str) { + for (const auto& names : fieldNames) { + if (std::find(names.second.begin(), names.second.end(), toLower(str)) != names.second.end()) { + return names.first; + } + } + + return FieldType::unknown; +} + +void QueryParser::tokenize (const std::string& expr) { + // TODO: Strip out escaped backslashes, 
quotes and parens so that the + // lex scanner doesn't get confused. We put them back later. + + iter = lexertl::siterator(expr.begin(), expr.end(), sm); + + /* for (; !isEof() ; advance()) + { + std::cout << "Id: " << iter->id << ", Token: '" << token() << "'\n"; + } + iter = lexertl::siterator(expr.begin(), expr.end(), sm); + */ +} + +std::string QueryParser::join(const std::vector& strings, const std::string& delim) { + return std::accumulate(strings.begin(), strings.end(), std::string(), + [&delim](const std::string& a, const std::string& b) -> std::string { + return a + (a.length() > 0 && b.length() > 0 ? delim : "") + b; + } ); +} + +std::vector QueryParser::split(const std::string& string, char delim) { + std::istringstream iss(string); + std::vector words; + while(iss) { + std::string substr; + std::getline(iss, substr, delim); + words.push_back(substr); + } + return words; +} + +QueryParser::TreeNode QueryParser::orExpression() { + auto lhs = andExpression(); + if (lcaseToken() == "or") { + advance(); + return {"or", {lhs, orExpression()}}; + } + return lhs; +} + +QueryParser::TreeNode QueryParser::andExpression() { + auto lhs = notExpression(); + if (lcaseToken() == "and") { + advance(); + return {"and", {lhs, andExpression()}}; + } + + if ((isIn(tokenType(), TokenType::atWord, TokenType::word, TokenType::quotedWord) || token() == "(") && lcaseToken() != "or") { + return {"and", {lhs, andExpression()}}; + } + + return lhs; +} + +QueryParser::TreeNode QueryParser::notExpression() { + if (lcaseToken() == "not") { + advance(); + return {"not", {notExpression()}}; + } + return locationExpression(); +} + +QueryParser::TreeNode QueryParser::locationExpression() { + if (tokenType() == TokenType::opcode && token() == "(") { + advance(); + auto res = orExpression(); + if (tokenType() != TokenType::opcode || token(true) != ")") { + throw std::invalid_argument("missing )'"); + } + return res; + } + if (!isIn(tokenType(), TokenType::atWord, TokenType::word, 
TokenType::quotedWord)) { + throw std::invalid_argument("Invalid syntax. Expected a lookup name or a word"); + } + return baseToken(); +} + +QueryParser::TreeNode QueryParser::baseToken() { + if (tokenType() == TokenType::quotedWord) { + return {"token", {{"all", {}}, {token(true), {}}}}; + } + + auto words(split(token(true), ':')); + + if (words.size() > 1 && fieldType(words[0]) != FieldType::unknown) { + auto loc(toLower(words[0])); + words.erase(words.begin()); + if (words.size() == 1 && tokenType() == TokenType::quotedWord) { + return {"token", {{loc, {}}, {token(true), {}}}}; + } + return {"token", {{loc, {}}, {join(words, ":"), {}}}}; + } + return {"token", {{"all", {}}, {join(words, ":"), {}}}}; +} diff --git a/YACReaderLibrary/db/query_parser.h b/YACReaderLibrary/db/query_parser.h new file mode 100644 index 00000000..868d2b97 --- /dev/null +++ b/YACReaderLibrary/db/query_parser.h @@ -0,0 +1,63 @@ +#ifndef QUERY_PARSER_H +#define QUERY_PARSER_H + +#include "lexertl/generator.hpp" +#include "lexertl/iterator.hpp" + +#include +#include +#include +#include + +class QSqlQuery; + +class QueryParser { +public: + + enum class TokenType {eof, opcode, atWord, word, quotedWord, space}; + + struct TreeNode { + std::string t; + std::vector children; + + int buildSqlString(std::string& sqlString, int bindPosition = 0) const; + int bindValues(QSqlQuery& selectQuery, int bindPosition = 0) const; + }; + + explicit QueryParser(); + TreeNode parse(const std::string& expr); + +private: + static std::string toLower(const std::string& string); + + std::string token(bool advance = false); + std::string lcaseToken(bool advance = false); + TokenType tokenType(); + bool isEof() const; + void advance(); + + template + static bool isIn(First &&first, T && ... 
t) {return ((first == t) || ...);} + + enum class FieldType {unknown, numeric, text, boolean, date}; + static FieldType fieldType(const std::string& str); + + void tokenize (const std::string& expr); + static std::string join(const std::vector& strings, const std::string& delim); + static std::vector split(const std::string& string, char delim); + + TreeNode orExpression(); + TreeNode andExpression(); + TreeNode notExpression(); + TreeNode locationExpression(); + TreeNode baseToken(); + + lexertl::rules lexScanner; + lexertl::state_machine sm; + lexertl::siterator iter; + const lexertl::siterator end; + + static const std::map> fieldNames; +}; + +#endif // QUERY_PARSER_H From 5fa7da1e4660ab64b6e62ed038c0d28746f519dc Mon Sep 17 00:00:00 2001 From: Iain Benson Date: Sat, 24 Nov 2018 09:54:31 +0000 Subject: [PATCH 03/32] Add folder into the query parser --- YACReaderLibrary/db/query_parser.cpp | 13 ++++++++++--- YACReaderLibrary/db/query_parser.h | 2 +- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp index f5b1adee..e69c8b81 100644 --- a/YACReaderLibrary/db/query_parser.cpp +++ b/YACReaderLibrary/db/query_parser.cpp @@ -11,20 +11,27 @@ const std::map> QueryParser::fi {FieldType::text, {"title", "volume", "storyarc", "genere", "writer", "penciller", "inker", "colorist", "letterer", "coverartist", "publisher", "format", "agerating", "synopsis", "characters", "notes"}}, {FieldType::boolean, {"isbis", "color"}}, - {FieldType::date, {"date"} } }; + {FieldType::date, {"date"}}, + {FieldType::filename, {"filename"}}, + {FieldType::folder, {"folder"}} }; int QueryParser::TreeNode::buildSqlString(std::string& sqlString, int bindPosition) const { if (t == "token") { ++bindPosition; std::ostringstream oss; - if (children[0].t == "all") { + if (toLower(children[0].t) == "all") { oss << "("; for (const auto& field: fieldNames.at(FieldType::text)) { oss << "UPPER(ci." 
<< field << ") LIKE UPPER(:bindPosition" << bindPosition << ") OR "; } - oss << "UPPER(c.fileName) LIKE UPPER(:bindPosition" << bindPosition << ")) "; + oss << "UPPER(c.filename) LIKE UPPER(:bindPosition" << bindPosition << ") OR "; + oss << "UPPER(f.name) LIKE UPPER(:bindPosition" << bindPosition << ")) "; } else if (isIn(fieldType(children[0].t), FieldType::numeric, FieldType::boolean)) { oss << "ci." << children[0].t << " = :bindPosition" << bindPosition << " "; + } else if (fieldType(children[0].t) == FieldType::filename) { + oss << "(UPPER(c." << children[0].t << ") LIKE UPPER(:bindPosition" << bindPosition << ")) "; + } else if (fieldType(children[0].t) == FieldType::folder) { + oss << "(UPPER(f.name) LIKE UPPER(:bindPosition" << bindPosition << ")) "; } else { oss << "(UPPER(ci." << children[0].t << ") LIKE UPPER(:bindPosition" << bindPosition << ")) "; } diff --git a/YACReaderLibrary/db/query_parser.h b/YACReaderLibrary/db/query_parser.h index 868d2b97..d77e1243 100644 --- a/YACReaderLibrary/db/query_parser.h +++ b/YACReaderLibrary/db/query_parser.h @@ -39,7 +39,7 @@ private: template static bool isIn(First &&first, T && ... 
t) {return ((first == t) || ...);} - enum class FieldType {unknown, numeric, text, boolean, date}; + enum class FieldType {unknown, numeric, text, boolean, date, folder, filename}; static FieldType fieldType(const std::string& str); void tokenize (const std::string& expr); From 673ee1f067a7052422f9e56c9ebc3d290c809f69 Mon Sep 17 00:00:00 2001 From: Iain Benson Date: Sat, 24 Nov 2018 09:54:59 +0000 Subject: [PATCH 04/32] Use the query parser in the comic and folder model --- YACReaderLibrary/db/comic_model.cpp | 57 ++++++++++++++-------------- YACReaderLibrary/db/folder_model.cpp | 55 +++++++++++++++------------ 2 files changed, 60 insertions(+), 52 deletions(-) diff --git a/YACReaderLibrary/db/comic_model.cpp b/YACReaderLibrary/db/comic_model.cpp index 7fee8000..5f795e6a 100644 --- a/YACReaderLibrary/db/comic_model.cpp +++ b/YACReaderLibrary/db/comic_model.cpp @@ -9,6 +9,7 @@ #include "qnaturalsorting.h" #include "comic_db.h" #include "db_helper.h" +#include "query_parser.h" //ci.number,ci.title,c.fileName,ci.numPages,c.id,c.parentId,c.path,ci.hash,ci.read #include "QsLog.h" @@ -607,40 +608,40 @@ void ComicModel::setupModelData(const SearchModifiers modifier, const QString &f QSqlDatabase db = DataBaseManagement::loadDatabase(databasePath); QSqlQuery selectQuery(db); - switch (modifier) { - case YACReader::NoModifiers: - selectQuery.prepare("SELECT ci.number,ci.title,c.fileName,ci.numPages,c.id,c.parentId,c.path,ci.hash,ci.read,ci.isBis,ci.currentPage,ci.rating,ci.hasBeenOpened " - "FROM comic c INNER JOIN comic_info ci ON (c.comicInfoId = ci.id) " - "WHERE UPPER(ci.title) LIKE UPPER(:filter) OR UPPER(c.fileName) LIKE UPPER(:filter) LIMIT :limit"); - selectQuery.bindValue(":filter", "%%" + filter + "%%"); - selectQuery.bindValue(":limit", 500); //TODO, load this value from settings - break; + std::string queryString("SELECT ci.number,ci.title,c.fileName,ci.numPages,c.id,c.parentId,c.path,ci.hash,ci.read,ci.isBis,ci.currentPage,ci.rating,ci.hasBeenOpened " + "FROM 
comic c INNER JOIN comic_info ci ON (c.comicInfoId = ci.id) LEFT JOIN folder f ON (f.id == c.parentId) WHERE "); - case YACReader::OnlyRead: - selectQuery.prepare("SELECT ci.number,ci.title,c.fileName,ci.numPages,c.id,c.parentId,c.path,ci.hash,ci.read,ci.isBis,ci.currentPage,ci.rating,ci.hasBeenOpened " - "FROM comic c INNER JOIN comic_info ci ON (c.comicInfoId = ci.id) " - "WHERE (UPPER(ci.title) LIKE UPPER(:filter) OR UPPER(c.fileName) LIKE UPPER(:filter)) AND ci.read = 1 LIMIT :limit"); - selectQuery.bindValue(":filter", "%%" + filter + "%%"); - selectQuery.bindValue(":limit", 500); //TODO, load this value from settings - break; + try { + QueryParser parser; + auto result = parser.parse(filter.toStdString()); + result.buildSqlString(queryString); - case YACReader::OnlyUnread: - selectQuery.prepare("SELECT ci.number,ci.title,c.fileName,ci.numPages,c.id,c.parentId,c.path,ci.hash,ci.read,ci.isBis,ci.currentPage,ci.rating,ci.hasBeenOpened " - "FROM comic c INNER JOIN comic_info ci ON (c.comicInfoId = ci.id) " - "WHERE (UPPER(ci.title) LIKE UPPER(:filter) OR UPPER(c.fileName) LIKE UPPER(:filter)) AND ci.read = 0 LIMIT :limit"); - selectQuery.bindValue(":filter", "%%" + filter + "%%"); - selectQuery.bindValue(":limit", 500); //TODO, load this value from settings - break; + switch (modifier) { + case YACReader::NoModifiers: + queryString += "LIMIT :limit"; + break; - default: - QLOG_ERROR() << "not implemented"; - break; + case YACReader::OnlyRead: + queryString += "AND ci.read = 1 LIMIT :limit"; + break; + + case YACReader::OnlyUnread: + queryString += "AND ci.read = 0 LIMIT :limit"; + break; + + default: + queryString += "LIMIT :limit"; + QLOG_ERROR() << "not implemented"; + break; + } + selectQuery.prepare(QString(queryString.c_str())); + selectQuery.bindValue(":limit", 500); //TODO, load this value from settings + result.bindValues(selectQuery); + } catch (const std::exception &e) { + QLOG_ERROR() << "Unable to parse query: " << e.what(); } - selectQuery.exec(); - 
QLOG_DEBUG() << selectQuery.lastError() << "--"; - setupModelData(selectQuery); connectionName = db.connectionName(); } diff --git a/YACReaderLibrary/db/folder_model.cpp b/YACReaderLibrary/db/folder_model.cpp index 640462d7..902ada19 100644 --- a/YACReaderLibrary/db/folder_model.cpp +++ b/YACReaderLibrary/db/folder_model.cpp @@ -55,6 +55,7 @@ #include "qnaturalsorting.h" #include "yacreader_global_gui.h" #include "QsLog.h" +#include "query_parser.h" #ifdef Q_OS_MAC #include @@ -689,37 +690,43 @@ void FolderModelProxy::setupFilteredModelData() selectQuery.prepare("select * from folder where id <> 1 and upper(name) like upper(:filter) order by parentId,name "); selectQuery.bindValue(":filter", "%%" + filter + "%%"); } else { - switch (modifier) { - case YACReader::NoModifiers: - selectQuery.prepare("SELECT DISTINCT f.id, f.parentId, f.name, f.path, f.finished, f.completed " + std::string queryString("SELECT DISTINCT f.id, f.parentId, f.name, f.path, f.finished, f.completed " "FROM folder f LEFT JOIN comic c ON (f.id = c.parentId) " - "WHERE f.id <> 1 AND ((UPPER(c.fileName) like UPPER(:filter)) OR (UPPER(f.name) like UPPER(:filter2))) ORDER BY f.parentId,f.name"); - selectQuery.bindValue(":filter", "%%" + filter + "%%"); - selectQuery.bindValue(":filter2", "%%" + filter + "%%"); - break; + "INNER JOIN comic_info ci ON (c.comicInfoId = ci.id) WHERE "); - case YACReader::OnlyRead: - selectQuery.prepare("SELECT DISTINCT f.id, f.parentId, f.name, f.path, f.finished, f.completed " - "FROM folder f LEFT JOIN (comic c INNER JOIN comic_info ci ON (c.comicInfoId = ci.id)) ON (f.id = c.parentId) " - "WHERE f.id <> 1 AND ((UPPER(c.fileName) like UPPER(:filter)) OR (UPPER(f.name) like UPPER(:filter2))) AND ci.read = 1 ORDER BY f.parentId,f.name;"); - selectQuery.bindValue(":filter", "%%" + filter + "%%"); - selectQuery.bindValue(":filter2", "%%" + filter + "%%"); - break; + try { + QueryParser parser; + auto result = parser.parse(filter.toStdString()); + 
result.buildSqlString(queryString); - case YACReader::OnlyUnread: - selectQuery.prepare("SELECT DISTINCT f.id, f.parentId, f.name, f.path, f.finished, f.completed " - "FROM folder f LEFT JOIN (comic c INNER JOIN comic_info ci ON (c.comicInfoId = ci.id)) ON (f.id = c.parentId) " - "WHERE f.id <> 1 AND ((UPPER(c.fileName) like UPPER(:filter)) OR (UPPER(f.name) like UPPER(:filter2))) AND ci.read = 0 ORDER BY f.parentId,f.name;"); - selectQuery.bindValue(":filter", "%%" + filter + "%%"); - selectQuery.bindValue(":filter2", "%%" + filter + "%%"); - break; + switch (modifier) { + case YACReader::NoModifiers: + queryString += "AND f.id <> 1 ORDER BY f.parentId,f.name"; + break; - default: - QLOG_ERROR() << "not implemented"; - break; + case YACReader::OnlyRead: + queryString += "AND f.id <> 1 AND ci.read = 1 ORDER BY f.parentId,f.name"; + break; + + case YACReader::OnlyUnread: + queryString += "AND f.id <> 1 AND ci.read = 0 ORDER BY f.parentId,f.name"; + break; + + default: + queryString += "AND f.id <> 1 ORDER BY f.parentId,f.name"; + QLOG_ERROR() << "not implemented"; + break; + } + + selectQuery.prepare(QString(queryString.c_str())); + result.bindValues(selectQuery); + + } catch (const std::exception &e) { + QLOG_ERROR() << "Unable to parse query: " << e.what(); } } selectQuery.exec(); + QLOG_DEBUG() << selectQuery.lastError() << "--"; setupFilteredModelData(selectQuery, rootItem); connectionName = db.connectionName(); From 30529dca4329d32ecc70a50eedccd0ba4e0038ab Mon Sep 17 00:00:00 2001 From: Iain Benson Date: Sat, 24 Nov 2018 10:27:09 +0000 Subject: [PATCH 05/32] Some small tidy ups --- YACReaderLibrary/db/comic_model.cpp | 10 +++++----- YACReaderLibrary/db/folder_model.cpp | 28 +++++++++++++--------------- YACReaderLibrary/db/query_parser.cpp | 10 ---------- 3 files changed, 18 insertions(+), 30 deletions(-) diff --git a/YACReaderLibrary/db/comic_model.cpp b/YACReaderLibrary/db/comic_model.cpp index 5f795e6a..d41b7806 100644 --- 
a/YACReaderLibrary/db/comic_model.cpp +++ b/YACReaderLibrary/db/comic_model.cpp @@ -618,23 +618,23 @@ void ComicModel::setupModelData(const SearchModifiers modifier, const QString &f switch (modifier) { case YACReader::NoModifiers: - queryString += "LIMIT :limit"; + queryString += " LIMIT :limit"; break; case YACReader::OnlyRead: - queryString += "AND ci.read = 1 LIMIT :limit"; + queryString += " AND ci.read = 1 LIMIT :limit"; break; case YACReader::OnlyUnread: - queryString += "AND ci.read = 0 LIMIT :limit"; + queryString += " AND ci.read = 0 LIMIT :limit"; break; default: - queryString += "LIMIT :limit"; + queryString += " LIMIT :limit"; QLOG_ERROR() << "not implemented"; break; } - selectQuery.prepare(QString(queryString.c_str())); + selectQuery.prepare(queryString.c_str()); selectQuery.bindValue(":limit", 500); //TODO, load this value from settings result.bindValues(selectQuery); } catch (const std::exception &e) { diff --git a/YACReaderLibrary/db/folder_model.cpp b/YACReaderLibrary/db/folder_model.cpp index 902ada19..76a82ec2 100644 --- a/YACReaderLibrary/db/folder_model.cpp +++ b/YACReaderLibrary/db/folder_model.cpp @@ -701,39 +701,37 @@ void FolderModelProxy::setupFilteredModelData() switch (modifier) { case YACReader::NoModifiers: - queryString += "AND f.id <> 1 ORDER BY f.parentId,f.name"; + queryString += " AND f.id <> 1 ORDER BY f.parentId,f.name"; break; case YACReader::OnlyRead: - queryString += "AND f.id <> 1 AND ci.read = 1 ORDER BY f.parentId,f.name"; + queryString += " AND f.id <> 1 AND ci.read = 1 ORDER BY f.parentId,f.name"; break; case YACReader::OnlyUnread: - queryString += "AND f.id <> 1 AND ci.read = 0 ORDER BY f.parentId,f.name"; + queryString += " AND f.id <> 1 AND ci.read = 0 ORDER BY f.parentId,f.name"; break; default: - queryString += "AND f.id <> 1 ORDER BY f.parentId,f.name"; + queryString += " AND f.id <> 1 ORDER BY f.parentId,f.name"; QLOG_ERROR() << "not implemented"; break; + + selectQuery.prepare(queryString.c_str()); + 
result.bindValues(selectQuery); } - - selectQuery.prepare(QString(queryString.c_str())); - result.bindValues(selectQuery); - } catch (const std::exception &e) { QLOG_ERROR() << "Unable to parse query: " << e.what(); } + selectQuery.exec(); + QLOG_DEBUG() << selectQuery.lastError() << "--"; + + setupFilteredModelData(selectQuery, rootItem); } - selectQuery.exec(); - QLOG_DEBUG() << selectQuery.lastError() << "--"; + QSqlDatabase::removeDatabase(db.connectionName()); - setupFilteredModelData(selectQuery, rootItem); - connectionName = db.connectionName(); + endResetModel(); } - QSqlDatabase::removeDatabase(connectionName); - - endResetModel(); } void FolderModelProxy::clear() diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp index e69c8b81..7cd57897 100644 --- a/YACReaderLibrary/db/query_parser.cpp +++ b/YACReaderLibrary/db/query_parser.cpp @@ -148,17 +148,7 @@ QueryParser::FieldType QueryParser::fieldType(const std::string& str) { } void QueryParser::tokenize (const std::string& expr) { - // TODO: Strip out escaped backslashes, quotes and parens so that the - // lex scanner doesn't get confused. We put them back later. 
- iter = lexertl::siterator(expr.begin(), expr.end(), sm); - - /* for (; !isEof() ; advance()) - { - std::cout << "Id: " << iter->id << ", Token: '" << token() << "'\n"; - } - iter = lexertl::siterator(expr.begin(), expr.end(), sm); - */ } std::string QueryParser::join(const std::vector& strings, const std::string& delim) { From 9a660350d23ac81e78234466d675b820906051b5 Mon Sep 17 00:00:00 2001 From: Iain Benson Date: Sat, 24 Nov 2018 10:27:29 +0000 Subject: [PATCH 06/32] Add some documentation and attribution to the query parser --- YACReaderLibrary/db/query_parser.h | 31 ++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/YACReaderLibrary/db/query_parser.h b/YACReaderLibrary/db/query_parser.h index d77e1243..628c1beb 100644 --- a/YACReaderLibrary/db/query_parser.h +++ b/YACReaderLibrary/db/query_parser.h @@ -9,8 +9,35 @@ #include #include -class QSqlQuery; - +/** + * This class is used to generate an SQL query string from a search expression, + * with a syntax very similar to that used by the Google search engine. + * + * The code herin is based upon the SearchQueryParser python class written by + * Kovid Goyal as part of the Calibre eBook manager (https://calibre-ebook.com) + * + * Grammar: + * prog ::= or_expression + * or_expression ::= and_expression [ 'or' or_expression ] + * and_expression ::= not_expression [ [ 'and' ] and_expression ] + * not_expression ::= [ 'not' ] location_expression + * location_expression ::= base_token | ( '(' or_expression ')' ) + * base_token ::= a sequence of letters and colons, perhaps quoted + * + * Usage Example: + * QSqlQuery selectQuery(db); + * std::string queryString("SELECT ... FROM ... 
WHERE "); + * + * QueryParser parser; // Create the parser object + * TreeNode result = parser.parse(expr); // Parse the query expression + * + * result.buildSqlString(queryString); // Append the SQL query to a string + * + * selectQuery.prepare(queryString.c_str()); // Convert the string to a query + * result.bindValues(selectQuery); // Populate the SQL query variables + * + * selectQuery.exec(); + */ class QueryParser { public: From 7ccb3384554ece0672218a80a3603a1ef1e51e92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Mon, 23 Sep 2019 18:02:23 +0200 Subject: [PATCH 07/32] Apply clang-format --- YACReaderLibrary/db/query_parser.cpp | 195 +++++++++++++++------------ YACReaderLibrary/db/query_parser.h | 42 ++++-- 2 files changed, 135 insertions(+), 102 deletions(-) diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp index 7cd57897..c68502db 100644 --- a/YACReaderLibrary/db/query_parser.cpp +++ b/YACReaderLibrary/db/query_parser.cpp @@ -5,73 +5,75 @@ #include #include - const std::map> QueryParser::fieldNames { - {FieldType::numeric, {"numpages", "number", "count", "arcnumber", "arccount"}}, - {FieldType::text, {"title", "volume", "storyarc", "genere", "writer", "penciller", "inker", "colorist", "letterer", - "coverartist", "publisher", "format", "agerating", "synopsis", "characters", "notes"}}, - {FieldType::boolean, {"isbis", "color"}}, - {FieldType::date, {"date"}}, - {FieldType::filename, {"filename"}}, - {FieldType::folder, {"folder"}} }; + { FieldType::numeric, { "numpages", "number", "count", "arcnumber", "arccount" } }, + { FieldType::text, { "title", "volume", "storyarc", "genere", "writer", "penciller", "inker", "colorist", "letterer", "coverartist", "publisher", "format", "agerating", "synopsis", "characters", "notes" } }, + { FieldType::boolean, { "isbis", "color" } }, + { FieldType::date, { "date" } }, + { FieldType::filename, { "filename" } }, + { FieldType::folder, { "folder" 
} } +}; -int QueryParser::TreeNode::buildSqlString(std::string& sqlString, int bindPosition) const { - if (t == "token") { - ++bindPosition; - std::ostringstream oss; - if (toLower(children[0].t) == "all") { - oss << "("; - for (const auto& field: fieldNames.at(FieldType::text)) { - oss << "UPPER(ci." << field << ") LIKE UPPER(:bindPosition" << bindPosition << ") OR "; - } - oss << "UPPER(c.filename) LIKE UPPER(:bindPosition" << bindPosition << ") OR "; - oss << "UPPER(f.name) LIKE UPPER(:bindPosition" << bindPosition << ")) "; - } else if (isIn(fieldType(children[0].t), FieldType::numeric, FieldType::boolean)) { - oss << "ci." << children[0].t << " = :bindPosition" << bindPosition << " "; - } else if (fieldType(children[0].t) == FieldType::filename) { - oss << "(UPPER(c." << children[0].t << ") LIKE UPPER(:bindPosition" << bindPosition << ")) "; - } else if (fieldType(children[0].t) == FieldType::folder) { - oss << "(UPPER(f.name) LIKE UPPER(:bindPosition" << bindPosition << ")) "; - } else { - oss << "(UPPER(ci." << children[0].t << ") LIKE UPPER(:bindPosition" << bindPosition << ")) "; +int QueryParser::TreeNode::buildSqlString(std::string &sqlString, int bindPosition) const +{ + if (t == "token") { + ++bindPosition; + std::ostringstream oss; + if (toLower(children[0].t) == "all") { + oss << "("; + for (const auto &field : fieldNames.at(FieldType::text)) { + oss << "UPPER(ci." << field << ") LIKE UPPER(:bindPosition" << bindPosition << ") OR "; } - sqlString += oss.str(); - } else if (t == "not") { - sqlString += "(NOT "; - bindPosition = children[0].buildSqlString(sqlString, bindPosition); - sqlString += ")"; + oss << "UPPER(c.filename) LIKE UPPER(:bindPosition" << bindPosition << ") OR "; + oss << "UPPER(f.name) LIKE UPPER(:bindPosition" << bindPosition << ")) "; + } else if (isIn(fieldType(children[0].t), FieldType::numeric, FieldType::boolean)) { + oss << "ci." 
<< children[0].t << " = :bindPosition" << bindPosition << " "; + } else if (fieldType(children[0].t) == FieldType::filename) { + oss << "(UPPER(c." << children[0].t << ") LIKE UPPER(:bindPosition" << bindPosition << ")) "; + } else if (fieldType(children[0].t) == FieldType::folder) { + oss << "(UPPER(f.name) LIKE UPPER(:bindPosition" << bindPosition << ")) "; } else { - sqlString += "("; - bindPosition = children[0].buildSqlString(sqlString, bindPosition); - sqlString += " " + t + " "; - bindPosition = children[1].buildSqlString(sqlString, bindPosition); - sqlString += ")"; + oss << "(UPPER(ci." << children[0].t << ") LIKE UPPER(:bindPosition" << bindPosition << ")) "; } - - return bindPosition; + sqlString += oss.str(); + } else if (t == "not") { + sqlString += "(NOT "; + bindPosition = children[0].buildSqlString(sqlString, bindPosition); + sqlString += ")"; + } else { + sqlString += "("; + bindPosition = children[0].buildSqlString(sqlString, bindPosition); + sqlString += " " + t + " "; + bindPosition = children[1].buildSqlString(sqlString, bindPosition); + sqlString += ")"; } - int QueryParser::TreeNode::bindValues(QSqlQuery& selectQuery, int bindPosition) const { - if (t == "token") { - std::ostringstream oss; - oss << ":bindPosition" << ++bindPosition; - if (isIn(fieldType(children[0].t), FieldType::numeric, FieldType::boolean)) { - selectQuery.bindValue(oss.str().c_str(), std::stoi(children[1].t)); - } else { - selectQuery.bindValue(oss.str().c_str(), ("%%"+children[1].t+"%%").c_str()); - } - } else if (t == "not") { - bindPosition = children[0].bindValues(selectQuery, bindPosition); - } else { - bindPosition = children[0].bindValues(selectQuery, bindPosition); - bindPosition = children[1].bindValues(selectQuery, bindPosition); - } + return bindPosition; +} - return bindPosition; +int QueryParser::TreeNode::bindValues(QSqlQuery &selectQuery, int bindPosition) const +{ + if (t == "token") { + std::ostringstream oss; + oss << ":bindPosition" << ++bindPosition; + 
if (isIn(fieldType(children[0].t), FieldType::numeric, FieldType::boolean)) { + selectQuery.bindValue(oss.str().c_str(), std::stoi(children[1].t)); + } else { + selectQuery.bindValue(oss.str().c_str(), ("%%" + children[1].t + "%%").c_str()); + } + } else if (t == "not") { + bindPosition = children[0].bindValues(selectQuery, bindPosition); + } else { + bindPosition = children[0].bindValues(selectQuery, bindPosition); + bindPosition = children[1].bindValues(selectQuery, bindPosition); } + return bindPosition; +} -QueryParser::QueryParser(): lexScanner(0) { +QueryParser::QueryParser() + : lexScanner(0) +{ lexScanner.push("[()]", static_cast::type>(TokenType::opcode)); lexScanner.push("@[^:]+:[^\\\")\\s]+", static_cast::type>(TokenType::atWord)); @@ -82,7 +84,8 @@ QueryParser::QueryParser(): lexScanner(0) { lexertl::generator::build(lexScanner, sm); } -QueryParser::TreeNode QueryParser::parse(const std::string& expr) { +QueryParser::TreeNode QueryParser::parse(const std::string &expr) +{ tokenize(expr); auto prog = orExpression(); @@ -93,52 +96,60 @@ QueryParser::TreeNode QueryParser::parse(const std::string& expr) { return prog; } -std::string QueryParser::toLower(const std::string& string) { +std::string QueryParser::toLower(const std::string &string) +{ std::string res(string); std::transform(res.begin(), res.end(), res.begin(), ::tolower); return res; } -std::string QueryParser::token(bool advance) { +std::string QueryParser::token(bool advance) +{ if (isEof()) { return ""; } - auto res = (tokenType() == TokenType::quotedWord)?iter->substr(1,1):iter->str(); + auto res = (tokenType() == TokenType::quotedWord) ? iter->substr(1, 1) : iter->str(); if (advance) { this->advance(); } return res; } -std::string QueryParser::lcaseToken(bool advance) { +std::string QueryParser::lcaseToken(bool advance) +{ if (isEof()) { return ""; } - auto res = (tokenType() == TokenType::quotedWord)?iter->substr(1,1):iter->str(); + auto res = (tokenType() == TokenType::quotedWord) ? 
iter->substr(1, 1) : iter->str(); if (advance) { this->advance(); } return toLower(res); } -QueryParser::TokenType QueryParser::tokenType() { +QueryParser::TokenType QueryParser::tokenType() +{ if (isEof()) { return TokenType::eof; } return TokenType(iter->id); } -bool QueryParser::isEof() const { +bool QueryParser::isEof() const +{ return iter == end; } -void QueryParser::advance() { +void QueryParser::advance() +{ ++iter; - if (tokenType() == TokenType::space) advance(); + if (tokenType() == TokenType::space) + advance(); } -QueryParser::FieldType QueryParser::fieldType(const std::string& str) { - for (const auto& names : fieldNames) { +QueryParser::FieldType QueryParser::fieldType(const std::string &str) +{ + for (const auto &names : fieldNames) { if (std::find(names.second.begin(), names.second.end(), toLower(str)) != names.second.end()) { return names.first; } @@ -147,21 +158,24 @@ QueryParser::FieldType QueryParser::fieldType(const std::string& str) { return FieldType::unknown; } -void QueryParser::tokenize (const std::string& expr) { +void QueryParser::tokenize(const std::string &expr) +{ iter = lexertl::siterator(expr.begin(), expr.end(), sm); } -std::string QueryParser::join(const std::vector& strings, const std::string& delim) { +std::string QueryParser::join(const std::vector &strings, const std::string &delim) +{ return std::accumulate(strings.begin(), strings.end(), std::string(), - [&delim](const std::string& a, const std::string& b) -> std::string { - return a + (a.length() > 0 && b.length() > 0 ? delim : "") + b; - } ); + [&delim](const std::string &a, const std::string &b) -> std::string { + return a + (a.length() > 0 && b.length() > 0 ? 
delim : "") + b; + }); } -std::vector QueryParser::split(const std::string& string, char delim) { +std::vector QueryParser::split(const std::string &string, char delim) +{ std::istringstream iss(string); std::vector words; - while(iss) { + while (iss) { std::string substr; std::getline(iss, substr, delim); words.push_back(substr); @@ -169,38 +183,42 @@ std::vector QueryParser::split(const std::string& string, char deli return words; } -QueryParser::TreeNode QueryParser::orExpression() { +QueryParser::TreeNode QueryParser::orExpression() +{ auto lhs = andExpression(); if (lcaseToken() == "or") { advance(); - return {"or", {lhs, orExpression()}}; + return { "or", { lhs, orExpression() } }; } return lhs; } -QueryParser::TreeNode QueryParser::andExpression() { +QueryParser::TreeNode QueryParser::andExpression() +{ auto lhs = notExpression(); if (lcaseToken() == "and") { advance(); - return {"and", {lhs, andExpression()}}; + return { "and", { lhs, andExpression() } }; } if ((isIn(tokenType(), TokenType::atWord, TokenType::word, TokenType::quotedWord) || token() == "(") && lcaseToken() != "or") { - return {"and", {lhs, andExpression()}}; + return { "and", { lhs, andExpression() } }; } return lhs; } -QueryParser::TreeNode QueryParser::notExpression() { +QueryParser::TreeNode QueryParser::notExpression() +{ if (lcaseToken() == "not") { advance(); - return {"not", {notExpression()}}; + return { "not", { notExpression() } }; } return locationExpression(); } -QueryParser::TreeNode QueryParser::locationExpression() { +QueryParser::TreeNode QueryParser::locationExpression() +{ if (tokenType() == TokenType::opcode && token() == "(") { advance(); auto res = orExpression(); @@ -215,9 +233,10 @@ QueryParser::TreeNode QueryParser::locationExpression() { return baseToken(); } -QueryParser::TreeNode QueryParser::baseToken() { +QueryParser::TreeNode QueryParser::baseToken() +{ if (tokenType() == TokenType::quotedWord) { - return {"token", {{"all", {}}, {token(true), {}}}}; + return { 
"token", { { "all", {} }, { token(true), {} } } }; } auto words(split(token(true), ':')); @@ -226,9 +245,9 @@ QueryParser::TreeNode QueryParser::baseToken() { auto loc(toLower(words[0])); words.erase(words.begin()); if (words.size() == 1 && tokenType() == TokenType::quotedWord) { - return {"token", {{loc, {}}, {token(true), {}}}}; + return { "token", { { loc, {} }, { token(true), {} } } }; } - return {"token", {{loc, {}}, {join(words, ":"), {}}}}; + return { "token", { { loc, {} }, { join(words, ":"), {} } } }; } - return {"token", {{"all", {}}, {join(words, ":"), {}}}}; + return { "token", { { "all", {} }, { join(words, ":"), {} } } }; } diff --git a/YACReaderLibrary/db/query_parser.h b/YACReaderLibrary/db/query_parser.h index 628c1beb..427cda30 100644 --- a/YACReaderLibrary/db/query_parser.h +++ b/YACReaderLibrary/db/query_parser.h @@ -38,24 +38,29 @@ * * selectQuery.exec(); */ -class QueryParser { +class QueryParser +{ public: - - enum class TokenType {eof, opcode, atWord, word, quotedWord, space}; + enum class TokenType { eof, + opcode, + atWord, + word, + quotedWord, + space }; struct TreeNode { std::string t; std::vector children; - int buildSqlString(std::string& sqlString, int bindPosition = 0) const; - int bindValues(QSqlQuery& selectQuery, int bindPosition = 0) const; + int buildSqlString(std::string &sqlString, int bindPosition = 0) const; + int bindValues(QSqlQuery &selectQuery, int bindPosition = 0) const; }; explicit QueryParser(); - TreeNode parse(const std::string& expr); + TreeNode parse(const std::string &expr); private: - static std::string toLower(const std::string& string); + static std::string toLower(const std::string &string); std::string token(bool advance = false); std::string lcaseToken(bool advance = false); @@ -63,15 +68,24 @@ private: bool isEof() const; void advance(); - template - static bool isIn(First &&first, T && ... t) {return ((first == t) || ...);} + template + static bool isIn(First &&first, T &&... 
t) + { + return ((first == t) || ...); + } - enum class FieldType {unknown, numeric, text, boolean, date, folder, filename}; - static FieldType fieldType(const std::string& str); + enum class FieldType { unknown, + numeric, + text, + boolean, + date, + folder, + filename }; + static FieldType fieldType(const std::string &str); - void tokenize (const std::string& expr); - static std::string join(const std::vector& strings, const std::string& delim); - static std::vector split(const std::string& string, char delim); + void tokenize(const std::string &expr); + static std::string join(const std::vector &strings, const std::string &delim); + static std::vector split(const std::string &string, char delim); TreeNode orExpression(); TreeNode andExpression(); From 2d3888b4b463d2d71cbd997456ae90d67e6ea23c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Mon, 23 Sep 2019 18:32:25 +0200 Subject: [PATCH 08/32] Fix scopes --- YACReaderLibrary/db/folder_model.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/YACReaderLibrary/db/folder_model.cpp b/YACReaderLibrary/db/folder_model.cpp index 76a82ec2..42f13aad 100644 --- a/YACReaderLibrary/db/folder_model.cpp +++ b/YACReaderLibrary/db/folder_model.cpp @@ -716,13 +716,14 @@ void FolderModelProxy::setupFilteredModelData() queryString += " AND f.id <> 1 ORDER BY f.parentId,f.name"; QLOG_ERROR() << "not implemented"; break; - - selectQuery.prepare(queryString.c_str()); - result.bindValues(selectQuery); } } catch (const std::exception &e) { QLOG_ERROR() << "Unable to parse query: " << e.what(); } + + selectQuery.prepare(queryString.c_str()); + selectQuery.bindValues(selectQuery); + selectQuery.exec(); QLOG_DEBUG() << selectQuery.lastError() << "--"; From 4990093e3dfa01313c99555f24e687da10554da7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Mon, 23 Sep 2019 19:55:00 +0200 Subject: [PATCH 09/32] Remove c++17 dependency --- 
YACReaderLibrary/YACReaderLibrary.pro | 4 ---- YACReaderLibrary/db/query_parser.cpp | 8 ++++---- YACReaderLibrary/db/query_parser.h | 6 +++--- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/YACReaderLibrary/YACReaderLibrary.pro b/YACReaderLibrary/YACReaderLibrary.pro index 8f79b706..e6d86252 100644 --- a/YACReaderLibrary/YACReaderLibrary.pro +++ b/YACReaderLibrary/YACReaderLibrary.pro @@ -67,10 +67,6 @@ macx { QT += macextras gui-private } -unix { - CONFIG += c++1z -} - #CONFIG += release CONFIG -= flat QT += sql network widgets script diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp index c68502db..802c11b8 100644 --- a/YACReaderLibrary/db/query_parser.cpp +++ b/YACReaderLibrary/db/query_parser.cpp @@ -26,7 +26,7 @@ int QueryParser::TreeNode::buildSqlString(std::string &sqlString, int bindPositi } oss << "UPPER(c.filename) LIKE UPPER(:bindPosition" << bindPosition << ") OR "; oss << "UPPER(f.name) LIKE UPPER(:bindPosition" << bindPosition << ")) "; - } else if (isIn(fieldType(children[0].t), FieldType::numeric, FieldType::boolean)) { + } else if (isIn(fieldType(children[0].t), { FieldType::numeric, FieldType::boolean })) { oss << "ci." << children[0].t << " = :bindPosition" << bindPosition << " "; } else if (fieldType(children[0].t) == FieldType::filename) { oss << "(UPPER(c." 
<< children[0].t << ") LIKE UPPER(:bindPosition" << bindPosition << ")) "; @@ -56,7 +56,7 @@ int QueryParser::TreeNode::bindValues(QSqlQuery &selectQuery, int bindPosition) if (t == "token") { std::ostringstream oss; oss << ":bindPosition" << ++bindPosition; - if (isIn(fieldType(children[0].t), FieldType::numeric, FieldType::boolean)) { + if (isIn(fieldType(children[0].t), { FieldType::numeric, FieldType::boolean })) { selectQuery.bindValue(oss.str().c_str(), std::stoi(children[1].t)); } else { selectQuery.bindValue(oss.str().c_str(), ("%%" + children[1].t + "%%").c_str()); @@ -201,7 +201,7 @@ QueryParser::TreeNode QueryParser::andExpression() return { "and", { lhs, andExpression() } }; } - if ((isIn(tokenType(), TokenType::atWord, TokenType::word, TokenType::quotedWord) || token() == "(") && lcaseToken() != "or") { + if ((isIn(tokenType(), { TokenType::atWord, TokenType::word, TokenType::quotedWord }) || token() == "(") && lcaseToken() != "or") { return { "and", { lhs, andExpression() } }; } @@ -227,7 +227,7 @@ QueryParser::TreeNode QueryParser::locationExpression() } return res; } - if (!isIn(tokenType(), TokenType::atWord, TokenType::word, TokenType::quotedWord)) { + if (!isIn(tokenType(), { TokenType::atWord, TokenType::word, TokenType::quotedWord })) { throw std::invalid_argument("Invalid syntax. Expected a lookup name or a word"); } return baseToken(); diff --git a/YACReaderLibrary/db/query_parser.h b/YACReaderLibrary/db/query_parser.h index 427cda30..0d032036 100644 --- a/YACReaderLibrary/db/query_parser.h +++ b/YACReaderLibrary/db/query_parser.h @@ -68,10 +68,10 @@ private: bool isEof() const; void advance(); - template - static bool isIn(First &&first, T &&... 
t) + template + static bool isIn(const T &e, const std::list &v) { - return ((first == t) || ...); + return std::find(v.begin(), v.end(), e) != v.end(); } enum class FieldType { unknown, From 8efb9912ee07200e84bf91601d1d2336152bdb02 Mon Sep 17 00:00:00 2001 From: Iain Benson Date: Thu, 24 Oct 2019 20:39:43 +0100 Subject: [PATCH 10/32] Use concatenation, rather than ostringstream --- YACReaderLibrary/db/query_parser.cpp | 46 +++++++++++----------------- YACReaderLibrary/db/query_parser.h | 4 +-- 2 files changed, 20 insertions(+), 30 deletions(-) diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp index 802c11b8..f08d0872 100644 --- a/YACReaderLibrary/db/query_parser.cpp +++ b/YACReaderLibrary/db/query_parser.cpp @@ -1,7 +1,6 @@ #include "query_parser.h" #include -#include #include #include @@ -18,24 +17,22 @@ int QueryParser::TreeNode::buildSqlString(std::string &sqlString, int bindPositi { if (t == "token") { ++bindPosition; - std::ostringstream oss; if (toLower(children[0].t) == "all") { - oss << "("; + sqlString += "("; for (const auto &field : fieldNames.at(FieldType::text)) { - oss << "UPPER(ci." << field << ") LIKE UPPER(:bindPosition" << bindPosition << ") OR "; + sqlString += "UPPER(ci." + field + ") LIKE UPPER(:bindPosition" + std::to_string(bindPosition) + ") OR "; } - oss << "UPPER(c.filename) LIKE UPPER(:bindPosition" << bindPosition << ") OR "; - oss << "UPPER(f.name) LIKE UPPER(:bindPosition" << bindPosition << ")) "; + sqlString += "UPPER(c.filename) LIKE UPPER(:bindPosition" + std::to_string(bindPosition) + ") OR "; + sqlString += "UPPER(f.name) LIKE UPPER(:bindPosition" + std::to_string(bindPosition) + ")) "; } else if (isIn(fieldType(children[0].t), { FieldType::numeric, FieldType::boolean })) { - oss << "ci." << children[0].t << " = :bindPosition" << bindPosition << " "; + sqlString += "ci." 
+ children[0].t + " = :bindPosition" + std::to_string(bindPosition) + " "; } else if (fieldType(children[0].t) == FieldType::filename) { - oss << "(UPPER(c." << children[0].t << ") LIKE UPPER(:bindPosition" << bindPosition << ")) "; + sqlString += "(UPPER(c." + children[0].t + ") LIKE UPPER(:bindPosition" + std::to_string(bindPosition) + ")) "; } else if (fieldType(children[0].t) == FieldType::folder) { - oss << "(UPPER(f.name) LIKE UPPER(:bindPosition" << bindPosition << ")) "; + sqlString += "(UPPER(f.name) LIKE UPPER(:bindPosition" + std::to_string(bindPosition) + ")) "; } else { - oss << "(UPPER(ci." << children[0].t << ") LIKE UPPER(:bindPosition" << bindPosition << ")) "; + sqlString += "(UPPER(ci." + children[0].t + ") LIKE UPPER(:bindPosition" + std::to_string(bindPosition) + ")) "; } - sqlString += oss.str(); } else if (t == "not") { sqlString += "(NOT "; bindPosition = children[0].buildSqlString(sqlString, bindPosition); @@ -54,12 +51,11 @@ int QueryParser::TreeNode::buildSqlString(std::string &sqlString, int bindPositi int QueryParser::TreeNode::bindValues(QSqlQuery &selectQuery, int bindPosition) const { if (t == "token") { - std::ostringstream oss; - oss << ":bindPosition" << ++bindPosition; + std::string bind_string(":bindPosition" + std::to_string(++bindPosition)); if (isIn(fieldType(children[0].t), { FieldType::numeric, FieldType::boolean })) { - selectQuery.bindValue(oss.str().c_str(), std::stoi(children[1].t)); + selectQuery.bindValue(bind_string.c_str(), std::stoi(children[1].t)); } else { - selectQuery.bindValue(oss.str().c_str(), ("%%" + children[1].t + "%%").c_str()); + selectQuery.bindValue(bind_string.c_str(), ("%%" + children[1].t + "%%").c_str()); } } else if (t == "not") { bindPosition = children[0].bindValues(selectQuery, bindPosition); @@ -163,23 +159,17 @@ void QueryParser::tokenize(const std::string &expr) iter = lexertl::siterator(expr.begin(), expr.end(), sm); } -std::string QueryParser::join(const std::vector &strings, const 
std::string &delim) +std::string QueryParser::join(const QStringList &strings, const std::string &delim) { return std::accumulate(strings.begin(), strings.end(), std::string(), - [&delim](const std::string &a, const std::string &b) -> std::string { - return a + (a.length() > 0 && b.length() > 0 ? delim : "") + b; + [&delim](const std::string &a, const QString &b) -> std::string { + return a + (a.length() > 0 && b.length() > 0 ? delim : "") + b.toStdString(); }); } -std::vector QueryParser::split(const std::string &string, char delim) +QStringList QueryParser::split(const std::string &string, char delim) { - std::istringstream iss(string); - std::vector words; - while (iss) { - std::string substr; - std::getline(iss, substr, delim); - words.push_back(substr); - } + auto words = QString(string.c_str()).split(delim); return words; } @@ -241,8 +231,8 @@ QueryParser::TreeNode QueryParser::baseToken() auto words(split(token(true), ':')); - if (words.size() > 1 && fieldType(words[0]) != FieldType::unknown) { - auto loc(toLower(words[0])); + if (words.size() > 1 && fieldType(words[0].toStdString()) != FieldType::unknown) { + auto loc(toLower(words[0].toStdString())); words.erase(words.begin()); if (words.size() == 1 && tokenType() == TokenType::quotedWord) { return { "token", { { loc, {} }, { token(true), {} } } }; diff --git a/YACReaderLibrary/db/query_parser.h b/YACReaderLibrary/db/query_parser.h index 0d032036..f23a7470 100644 --- a/YACReaderLibrary/db/query_parser.h +++ b/YACReaderLibrary/db/query_parser.h @@ -84,8 +84,8 @@ private: static FieldType fieldType(const std::string &str); void tokenize(const std::string &expr); - static std::string join(const std::vector &strings, const std::string &delim); - static std::vector split(const std::string &string, char delim); + static std::string join(const QStringList &strings, const std::string &delim); + static QStringList split(const std::string &string, char delim); TreeNode orExpression(); TreeNode andExpression(); From 
255e51da865f6b83e77248a44e0804fac9582b88 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Fri, 8 Jan 2021 16:59:17 +0100 Subject: [PATCH 11/32] Fix binding values to search query This was broken while doing a rebase --- YACReaderLibrary/db/folder_model.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/YACReaderLibrary/db/folder_model.cpp b/YACReaderLibrary/db/folder_model.cpp index 42f13aad..f454e599 100644 --- a/YACReaderLibrary/db/folder_model.cpp +++ b/YACReaderLibrary/db/folder_model.cpp @@ -717,17 +717,17 @@ void FolderModelProxy::setupFilteredModelData() QLOG_ERROR() << "not implemented"; break; } + + selectQuery.prepare(queryString.c_str()); + result.bindValues(selectQuery); + + selectQuery.exec(); + QLOG_DEBUG() << selectQuery.lastError() << "--"; + + setupFilteredModelData(selectQuery, rootItem); } catch (const std::exception &e) { QLOG_ERROR() << "Unable to parse query: " << e.what(); } - - selectQuery.prepare(queryString.c_str()); - selectQuery.bindValues(selectQuery); - - selectQuery.exec(); - QLOG_DEBUG() << selectQuery.lastError() << "--"; - - setupFilteredModelData(selectQuery, rootItem); } QSqlDatabase::removeDatabase(db.connectionName()); From 6438c9210f2103c7a9b8e9b297bf913fe0197689 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Tue, 12 Jan 2021 18:41:09 +0100 Subject: [PATCH 12/32] Move query execution and model setup to the right scope --- YACReaderLibrary/db/comic_model.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/YACReaderLibrary/db/comic_model.cpp b/YACReaderLibrary/db/comic_model.cpp index d41b7806..59fa9887 100644 --- a/YACReaderLibrary/db/comic_model.cpp +++ b/YACReaderLibrary/db/comic_model.cpp @@ -637,6 +637,10 @@ void ComicModel::setupModelData(const SearchModifiers modifier, const QString &f selectQuery.prepare(queryString.c_str()); selectQuery.bindValue(":limit", 500); //TODO, load this value from settings 
result.bindValues(selectQuery); + + selectQuery.exec(); + + setupModelData(selectQuery); } catch (const std::exception &e) { QLOG_ERROR() << "Unable to parse query: " << e.what(); } From 5037f3ac9259d7b7b63f86a0e23c69c4e470d4ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Tue, 12 Jan 2021 18:41:57 +0100 Subject: [PATCH 13/32] Fix data base removal in FolderModel --- YACReaderLibrary/db/folder_model.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/YACReaderLibrary/db/folder_model.cpp b/YACReaderLibrary/db/folder_model.cpp index f454e599..1944e1e1 100644 --- a/YACReaderLibrary/db/folder_model.cpp +++ b/YACReaderLibrary/db/folder_model.cpp @@ -729,10 +729,13 @@ void FolderModelProxy::setupFilteredModelData() QLOG_ERROR() << "Unable to parse query: " << e.what(); } } - QSqlDatabase::removeDatabase(db.connectionName()); + + connectionName = db.connectionName(); endResetModel(); } + + QSqlDatabase::removeDatabase(connectionName); } void FolderModelProxy::clear() From a777aa3fe8941f63c27fb6ea944874a8bb5fdcba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Tue, 12 Jan 2021 18:56:59 +0100 Subject: [PATCH 14/32] Replace lexertl with a custom lexer implementation QueryLexer does not parse "atWord" because I couldn't find what it is used for. 
--- YACReaderLibrary/YACReaderLibrary.pro | 39 +- YACReaderLibrary/db/query_lexer.cpp | 94 + YACReaderLibrary/db/query_lexer.h | 59 + YACReaderLibrary/db/query_parser.cpp | 55 +- YACReaderLibrary/db/query_parser.h | 24 +- YACReaderLibrary/lexertl/char_traits.hpp | 45 - YACReaderLibrary/lexertl/debug.hpp | 311 -- YACReaderLibrary/lexertl/dot.hpp | 293 -- YACReaderLibrary/lexertl/enums.hpp | 25 - YACReaderLibrary/lexertl/generate_cpp.hpp | 1123 ------ YACReaderLibrary/lexertl/generator.hpp | 738 ---- YACReaderLibrary/lexertl/internals.hpp | 75 - YACReaderLibrary/lexertl/iterator.hpp | 135 - YACReaderLibrary/lexertl/licence_1_0.txt | 24 - YACReaderLibrary/lexertl/lookup.hpp | 491 --- YACReaderLibrary/lexertl/match_results.hpp | 171 - YACReaderLibrary/lexertl/memory_file.hpp | 138 - YACReaderLibrary/lexertl/narrow.hpp | 25 - YACReaderLibrary/lexertl/observer_ptr.hpp | 16 - YACReaderLibrary/lexertl/parser/parser.hpp | 926 ----- .../lexertl/parser/tokeniser/re_token.hpp | 100 - .../lexertl/parser/tokeniser/re_tokeniser.hpp | 778 ---- .../parser/tokeniser/re_tokeniser_helper.hpp | 3157 ----------------- .../parser/tokeniser/re_tokeniser_state.hpp | 136 - .../lexertl/parser/tree/end_node.hpp | 111 - .../lexertl/parser/tree/iteration_node.hpp | 96 - .../lexertl/parser/tree/leaf_node.hpp | 110 - YACReaderLibrary/lexertl/parser/tree/node.hpp | 242 -- .../lexertl/parser/tree/selection_node.hpp | 104 - .../lexertl/parser/tree/sequence_node.hpp | 121 - .../lexertl/partition/charset.hpp | 72 - .../lexertl/partition/equivset.hpp | 135 - YACReaderLibrary/lexertl/rules.hpp | 1018 ------ YACReaderLibrary/lexertl/runtime_error.hpp | 23 - YACReaderLibrary/lexertl/serialise.hpp | 28 - YACReaderLibrary/lexertl/sm_to_csm.hpp | 53 - YACReaderLibrary/lexertl/sm_traits.hpp | 44 - YACReaderLibrary/lexertl/state_machine.hpp | 521 --- .../lexertl/stream_shared_iterator.hpp | 352 -- YACReaderLibrary/lexertl/string_token.hpp | 439 --- YACReaderLibrary/lexertl/utf_iterators.hpp | 508 --- 41 files 
changed, 187 insertions(+), 12768 deletions(-) create mode 100644 YACReaderLibrary/db/query_lexer.cpp create mode 100644 YACReaderLibrary/db/query_lexer.h delete mode 100644 YACReaderLibrary/lexertl/char_traits.hpp delete mode 100644 YACReaderLibrary/lexertl/debug.hpp delete mode 100644 YACReaderLibrary/lexertl/dot.hpp delete mode 100644 YACReaderLibrary/lexertl/enums.hpp delete mode 100644 YACReaderLibrary/lexertl/generate_cpp.hpp delete mode 100644 YACReaderLibrary/lexertl/generator.hpp delete mode 100644 YACReaderLibrary/lexertl/internals.hpp delete mode 100644 YACReaderLibrary/lexertl/iterator.hpp delete mode 100644 YACReaderLibrary/lexertl/licence_1_0.txt delete mode 100644 YACReaderLibrary/lexertl/lookup.hpp delete mode 100644 YACReaderLibrary/lexertl/match_results.hpp delete mode 100644 YACReaderLibrary/lexertl/memory_file.hpp delete mode 100644 YACReaderLibrary/lexertl/narrow.hpp delete mode 100644 YACReaderLibrary/lexertl/observer_ptr.hpp delete mode 100644 YACReaderLibrary/lexertl/parser/parser.hpp delete mode 100644 YACReaderLibrary/lexertl/parser/tokeniser/re_token.hpp delete mode 100644 YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser.hpp delete mode 100644 YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_helper.hpp delete mode 100644 YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_state.hpp delete mode 100644 YACReaderLibrary/lexertl/parser/tree/end_node.hpp delete mode 100644 YACReaderLibrary/lexertl/parser/tree/iteration_node.hpp delete mode 100644 YACReaderLibrary/lexertl/parser/tree/leaf_node.hpp delete mode 100644 YACReaderLibrary/lexertl/parser/tree/node.hpp delete mode 100644 YACReaderLibrary/lexertl/parser/tree/selection_node.hpp delete mode 100644 YACReaderLibrary/lexertl/parser/tree/sequence_node.hpp delete mode 100644 YACReaderLibrary/lexertl/partition/charset.hpp delete mode 100644 YACReaderLibrary/lexertl/partition/equivset.hpp delete mode 100644 YACReaderLibrary/lexertl/rules.hpp delete mode 100644 
YACReaderLibrary/lexertl/runtime_error.hpp delete mode 100644 YACReaderLibrary/lexertl/serialise.hpp delete mode 100644 YACReaderLibrary/lexertl/sm_to_csm.hpp delete mode 100644 YACReaderLibrary/lexertl/sm_traits.hpp delete mode 100644 YACReaderLibrary/lexertl/state_machine.hpp delete mode 100644 YACReaderLibrary/lexertl/stream_shared_iterator.hpp delete mode 100644 YACReaderLibrary/lexertl/string_token.hpp delete mode 100644 YACReaderLibrary/lexertl/utf_iterators.hpp diff --git a/YACReaderLibrary/YACReaderLibrary.pro b/YACReaderLibrary/YACReaderLibrary.pro index e6d86252..156056df 100644 --- a/YACReaderLibrary/YACReaderLibrary.pro +++ b/YACReaderLibrary/YACReaderLibrary.pro @@ -77,6 +77,7 @@ QT += sql network widgets script # Input HEADERS += comic_flow.h \ create_library_dialog.h \ + db/query_lexer.h \ library_creator.h \ library_window.h \ add_library_dialog.h \ @@ -144,42 +145,7 @@ HEADERS += comic_flow.h \ yacreader_comic_info_helper.h \ db/reading_list.h \ db/query_parser.h \ - current_comic_view_helper.h \ - lexertl/parser/tokeniser/re_token.hpp \ - lexertl/parser/tokeniser/re_tokeniser.hpp \ - lexertl/parser/tokeniser/re_tokeniser_helper.hpp \ - lexertl/parser/tokeniser/re_tokeniser_state.hpp \ - lexertl/parser/tree/end_node.hpp \ - lexertl/parser/tree/iteration_node.hpp \ - lexertl/parser/tree/leaf_node.hpp \ - lexertl/parser/tree/node.hpp \ - lexertl/parser/tree/selection_node.hpp \ - lexertl/parser/tree/sequence_node.hpp \ - lexertl/parser/parser.hpp \ - lexertl/partition/charset.hpp \ - lexertl/partition/equivset.hpp \ - lexertl/char_traits.hpp \ - lexertl/debug.hpp \ - lexertl/dot.hpp \ - lexertl/enums.hpp \ - lexertl/generate_cpp.hpp \ - lexertl/generator.hpp \ - lexertl/internals.hpp \ - lexertl/iterator.hpp \ - lexertl/lookup.hpp \ - lexertl/match_results.hpp \ - lexertl/memory_file.hpp \ - lexertl/narrow.hpp \ - lexertl/observer_ptr.hpp \ - lexertl/rules.hpp \ - lexertl/runtime_error.hpp \ - lexertl/serialise.hpp \ - lexertl/sm_to_csm.hpp \ - 
lexertl/sm_traits.hpp \ - lexertl/state_machine.hpp \ - lexertl/stream_shared_iterator.hpp \ - lexertl/string_token.hpp \ - lexertl/utf_iterators.hpp + current_comic_view_helper.h !CONFIG(no_opengl) { HEADERS += ../common/gl/yacreader_flow_gl.h @@ -187,6 +153,7 @@ HEADERS += comic_flow.h \ SOURCES += comic_flow.cpp \ create_library_dialog.cpp \ + db/query_lexer.cpp \ library_creator.cpp \ library_window.cpp \ main.cpp \ diff --git a/YACReaderLibrary/db/query_lexer.cpp b/YACReaderLibrary/db/query_lexer.cpp new file mode 100644 index 00000000..27944ff4 --- /dev/null +++ b/YACReaderLibrary/db/query_lexer.cpp @@ -0,0 +1,94 @@ +#include "query_lexer.h" + +QueryLexer::QueryLexer(const std::string &input) + : input(input) +{ +} + +Token QueryLexer::next() +{ + switch (peek()) { + case '\0': + return Token(Token::Type::eof); + case '(': + case ')': + return single(Token::Type::opcode); + case ' ': + case '\t': + case '\r': + case '\n': + return space(); + case '"': + return quotedWord(); + default: + return word(); + } +} + +char QueryLexer::peek() +{ + return input[index]; +} + +char QueryLexer::get() +{ + return input[index++]; +} + +Token QueryLexer::single(Token::Type type) +{ + return Token(type, input.substr(index++, 1)); +} + +Token QueryLexer::space() +{ + auto start = index; + get(); + while (isSpace(peek())) + get(); + return Token(Token::Type::space, input.substr(start, index - start)); +} + +Token QueryLexer::word() +{ + auto start = index; + get(); + auto current = peek(); + while (current != '\0' && !isSpace(current) && current != '"' && current != '(' && current != ')') { + get(); + current = peek(); + } + return Token(Token::Type::word, input.substr(start, index - start)); +} + +Token QueryLexer::quotedWord() +{ + auto start = index; + get(); + auto current = peek(); + while (current != '\0' && current != '"') { + get(); + current = peek(); + } + + if (current == '"') { + get(); + return Token(Token::Type::quotedWord, input.substr(start, index - start)); + 
} + + //This should be a lexical error, but the grammar doesn't support it + return Token(Token::Type::eof); +} + +bool QueryLexer::isSpace(char c) +{ + switch (c) { + case ' ': + case '\t': + case '\r': + case '\n': + return true; + default: + return false; + } +} diff --git a/YACReaderLibrary/db/query_lexer.h b/YACReaderLibrary/db/query_lexer.h new file mode 100644 index 00000000..b2c892f6 --- /dev/null +++ b/YACReaderLibrary/db/query_lexer.h @@ -0,0 +1,59 @@ +#ifndef QUERY_LEXER_H +#define QUERY_LEXER_H + +#include + +class Token +{ +public: + enum class Type { + eof, + opcode, + atWord, + word, + quotedWord, + space + }; + + Token(Type type, std::string lexeme = "") + : _type(type), _lexeme(std::move(lexeme)) + { + } + + Type type() const + { + return _type; + } + + std::string lexeme() const + { + return _lexeme; + } + +private: + Type _type {}; + std::string _lexeme {}; +}; + +class QueryLexer +{ +public: + QueryLexer(const std::string &input); + Token next(); + +private: + std::string input; + int index = 0; + + char peek(); + char get(); + + Token single(Token::Type type); + Token space(); + Token word(); + Token quotedWord(); + + bool isSpace(char c); +}; + +#endif // QUERY_LEXER_H diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp index f08d0872..de9dc39e 100644 --- a/YACReaderLibrary/db/query_parser.cpp +++ b/YACReaderLibrary/db/query_parser.cpp @@ -68,21 +68,14 @@ int QueryParser::TreeNode::bindValues(QSqlQuery &selectQuery, int bindPosition) } QueryParser::QueryParser() - : lexScanner(0) { - - lexScanner.push("[()]", static_cast::type>(TokenType::opcode)); - lexScanner.push("@[^:]+:[^\\\")\\s]+", static_cast::type>(TokenType::atWord)); - lexScanner.push("[^\\\"()\\s]+", static_cast::type>(TokenType::word)); - lexScanner.push("\\\".*?\\\"", static_cast::type>(TokenType::quotedWord)); - lexScanner.push("\\s+", static_cast::type>(TokenType::space)); - - lexertl::generator::build(lexScanner, sm); } 
QueryParser::TreeNode QueryParser::parse(const std::string &expr) { - tokenize(expr); + lexer = QueryLexer(expr); + advance(); + auto prog = orExpression(); if (!isEof()) { @@ -104,7 +97,10 @@ std::string QueryParser::token(bool advance) if (isEof()) { return ""; } - auto res = (tokenType() == TokenType::quotedWord) ? iter->substr(1, 1) : iter->str(); + + auto lexeme = currentToken.lexeme(); + + auto res = (tokenType() == Token::Type::quotedWord) ? currentToken.lexeme().substr(1, lexeme.size() - 2) : lexeme; //TODO process quotedWordDiferently? if (advance) { this->advance(); } @@ -116,30 +112,32 @@ std::string QueryParser::lcaseToken(bool advance) if (isEof()) { return ""; } - auto res = (tokenType() == TokenType::quotedWord) ? iter->substr(1, 1) : iter->str(); + + auto lexeme = currentToken.lexeme(); + + auto res = (tokenType() == Token::Type::quotedWord) ? currentToken.lexeme().substr(1, lexeme.size() - 2) : lexeme; + if (advance) { this->advance(); } return toLower(res); } -QueryParser::TokenType QueryParser::tokenType() +Token::Type QueryParser::tokenType() { - if (isEof()) { - return TokenType::eof; - } - return TokenType(iter->id); + return currentToken.type(); } bool QueryParser::isEof() const { - return iter == end; + return currentToken.type() == Token::Type::eof; } void QueryParser::advance() { - ++iter; - if (tokenType() == TokenType::space) + currentToken = lexer.next(); + + if (tokenType() == Token::Type::space) advance(); } @@ -154,11 +152,6 @@ QueryParser::FieldType QueryParser::fieldType(const std::string &str) return FieldType::unknown; } -void QueryParser::tokenize(const std::string &expr) -{ - iter = lexertl::siterator(expr.begin(), expr.end(), sm); -} - std::string QueryParser::join(const QStringList &strings, const std::string &delim) { return std::accumulate(strings.begin(), strings.end(), std::string(), @@ -191,7 +184,7 @@ QueryParser::TreeNode QueryParser::andExpression() return { "and", { lhs, andExpression() } }; } - if 
((isIn(tokenType(), { TokenType::atWord, TokenType::word, TokenType::quotedWord }) || token() == "(") && lcaseToken() != "or") { + if ((isIn(tokenType(), { Token::Type::atWord, Token::Type::word, Token::Type::quotedWord }) || token() == "(") && lcaseToken() != "or") { return { "and", { lhs, andExpression() } }; } @@ -209,15 +202,15 @@ QueryParser::TreeNode QueryParser::notExpression() QueryParser::TreeNode QueryParser::locationExpression() { - if (tokenType() == TokenType::opcode && token() == "(") { + if (tokenType() == Token::Type::opcode && token() == "(") { advance(); auto res = orExpression(); - if (tokenType() != TokenType::opcode || token(true) != ")") { + if (tokenType() != Token::Type::opcode || token(true) != ")") { throw std::invalid_argument("missing )'"); } return res; } - if (!isIn(tokenType(), { TokenType::atWord, TokenType::word, TokenType::quotedWord })) { + if (!isIn(tokenType(), { Token::Type::atWord, Token::Type::word, Token::Type::quotedWord })) { throw std::invalid_argument("Invalid syntax. 
Expected a lookup name or a word"); } return baseToken(); @@ -225,7 +218,7 @@ QueryParser::TreeNode QueryParser::locationExpression() QueryParser::TreeNode QueryParser::baseToken() { - if (tokenType() == TokenType::quotedWord) { + if (tokenType() == Token::Type::quotedWord) { return { "token", { { "all", {} }, { token(true), {} } } }; } @@ -234,7 +227,7 @@ QueryParser::TreeNode QueryParser::baseToken() if (words.size() > 1 && fieldType(words[0].toStdString()) != FieldType::unknown) { auto loc(toLower(words[0].toStdString())); words.erase(words.begin()); - if (words.size() == 1 && tokenType() == TokenType::quotedWord) { + if (words.size() == 1 && tokenType() == Token::Type::quotedWord) { return { "token", { { loc, {} }, { token(true), {} } } }; } return { "token", { { loc, {} }, { join(words, ":"), {} } } }; diff --git a/YACReaderLibrary/db/query_parser.h b/YACReaderLibrary/db/query_parser.h index f23a7470..95fd48c8 100644 --- a/YACReaderLibrary/db/query_parser.h +++ b/YACReaderLibrary/db/query_parser.h @@ -1,19 +1,19 @@ #ifndef QUERY_PARSER_H #define QUERY_PARSER_H -#include "lexertl/generator.hpp" -#include "lexertl/iterator.hpp" +#include "query_lexer.h" #include #include #include #include +#include /** * This class is used to generate an SQL query string from a search expression, * with a syntax very similar to that used by the Google search engine. 
* - * The code herin is based upon the SearchQueryParser python class written by + * The code herein is based upon the SearchQueryParser python class written by * Kovid Goyal as part of the Calibre eBook manager (https://calibre-ebook.com) * * Grammar: @@ -41,13 +41,6 @@ class QueryParser { public: - enum class TokenType { eof, - opcode, - atWord, - word, - quotedWord, - space }; - struct TreeNode { std::string t; std::vector children; @@ -64,10 +57,13 @@ private: std::string token(bool advance = false); std::string lcaseToken(bool advance = false); - TokenType tokenType(); + Token::Type tokenType(); bool isEof() const; void advance(); + QueryLexer lexer = QueryLexer(""); + Token currentToken = Token(Token::Type::eof); + template static bool isIn(const T &e, const std::list &v) { @@ -83,7 +79,6 @@ private: filename }; static FieldType fieldType(const std::string &str); - void tokenize(const std::string &expr); static std::string join(const QStringList &strings, const std::string &delim); static QStringList split(const std::string &string, char delim); @@ -93,11 +88,6 @@ private: TreeNode locationExpression(); TreeNode baseToken(); - lexertl::rules lexScanner; - lexertl::state_machine sm; - lexertl::siterator iter; - const lexertl::siterator end; - static const std::map> fieldNames; }; diff --git a/YACReaderLibrary/lexertl/char_traits.hpp b/YACReaderLibrary/lexertl/char_traits.hpp deleted file mode 100644 index e06f399a..00000000 --- a/YACReaderLibrary/lexertl/char_traits.hpp +++ /dev/null @@ -1,45 +0,0 @@ -// char_traits.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#ifndef LEXERTL_CHAR_TRAITS_HPP -#define LEXERTL_CHAR_TRAITS_HPP - -#include - -namespace lexertl -{ -template -struct basic_char_traits -{ - using char_type = ch_type; - using index_type = ch_type; - - static index_type max_val() - { - const std::uint32_t max_ = 0x10ffff; - - return sizeof(char_type) > 2 ? - max_ : (max_ & 0xffff); - } -}; - -template<> -struct basic_char_traits -{ - using char_type = char; - using index_type = unsigned char; - - static index_type max_val() - { - // Prevent annoying warning (VC++) - index_type zero_ = 0; - - return ~zero_; - } -}; -} - -#endif diff --git a/YACReaderLibrary/lexertl/debug.hpp b/YACReaderLibrary/lexertl/debug.hpp deleted file mode 100644 index 1405f386..00000000 --- a/YACReaderLibrary/lexertl/debug.hpp +++ /dev/null @@ -1,311 +0,0 @@ -// debug.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_DEBUG_HPP -#define LEXERTL_DEBUG_HPP - -#include -#include -#include "rules.hpp" -#include "sm_to_csm.hpp" -#include "state_machine.hpp" -#include "string_token.hpp" -#include - -namespace lexertl -{ -template -class basic_debug -{ -public: - using char_state_machine = - basic_char_state_machine; - using ostream = std::basic_ostream; - using rules = basic_rules; - using string = std::basic_string; - - static void dump(const sm &sm_, rules &rules_, ostream &stream_) - { - char_state_machine csm_; - - sm_to_csm(sm_, csm_); - dump(csm_, rules_, stream_); - } - - static void dump(const sm &sm_, ostream &stream_) - { - char_state_machine csm_; - - sm_to_csm(sm_, csm_); - dump(csm_, stream_); - } - - static void dump(const char_state_machine &csm_, rules &rules_, - ostream &stream_) - { - for (std::size_t dfa_ = 0, dfas_ = csm_.size(); dfa_ < dfas_; ++dfa_) - { - lexer_state(stream_); - stream_ << rules_.state(dfa_) << std::endl << std::endl; - - dump_ex(csm_._sm_vector[dfa_], stream_); - } - } - - static void dump(const char_state_machine &csm_, ostream &stream_) - { - for (std::size_t dfa_ = 0, dfas_ = csm_.size(); dfa_ < dfas_; ++dfa_) - { - lexer_state(stream_); - stream_ << dfa_ << std::endl << std::endl; - - dump_ex(csm_._sm_vector[dfa_], stream_); - } - } - -protected: - using dfa_state = typename char_state_machine::state; - using string_token = typename dfa_state::string_token; - using stringstream = std::basic_stringstream; - - static void dump_ex(const typename char_state_machine::dfa &dfa_, - ostream &stream_) - { - const std::size_t states_ = dfa_._states.size(); - const id_type bol_index_ = dfa_._bol_index; - - for (std::size_t i_ = 0; i_ < states_; ++i_) - { - const dfa_state &state_ = dfa_._states[i_]; - - state(stream_); - stream_ << i_ << std::endl; - - if (state_._end_state) - { - end_state(stream_); - - if (state_._push_pop_dfa == 
dfa_state::push_dfa) - { - push(stream_); - stream_ << state_._push_dfa; - } - else if (state_._push_pop_dfa == dfa_state::pop_dfa) - { - pop(stream_); - } - - id(stream_); - stream_ << static_cast(state_._id); - user_id(stream_); - stream_ << static_cast(state_._user_id); - dfa(stream_); - stream_ << static_cast(state_._next_dfa); - stream_ << std::endl; - } - - if (i_ == 0 && bol_index_ != char_state_machine::npos()) - { - bol(stream_); - stream_ << static_cast(bol_index_) << std::endl; - } - - if (state_._eol_index != char_state_machine::npos()) - { - eol(stream_); - stream_ << static_cast(state_._eol_index) << - std::endl; - } - - for (const auto &tran_ : state_._transitions) - { - string_token token_ = tran_.second; - - open_bracket(stream_); - - if (!tran_.second.any() && tran_.second.negatable()) - { - token_.negate(); - negated(stream_); - } - - string chars_; - - for (const auto &range_ : token_._ranges) - { - if (range_.first == '-' || range_.first == '^' || - range_.first == ']') - { - stream_ << '\\'; - } - - chars_ = string_token::escape_char - (range_.first); - - if (range_.first != range_.second) - { - if (range_.first + 1 < range_.second) - { - chars_ += '-'; - } - - if (range_.second == '-' || range_.second == '^' || - range_.second == ']') - { - stream_ << '\\'; - } - - chars_ += string_token::escape_char(range_.second); - } - - stream_ << chars_; - } - - close_bracket(stream_); - stream_ << static_cast(tran_.first) << - std::endl; - } - - stream_ << std::endl; - } - } - - static void lexer_state(std::ostream &stream_) - { - stream_ << "Lexer state: "; - } - - static void lexer_state(std::wostream &stream_) - { - stream_ << L"Lexer state: "; - } - - static void state(std::ostream &stream_) - { - stream_ << "State: "; - } - - static void state(std::wostream &stream_) - { - stream_ << L"State: "; - } - - static void bol(std::ostream &stream_) - { - stream_ << " BOL -> "; - } - - static void bol(std::wostream &stream_) - { - stream_ << L" BOL -> "; - 
} - - static void eol(std::ostream &stream_) - { - stream_ << " EOL -> "; - } - - static void eol(std::wostream &stream_) - { - stream_ << L" EOL -> "; - } - - static void end_state(std::ostream &stream_) - { - stream_ << " END STATE"; - } - - static void end_state(std::wostream &stream_) - { - stream_ << L" END STATE"; - } - - static void id(std::ostream &stream_) - { - stream_ << ", Id = "; - } - - static void id(std::wostream &stream_) - { - stream_ << L", Id = "; - } - - static void push(std::ostream &stream_) - { - stream_ << ", PUSH "; - } - - static void push(std::wostream &stream_) - { - stream_ << L", PUSH "; - } - - static void pop(std::ostream &stream_) - { - stream_ << ", POP"; - } - - static void pop(std::wostream &stream_) - { - stream_ << L", POP"; - } - - static void user_id(std::ostream &stream_) - { - stream_ << ", User Id = "; - } - - static void user_id(std::wostream &stream_) - { - stream_ << L", User Id = "; - } - - static void open_bracket(std::ostream &stream_) - { - stream_ << " ["; - } - - static void open_bracket(std::wostream &stream_) - { - stream_ << L" ["; - } - - static void negated(std::ostream &stream_) - { - stream_ << "^"; - } - - static void negated(std::wostream &stream_) - { - stream_ << L"^"; - } - - static void close_bracket(std::ostream &stream_) - { - stream_ << "] -> "; - } - - static void close_bracket(std::wostream &stream_) - { - stream_ << L"] -> "; - } - - static void dfa(std::ostream &stream_) - { - stream_ << ", dfa = "; - } - - static void dfa(std::wostream &stream_) - { - stream_ << L", dfa = "; - } -}; - -using debug = basic_debug; -using wdebug = basic_debug; -} - -#endif diff --git a/YACReaderLibrary/lexertl/dot.hpp b/YACReaderLibrary/lexertl/dot.hpp deleted file mode 100644 index cda4d6ac..00000000 --- a/YACReaderLibrary/lexertl/dot.hpp +++ /dev/null @@ -1,293 +0,0 @@ -// dot.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// Copyright (c) 2013 Autodesk, Inc. All rights reserved. 
-// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_DOT_HPP -#define LEXERTL_DOT_HPP - -#include -#include "rules.hpp" -#include "state_machine.hpp" -#include "sm_to_csm.hpp" - -namespace lexertl -{ -//! The class template basic_dot contains utility functions used to -//! dump a description of a finite state machine formatted in the -//! DOT language (http://www.graphviz.org/doc/info/lang.html). The -//! resulting directed graph can previewed by opening the ".dot" file -//! into the GraphViz application (http://www.graphviz.org). -template -class basic_dot -{ -public: - using char_state_machine = - basic_char_state_machine; - using rules = basic_rules; - using ostream = std::basic_ostream; - using string = std::basic_string; - - //! Dumps a description of the finite state machine expressed in - //! the DOT language to the given output stream. - static void dump(const sm &sm_, rules &rules_, ostream &stream_) - { - char_state_machine csm_; - - sm_to_csm(sm_, csm_); - dump(csm_, rules_, stream_); - } - - //! Dumps a description of the finite state machine expressed in - //! the DOT language to the given output stream. - static void dump(const char_state_machine &csm_, rules &rules_, - ostream &stream_) - { - header(stream_); - for (std::size_t dfa_ = 0, dfas_ = csm_.size(); dfa_ < dfas_; ++dfa_) - { - dump_ex(dfa_, csm_._sm_vector[dfa_], rules_, stream_); - } - trailer(stream_); - } - -protected: - using dfa_state = typename char_state_machine::state; - using string_token = typename dfa_state::string_token; - using stringstream = std::basic_stringstream; - - // Naming of nodes used in the DOT diagram. The naming is of the - // form: L_S. 
- static string node_name(id_type dfa_id_, id_type state_id_) - { - stringstream namestream_; - namestream_ << "L" << dfa_id_ << "_S" << state_id_; - return namestream_.str(); - } - - // Escape control characters twice. This is necessary when - // expressing character sets attached as to DOT nodes as - // labels. - static string double_escape_char(const id_type ch_) - { - stringstream out_; - - switch (ch_) - { - case '\0': - out_ << '\\'; - out_ << '\\'; - out_ << '0'; - break; - case '\a': - out_ << '\\'; - out_ << '\\'; - out_ << 'a'; - break; - case '\b': - out_ << '\\'; - out_ << '\\'; - out_ << 'b'; - break; - case '\f': - out_ << '\\'; - out_ << '\\'; - out_ << 'f'; - break; - case '\n': - out_ << '\\'; - out_ << '\\'; - out_ << 'n'; - break; - case '\r': - out_ << '\\'; - out_ << '\\'; - out_ << 'r'; - break; - case '\t': - out_ << '\\'; - out_ << '\\'; - out_ << 't'; - break; - case '\v': - out_ << '\\'; - out_ << '\\'; - out_ << 'v'; - break; - case '\\': - out_ << '\\'; - out_ << '\\'; - break; - case '"': - out_ << '\\'; - out_ << '\\'; - out_ << '"'; - break; - case '\'': - out_ << '\\'; - out_ << '\\'; - out_ << '\''; - break; - default: - { - if (ch_ < 32 || ch_ > 126) - { - out_ << '\\'; - out_ << 'x'; - out_ << std::hex << - static_cast(ch_); - } - else - { - out_ << char_type(ch_); - } - - break; - } - } - - return out_.str(); - } - - // Internal function actually performing the work of dumping the - // state machine in DOT. 
- static void dump_ex(id_type dfa_id_, - const typename char_state_machine::dfa &dfa_, - rules &rules_, - ostream &stream_) - { - const std::size_t states_ = dfa_._states.size(); - typename dfa_state::id_type_string_token_map::const_iterator iter_; - typename dfa_state::id_type_string_token_map::const_iterator end_; - - stream_ << std::endl; - - for (std::size_t i_ = 0; i_ < states_; ++i_) - { - const dfa_state &state_ = dfa_._states[i_]; - - const string name = node_name(dfa_id_, i_); - if (i_ == 0) - { - stream_ << " " << name << " [shape = doublecircle, xlabel=\"" - << rules_.state(dfa_id_) << "\"];" << std::endl; - } - else if (state_._end_state) - { - stream_ << " " << name << - " [shape = doublecircle, xlabel=\"id =" << - static_cast(state_._id) << "\"];" << - std::endl; - } - else { - stream_ << " " << name << " [shape = circle];" << std::endl; - } - } - - stream_ << std::endl; - - for (std::size_t i_ = 0; i_ < states_; ++i_) - { - const dfa_state &state_ = dfa_._states[i_]; - - iter_ = state_._transitions.begin(); - end_ = state_._transitions.end(); - - const string src_name = node_name(dfa_id_, i_); - - for (; iter_ != end_; ++iter_) - { - const string dst_name = node_name(dfa_id_, iter_->first); - stream_ << " " << src_name << " -> " << dst_name << - " [label = \""; - - string_token token_ = iter_->second; - - open_bracket(stream_); - - if (!iter_->second.any() && iter_->second.negatable()) - { - token_.negate(); - negated(stream_); - } - - string chars_; - auto ranges_iter_ = token_._ranges.cbegin(); - auto ranges_end_ = token_._ranges.cend(); - - for (; ranges_iter_ != ranges_end_; ++ranges_iter_) - { - if (ranges_iter_->first == '^' || - ranges_iter_->first == ']') - { - stream_ << "\\\\"; - } - - chars_ = double_escape_char(ranges_iter_->first); - - if (ranges_iter_->first != ranges_iter_->second) - { - if (ranges_iter_->first + 1 < ranges_iter_->second) - { - chars_ += '-'; - } - - if (ranges_iter_->second == '^' || - ranges_iter_->second == ']') - { 
- stream_ << "\\\\"; - } - - chars_ += double_escape_char(ranges_iter_->second); - } - - stream_ << chars_; - } - - close_bracket(stream_); - stream_ << "\"];" << std::endl; - } - - if (state_._end_state) { - const string dst_name = node_name(state_._next_dfa, 0); - stream_ << " " << src_name << " -> " << dst_name - << " [style = \"dashed\"];" << std::endl; - } - } - } - - static void header(ostream &stream_) - { - stream_ << "digraph DFAs {" << std::endl; - stream_ << " rankdir = LR;" << std::endl; - } - - static void trailer(ostream &stream_) - { - stream_ << "}" << std::endl; - } - - static void open_bracket(ostream &stream_) - { - stream_ << "["; - } - - static void negated(ostream &stream_) - { - stream_ << "^"; - } - - static void close_bracket(ostream &stream_) - { - stream_ << "]"; - } - -}; - -using dot = basic_dot, char>; -using wdot = basic_dot, wchar_t>; -} - -#endif diff --git a/YACReaderLibrary/lexertl/enums.hpp b/YACReaderLibrary/lexertl/enums.hpp deleted file mode 100644 index 31a6a969..00000000 --- a/YACReaderLibrary/lexertl/enums.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// enums.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#ifndef LEXERTL_ENUMS_HPP -#define LEXERTL_ENUMS_HPP - -namespace lexertl -{ - enum regex_flags {icase = 1, dot_not_newline = 2, dot_not_cr_lf = 4, - skip_ws = 8, match_zero_len = 16}; - // 0 = end state, 1 = id, 2 = user id, 3 = push_dfa_index - // 4 = next dfa, 5 = dead state, 6 = dfa_start - enum {end_state_index, id_index, user_id_index, push_dfa_index, - next_dfa_index, eol_index, dead_state_index, transitions_index}; - // Rule flags: - enum feature_flags {bol_bit = 1, eol_bit = 2, skip_bit = 4, again_bit = 8, - multi_state_bit = 16, recursive_bit = 32, advance_bit = 64}; - // End state flags: - enum {end_state_bit = 1, pop_dfa_bit = 2}; -} - -#endif diff --git a/YACReaderLibrary/lexertl/generate_cpp.hpp b/YACReaderLibrary/lexertl/generate_cpp.hpp deleted file mode 100644 index 3e6b28a6..00000000 --- a/YACReaderLibrary/lexertl/generate_cpp.hpp +++ /dev/null @@ -1,1123 +0,0 @@ -// generate_cpp.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_GENERATE_CPP_HPP -#define LEXERTL_GENERATE_CPP_HPP - -#include "enums.hpp" -#include -#include "state_machine.hpp" - -namespace lexertl -{ -class table_based_cpp -{ -public: - template - static void generate_cpp - (const std::string &name_, - const basic_state_machine &sm_, - const bool pointers_, std::ostream &os_) - { - using sm = basic_state_machine; - using internals = typename sm::internals; - const internals &internals_ = sm_.data(); - std::size_t additional_tabs_ = 0; - - os_ << "template\n"; - os_ << "void " << name_ << " (lexertl::"; - - if (internals_._features & recursive_bit) - { - os_ << "recursive_match_results"; - } - else - { - os_ << "match_results"; - } - - os_ << " &results_)\n"; - os_ << "{\n"; - os_ << " using results = lexertl::"; - - if (internals_._features & recursive_bit) - { - os_ << "recursive_match_results"; - } - else - { - os_ << "match_results"; - } - - os_ << ";\n"; - os_ << " using char_type = typename results::char_type;\n"; - os_ << " typename results::iter_type end_token_ = " - "results_.second;\n"; - - if (internals_._features & skip_bit) - { - os_ << "skip:\n"; - } - - os_ << " typename results::iter_type curr_ = results_.second;\n\n"; - os_ << " results_.first = curr_;\n\n"; - - if (internals_._features & again_bit) - { - os_ << "again:\n"; - } - - os_ << " if (curr_ == results_.eoi)\n"; - os_ << " {\n"; - // We want a number regardless of id_type. 
- os_ << " results_.id = " << static_cast - (internals_._eoi) << ";\n"; - os_ << " results_.user_id = results::npos();\n"; - os_ << " return;\n"; - os_ << " }\n\n"; - - if (internals_._features & bol_bit) - { - os_ << " bool bol_ = results_.bol;\n"; - } - - dump_tables(sm_, 1, pointers_, os_); - - if (internals_._dfa.size() > 1) - { - os_ << " const id_type *lookup_ = lookups_[results_.state];\n"; - os_ << " const id_type dfa_alphabet_ = dfa_alphabets_" - "[results_.state];\n"; - os_ << " const "; - - if (pointers_) - { - os_ << "void * const"; - } - else - { - os_ << "id_type"; - } - - os_ << " *dfa_ = dfas_[results_.state];\n"; - } - - os_ << " const "; - - if (pointers_) - { - os_ << "void * const"; - } - else - { - os_ << "id_type"; - } - - os_ << " *ptr_ = dfa_ + dfa_alphabet_;\n"; - os_ << " bool end_state_ = *ptr_ != 0;\n"; - - if (internals_._features & recursive_bit) - { - os_ << " bool pop_ = ("; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*ptr_"; - - if (pointers_) - { - os_ << ')'; - } - - os_ <<" & " << pop_dfa_bit; - - if (pointers_) - { - os_ << ')'; - } - - os_ << ") != 0;\n"; - } - - os_ << " id_type id_ = "; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*(ptr_ + " << id_index << ")"; - - if (pointers_) - { - os_ << "))"; - } - - os_ << ";\n"; - os_ << " id_type uid_ = "; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*(ptr_ + " << user_id_index << ")"; - - if (pointers_) - { - os_ << "))"; - } - - os_ << ";\n"; - - if (internals_._features & recursive_bit) - { - os_ << " id_type push_dfa_ = "; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*(ptr_ + " << push_dfa_index << ")"; - - if (pointers_) - { - os_ << "))"; - } - - os_ << ";\n"; - } - - if (internals_._dfa.size() > 1) - { - os_ << " id_type 
start_state_ = results_.state;\n"; - } - - if (internals_._features & bol_bit) - { - os_ << " bool end_bol_ = bol_;\n"; - } - - if (internals_._features & eol_bit) - { - os_ << " "; - - if (pointers_) - { - os_ << "const void * const *"; - } - else - { - os_ << "id_type "; - } - - os_ << "EOL_state_ = 0;\n"; - } - - os_ << '\n'; - - if (internals_._features & bol_bit) - { - os_ << " if (bol_)\n"; - os_ << " {\n"; - os_ << " const "; - - if (pointers_) - { - os_ << "void *"; - } - else - { - os_ << "id_type "; - } - - os_ << "state_ = *dfa_;\n\n"; - os_ << " if (state_)\n"; - os_ << " {\n"; - os_ << " ptr_ = "; - - if (pointers_) - { - os_ << "reinterpret_cast(state_);\n"; - } - else - { - os_ << "&dfa_[state_ * dfa_alphabet_];\n"; - } - - os_ << " }\n"; - os_ << " }\n\n"; - } - - os_ << " while (curr_ != results_.eoi)\n"; - os_ << " {\n"; - - if (internals_._features & eol_bit) - { - os_ << " EOL_state_ = "; - - if (pointers_) - { - os_ << "reinterpret_cast("; - } - - os_ << "ptr_[" << eol_index << ']'; - - if (pointers_) - { - os_ << ')'; - } - - os_ << ";\n\n"; - os_ << " if (EOL_state_ && *curr_ == '\\n')\n"; - os_ << " {\n"; - os_ << " ptr_ = "; - - if (pointers_) - { - os_ << "EOL_state_"; - } - else - { - os_ << "&dfa_[EOL_state_ * dfa_alphabet_]"; - } - - os_ << ";\n"; - os_ << " }\n"; - os_ << " else\n"; - os_ << " {\n"; - ++additional_tabs_; - } - - output_char_loop(internals_._features, additional_tabs_, pointers_, - os_, std::integral_constant 1)>()); - - if (internals_._features & eol_bit) - { - output_tabs(additional_tabs_, os_); - os_ << " }\n"; - --additional_tabs_; - } - - os_ << '\n'; - os_ << " if (*ptr_)\n"; - os_ << " {\n"; - os_ << " end_state_ = true;\n"; - - - if (internals_._features & recursive_bit) - { - os_ << " pop_ = ("; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*ptr_"; - - if (pointers_) - { - os_ << ')'; - } - - os_ <<" & " << pop_dfa_bit; - - if (pointers_) - { - os_ 
<< ')'; - } - - os_ << ") != 0;\n"; - } - - os_ << " id_ = "; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*(ptr_ + " << id_index << ")"; - - if (pointers_) - { - os_ << "))"; - } - - os_ << ";\n"; - os_ << " uid_ = "; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*(ptr_ + " << user_id_index << ")"; - - if (pointers_) - { - os_ << "))"; - } - - os_ << ";\n"; - - if (internals_._features & recursive_bit) - { - os_ << " push_dfa_ = "; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*(ptr_ + " << push_dfa_index << ')'; - - if (pointers_) - { - os_ << "))"; - } - - os_ << ";\n"; - } - - if (internals_._dfa.size() > 1) - { - os_ << " start_state_ = "; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*(ptr_ + " << next_dfa_index << ')'; - - if (pointers_) - { - os_ << "))"; - } - - os_ << ";\n"; - } - - if (internals_._features & bol_bit) - { - os_ << " end_bol_ = bol_;\n"; - } - - os_ << " end_token_ = curr_;\n"; - os_ << " }\n"; - os_ << " }\n\n"; - output_quit(os_, std::integral_constant 1)>()); - - if (internals_._features & eol_bit) - { - os_ << " if (curr_ == results_.eoi)\n"; - os_ << " {\n"; - os_ << " EOL_state_ = "; - - if (pointers_) - { - os_ << "reinterpret_cast("; - } - - os_ << "ptr_[" << eol_index << ']'; - - if (pointers_) - { - os_ << ')'; - } - - os_ << ";\n"; - os_ << "\n"; - os_ << " if (EOL_state_)\n"; - os_ << " {\n"; - os_ << " ptr_ = "; - - if (pointers_) - { - os_ << "EOL_state_"; - } - else - { - os_ << "&dfa_[EOL_state_ * dfa_alphabet_]"; - } - - os_ << ";\n\n"; - os_ << " if (*ptr_)\n"; - os_ << " {\n"; - os_ << " end_state_ = true;\n"; - - - if (internals_._features & recursive_bit) - { - os_ << " pop_ = ("; - - if (pointers_) - { - // Done this way for GCC: - os_ << 
"static_cast(reinterpret_cast("; - } - - os_ << "*ptr_"; - - if (pointers_) - { - os_ << ')'; - } - - os_ <<" & " << pop_dfa_bit; - - if (pointers_) - { - os_ << ')'; - } - - os_ << ") != 0;\n"; - } - - os_ << " id_ = "; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*(ptr_ + " << id_index << ")"; - - if (pointers_) - { - os_ << "))"; - } - - os_ << ";\n"; - os_ << " uid_ = "; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*(ptr_ + " << user_id_index << ")"; - - if (pointers_) - { - os_ << "))"; - } - - os_ <<";\n"; - - if (internals_._features & recursive_bit) - { - os_ << " push_dfa_ = "; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*(ptr_ + " << push_dfa_index << ')'; - - if (pointers_) - { - os_ << "))"; - } - - os_ << ";\n"; - } - - if (internals_._dfa.size() > 1) - { - os_ << " start_state_ = "; - - if (pointers_) - { - // Done this way for GCC: - os_ << "static_cast(reinterpret_cast("; - } - - os_ << "*(ptr_ + " << next_dfa_index << ')'; - - if (pointers_) - { - os_ << "))"; - } - - os_ << ";\n"; - } - - if (internals_._features & bol_bit) - { - os_ << " end_bol_ = bol_;\n"; - } - - os_ << " end_token_ = curr_;\n"; - os_ << " }\n"; - os_ << " }\n"; - os_ << " }\n\n"; - } - - os_ << " if (end_state_)\n"; - os_ << " {\n"; - os_ << " // Return longest match\n"; - - if (internals_._features & recursive_bit) - { - os_ << " if (pop_)\n"; - os_ << " {\n"; - os_ << " start_state_ = results_." 
- "stack.top().first;\n"; - os_ << " results_.stack.pop();\n"; - os_ << " }\n"; - os_ << " else if (push_dfa_ != results_.npos())\n"; - os_ << " {\n"; - os_ << " results_.stack.push(typename results::" - "id_type_pair\n"; - os_ << " (push_dfa_, id_));\n"; - os_ << " }\n\n"; - } - - if (internals_._dfa.size() > 1) - { - os_ << " results_.state = start_state_;\n"; - } - - if (internals_._features & bol_bit) - { - os_ << " results_.bol = end_bol_;\n"; - } - - os_ << " results_.second = end_token_;\n"; - - if (internals_._features & skip_bit) - { - // We want a number regardless of id_type. - os_ << "\n if (id_ == results_.skip()) goto skip;\n"; - } - - if (internals_._features & again_bit) - { - // We want a number regardless of id_type. - os_ << "\n if (id_ == " - << static_cast(internals_._eoi); - - if (internals_._features & recursive_bit) - { - os_ << " || (pop_ && !results_.stack.empty() &&\n"; - // We want a number regardless of id_type. - os_ << " results_.stack.top().second == " - << static_cast(internals_._eoi) << ')'; - } - - os_ << ")\n"; - os_ << " {\n"; - os_ << " curr_ = end_token_;\n"; - os_ << " goto again;\n"; - os_ << " }\n"; - } - - os_ << " }\n"; - os_ << " else\n"; - os_ << " {\n"; - os_ << " // No match causes char to be skipped\n"; - os_ << " results_.second = end_token_;\n"; - - if (internals_._features & bol_bit) - { - os_ << " results_.bol = *results_.second == '\\n';\n"; - } - - os_ << " results_.first = results_.second;\n"; - os_ << " ++results_.second;\n"; - os_ << " id_ = results::npos();\n"; - os_ << " uid_ = results::npos();\n"; - os_ << " }\n\n"; - os_ << " results_.id = id_;\n"; - os_ << " results_.user_id = uid_;\n"; - os_ << "}\n"; - } - - template - static void dump_tables - (const basic_state_machine &sm_, - const std::size_t tabs_, const bool pointers_, std::ostream &os_) - { - const typename detail::basic_internals &internals_ = - sm_.data(); - const std::size_t lookup_divisor_ = 8; - // Lookup is always 256 entries long now - 
const std::size_t lookup_quotient_ = 256 / lookup_divisor_; - const std::size_t dfas_ = internals_._lookup.size(); - - output_tabs(tabs_, os_); - os_ << "static const id_type lookup"; - - if (dfas_ > 1) - { - os_ << "s_[][" << 256; - } - else - { - os_ << "_["; - } - - os_ << "] = \n"; - output_tabs(tabs_ + 1, os_); - - if (dfas_ > 1) - { - os_ << '{'; - } - - for (std::size_t l_ = 0; l_ < dfas_; ++l_) - { - const id_type *ptr_ = &internals_._lookup[l_].front(); - - // We want numbers regardless of id_type. - os_ << "{0x" << std::hex << static_cast(*ptr_++); - - for (std::size_t col_ = 1; col_ < lookup_divisor_; ++col_) - { - // We want numbers regardless of id_type. - os_ << ", 0x" << std::hex << static_cast(*ptr_++); - } - - for (std::size_t row_ = 1; row_ < lookup_quotient_; ++row_) - { - os_ << ",\n"; - output_tabs(tabs_ + 1, os_); - // We want numbers regardless of id_type. - os_ << "0x" << std::hex << static_cast(*ptr_++); - - for (std::size_t col_ = 1; col_ < lookup_divisor_; ++col_) - { - // We want numbers regardless of id_type. - os_ << ", 0x" << std::hex << - static_cast(*ptr_++); - } - } - - os_ << '}'; - - if (l_ + 1 < dfas_) - { - os_ << ",\n"; - output_tabs(tabs_ + 1, os_); - } - } - - if (dfas_ > 1) - { - os_ << '}'; - } - - os_ << ";\n"; - output_tabs(tabs_, os_); - os_ << "static const id_type dfa_alphabet"; - - if (dfas_ > 1) - { - os_ << "s_[" << std::dec << dfas_ << "] = {"; - } - else - { - os_ << "_ = "; - } - - // We want numbers regardless of id_type. - os_ << "0x" << std::hex << static_cast - (internals_._dfa_alphabet[0]); - - for (std::size_t col_ = 1; col_ < dfas_; ++col_) - { - // We want numbers regardless of id_type. - os_ << ", 0x" << std::hex << static_cast(internals_. 
- _dfa_alphabet[col_]); - } - - if (dfas_ > 1) - { - os_ << '}'; - } - - os_ << ";\n"; - - // DFAs are usually different sizes, so dump separately - for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_) - { - const id_type dfa_alphabet_ = internals_._dfa_alphabet[dfa_]; - const std::size_t rows_ = internals_._dfa[dfa_].size() / - dfa_alphabet_; - const id_type *ptr_ = &internals_._dfa[dfa_].front(); - std::string dfa_name_ = "dfa"; - - output_tabs(tabs_, os_); - os_ << "static const "; - - if (pointers_) - { - os_ << "void *"; - } - else - { - os_ << "id_type "; - } - - os_ << dfa_name_; - - if (dfas_ > 1) - { - std::ostringstream ss_; - - ss_ << dfa_; - dfa_name_ += ss_.str(); - os_ << dfa_; - } - - dfa_name_ += '_'; - os_ << "_[] = {"; - - for (std::size_t row_ = 0; row_ < rows_; ++row_) - { - dump_row(row_ == 0, ptr_, dfa_name_, dfa_alphabet_, - pointers_, os_); - - if (row_ + 1 < rows_) - { - os_ << ",\n"; - output_tabs(tabs_ + 1, os_); - } - } - - os_ << "};\n"; - } - - if (dfas_ > 1) - { - output_tabs(tabs_, os_); - os_ << "static const "; - - if (pointers_) - { - os_ << "void * const"; - } - else - { - os_ << "id_type"; - } - - os_ << " *dfas_[] = {dfa0_"; - - for (std::size_t col_ = 1; col_ < dfas_; ++col_) - { - os_ << ", dfa" << col_ << '_'; - } - - os_ << "};\n"; - } - - os_ << std::dec; - } - -protected: - template - static void dump_row(const bool first_, const id_type * &ptr_, - const std::string &dfa_name_, const id_type dfa_alphabet_, - const bool pointers_, std::ostream &os_) - { - if (pointers_) - { - bool zero_ = *ptr_ == 0; - - if (first_) - { - // We want numbers regardless of id_type. - os_ << dfa_name_ << " + 0x" << std::hex << - static_cast(*ptr_++) * dfa_alphabet_; - } - else if (!zero_) - { - os_ << "reinterpret_cast(0x" - // We want numbers regardless of id_type. - << std::hex << static_cast(*ptr_++) << ')'; - } - else - { - // We want numbers regardless of id_type. 
- os_ << "0x" << std::hex << static_cast(*ptr_++); - } - - for (id_type id_index_ = id_index; id_index_ < transitions_index; - ++id_index_, ++ptr_) - { - os_ << ", "; - zero_ = *ptr_ == 0; - - if (!zero_) - { - os_ << "reinterpret_cast("; - } - - // We want numbers regardless of id_type. - os_ << "0x" << std::hex << static_cast(*ptr_); - - if (!zero_) - { - os_ << ')'; - } - } - - for (id_type alphabet_ = transitions_index; - alphabet_ < dfa_alphabet_; ++alphabet_, ++ptr_) - { - // We want numbers regardless of id_type. - os_ << ", "; - - if (*ptr_ == 0) - { - os_ << 0; - } - else - { - // We want numbers regardless of id_type. - os_ << dfa_name_ + " + 0x" << std::hex << - static_cast(*ptr_) * dfa_alphabet_; - } - } - } - else - { - // We want numbers regardless of id_type. - os_ << "0x" << std::hex << static_cast(*ptr_++); - - for (id_type alphabet_ = 1; alphabet_ < dfa_alphabet_; - ++alphabet_, ++ptr_) - { - // We want numbers regardless of id_type. - os_ << ", 0x" << std::hex << static_cast(*ptr_); - } - } - } - - static void output_tabs(const std::size_t tabs_, std::ostream &os_) - { - for (std::size_t i_ = 0; i_ < tabs_; ++i_) - { - os_ << " "; - } - } - - template - static void output_char_loop(const id_type features_, - const std::size_t additional_tabs_, const bool pointers_, - std::ostream &os_, const std::false_type &) - { - output_tabs(additional_tabs_, os_); - os_ << " const typename results::char_type prev_char_ = " - "*curr_++;\n"; - output_tabs(additional_tabs_, os_); - os_ << " const "; - - if (pointers_) - { - os_ << "void * const *"; - } - else - { - os_ << "id_type "; - } - - os_ << "state_ = "; - - if (pointers_) - { - os_ << "reinterpret_cast\n "; - output_tabs(additional_tabs_, os_); - os_ << '('; - } - - os_ << "ptr_[lookup_"; - - if (!pointers_) - { - os_ << "\n "; - output_tabs(additional_tabs_, os_); - } - - os_ << "[static_cast"; - - if (pointers_) - { - os_ << "\n "; - output_tabs(additional_tabs_, os_); - } - - os_ << "(prev_char_)]]"; 
- - if (pointers_) - { - os_ << ')'; - } - - os_ << ";\n\n"; - - if (features_ & bol_bit) - { - output_tabs(additional_tabs_, os_); - os_ << " bol_ = prev_char_ == '\\n';\n\n"; - } - - output_tabs(additional_tabs_, os_); - os_ << " if (state_ == 0)\n"; - output_tabs(additional_tabs_, os_); - os_ << " {\n"; - - if (features_ & eol_bit) - { - output_tabs(additional_tabs_, os_); - os_ << " EOL_state_ = 0;\n"; - } - - output_tabs(additional_tabs_, os_); - os_ << " break;\n"; - output_tabs(additional_tabs_, os_); - os_ << " }\n\n"; - output_tabs(additional_tabs_, os_); - os_ << " ptr_ = "; - - if (pointers_) - { - os_ << "state_"; - } - else - { - os_ << "&dfa_[state_ * dfa_alphabet_]"; - } - - os_ << ";\n"; - } - - template - static void output_char_loop(const id_type features_, - const std::size_t additional_tabs_, const bool pointers_, - std::ostream &os_, const std::true_type &) - { - output_tabs(additional_tabs_, os_); - os_ << " const std::size_t bytes_ =\n"; - output_tabs(additional_tabs_, os_); - os_ << " sizeof(typename results::char_type) < 3 ?\n"; - output_tabs(additional_tabs_, os_); - os_ << " sizeof(typename results::char_type) : 3;\n"; - output_tabs(additional_tabs_, os_); - os_ << " const std::size_t shift_[] = {0, 8, 16};\n"; - output_tabs(additional_tabs_, os_); - os_ << " typename results::char_type prev_char_ = " - "*curr_++;\n\n"; - - if (features_ & bol_bit) - { - output_tabs(additional_tabs_, os_); - os_ << " bol_ = prev_char_ == '\\n';\n\n"; - } - - output_tabs(additional_tabs_, os_); - os_ << " for (std::size_t i_ = 0; i_ < bytes_; ++i_)\n"; - output_tabs(additional_tabs_, os_); - os_ << " {\n"; - output_tabs(additional_tabs_, os_); - os_ << " const "; - - if (pointers_) - { - os_ << "void * const *"; - } - else - { - os_ << "id_type "; - } - - os_ << "state_ = "; - - if (pointers_) - { - os_ << "reinterpret_cast\n "; - output_tabs(additional_tabs_, os_); - os_ << '('; - } - - os_ << "ptr_[lookup_[static_cast\n"; - output_tabs(additional_tabs_, 
os_); - os_ << " ((prev_char_ >>\n" - " shift_[bytes_ - 1 - i_]) & 0xff)]]"; - - if (pointers_) - { - os_ << ')'; - } - - os_ << ";\n\n"; - output_tabs(additional_tabs_, os_); - os_ << " if (state_ == 0)\n"; - output_tabs(additional_tabs_, os_); - os_ << " {\n"; - - if (features_ & eol_bit) - { - output_tabs(additional_tabs_, os_); - os_ << " EOL_state_ = 0;\n"; - } - - output_tabs(additional_tabs_, os_); - os_ << " goto quit;\n"; - output_tabs(additional_tabs_, os_); - os_ << " }\n\n"; - output_tabs(additional_tabs_, os_); - os_ << " ptr_ = "; - - if (pointers_) - { - os_ << "state_"; - } - else - { - os_ << "&dfa_[state_ * dfa_alphabet_]"; - } - - os_ << ";\n"; - output_tabs(additional_tabs_, os_); - os_ << " }\n"; - } - - static void output_quit(std::ostream &, const std::false_type &) - { - // Nothing to do - } - - static void output_quit(std::ostream &os_, const std::true_type &) - { - os_ << "quit:\n"; - } -}; -} - -#endif diff --git a/YACReaderLibrary/lexertl/generator.hpp b/YACReaderLibrary/lexertl/generator.hpp deleted file mode 100644 index 581cd6e9..00000000 --- a/YACReaderLibrary/lexertl/generator.hpp +++ /dev/null @@ -1,738 +0,0 @@ -// generator.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_GENERATOR_HPP -#define LEXERTL_GENERATOR_HPP - -#include -#include "partition/charset.hpp" -#include "char_traits.hpp" -#include "partition/equivset.hpp" -#include -#include -#include "parser/parser.hpp" -#include "rules.hpp" -#include "state_machine.hpp" -#include - -namespace lexertl -{ -template > -class basic_generator -{ -public: - using id_type = typename rules::id_type; - using rules_char_type = typename rules::rules_char_type; - using sm_traits = typename sm::traits; - using parser = detail::basic_parser; - using charset_map = typename parser::charset_map; - using node = typename parser::node; - using node_ptr_vector = typename parser::node_ptr_vector; - - static void build(const rules &rules_, sm &sm_) - { - const std::size_t size_ = rules_.statemap().size(); - // Strong exception guarantee - // http://www.boost.org/community/exception_safety.html - internals internals_; - sm temp_sm_; - node_ptr_vector node_ptr_vector_; - - internals_._eoi = rules_.eoi(); - internals_.add_states(size_); - - for (id_type index_ = 0; index_ < size_; ++index_) - { - if (rules_.regexes()[index_].empty()) - { - std::ostringstream ss_; - - ss_ << "Lexer states with no rules are not allowed " - "(lexer state " << index_ << ".)"; - throw runtime_error(ss_.str()); - } - else - { - // Note that the following variables are per DFA. - // Map of regex charset tokens (strings) to index - charset_map charset_map_; - // Used to fix up $ and \n clashes. 
- id_type nl_id_ = sm_traits::npos(); - // Regex syntax tree - observer_ptr root_ = build_tree(rules_, index_, - node_ptr_vector_, charset_map_, nl_id_); - - build_dfa(charset_map_, root_, internals_, temp_sm_, index_, - nl_id_); - - if (internals_._dfa[index_].size() / - internals_._dfa_alphabet[index_] >= sm_traits::npos()) - { - // Overflow - throw runtime_error("The data type you have chosen " - "cannot hold this many DFA rows."); - } - } - } - - // If you get a compile error here the id_type from rules and - // state machine do no match. - create(internals_, temp_sm_, rules_.features(), lookup()); - sm_.swap(temp_sm_); - } - - static observer_ptr build_tree(const rules &rules_, - const std::size_t dfa_, node_ptr_vector &node_ptr_vector_, - charset_map &charset_map_, id_type &nl_id_) - { - parser parser_(rules_.locale(), node_ptr_vector_, charset_map_, - rules_.eoi()); - const auto ®exes_ = rules_.regexes(); - auto regex_iter_ = regexes_[dfa_].cbegin(); - auto regex_iter_end_ = regexes_[dfa_].cend(); - const auto &ids_ = rules_.ids(); - const auto &user_ids_ = rules_.user_ids(); - auto id_iter_ = ids_[dfa_].cbegin(); - auto user_id_iter_ = user_ids_[dfa_].cbegin(); - const auto &next_dfas_ = rules_.next_dfas(); - const auto &pushes_ = rules_.pushes(); - const auto &pops_ = rules_.pops(); - auto next_dfa_iter_ = next_dfas_[dfa_].cbegin(); - auto push_dfa_iter_ = pushes_[dfa_].cbegin(); - auto pop_dfa_iter_ = pops_[dfa_].cbegin(); - const bool seen_bol_ = (rules_.features()[dfa_] & bol_bit) != 0; - observer_ptr root_ = nullptr; - - root_ = parser_.parse(*regex_iter_, *id_iter_, *user_id_iter_, - *next_dfa_iter_, *push_dfa_iter_, *pop_dfa_iter_, - rules_.flags(), nl_id_, seen_bol_); - ++regex_iter_; - ++id_iter_; - ++user_id_iter_; - ++next_dfa_iter_; - ++push_dfa_iter_; - ++pop_dfa_iter_; - - // Build syntax trees - while (regex_iter_ != regex_iter_end_) - { - observer_ptr rhs_ = parser_.parse(*regex_iter_, *id_iter_, - *user_id_iter_, *next_dfa_iter_, 
*push_dfa_iter_, - *pop_dfa_iter_, rules_.flags(), nl_id_, - (rules_.features()[dfa_] & bol_bit) != 0); - - node_ptr_vector_.emplace_back - (std::make_unique(root_, rhs_)); - root_ = node_ptr_vector_.back().get(); - - ++regex_iter_; - ++id_iter_; - ++user_id_iter_; - ++next_dfa_iter_; - ++push_dfa_iter_; - ++pop_dfa_iter_; - } - - return root_; - } - -protected: - using compressed = std::integral_constant; - using equivset = detail::basic_equivset; - using equivset_list = std::list>; - using equivset_ptr = std::unique_ptr; - using sm_char_type = typename sm_traits::char_type; - using charset = detail::basic_charset; - using charset_ptr = std::unique_ptr; - using charset_list = std::list>; - using internals = detail::basic_internals; - using id_type_set = typename std::set; - using id_type_vector = typename internals::id_type_vector; - using index_set = typename charset::index_set; - using index_set_vector = std::vector; - using is_dfa = std::integral_constant; - using lookup = std::integral_constant; - using node_set = std::set>; - using node_set_vector = std::vector>; - using node_vector = typename node::node_vector; - using node_vector_vector = std::vector>; - using selection_node = typename parser::selection_node; - using size_t_vector = typename std::vector; - using string_token = typename parser::string_token; - - static void build_dfa(const charset_map &charset_map_, - const observer_ptr root_, internals &internals_, sm &sm_, - const id_type dfa_index_, id_type &nl_id_) - { - // partitioned charset list - charset_list charset_list_; - // vector mapping token indexes to partitioned token index sets - index_set_vector set_mapping_; - auto &dfa_ = internals_._dfa[dfa_index_]; - std::size_t dfa_alphabet_ = 0; - const node_vector &followpos_ = root_->firstpos(); - node_set_vector seen_sets_; - node_vector_vector seen_vectors_; - size_t_vector hash_vector_; - id_type zero_id_ = sm_traits::npos(); - id_type_set eol_set_; - - set_mapping_.resize(charset_map_.size()); 
- partition_charsets(charset_map_, charset_list_, is_dfa()); - build_set_mapping(charset_list_, internals_, dfa_index_, - set_mapping_); - - if (nl_id_ != sm_traits::npos()) - { - nl_id_ = *set_mapping_[nl_id_].begin(); - zero_id_ = sm_traits::compressed ? - *set_mapping_[charset_map_.find(string_token(0, 0))-> - second].begin() : sm_traits::npos(); - } - - dfa_alphabet_ = charset_list_.size() + transitions_index + - (nl_id_ == sm_traits::npos() ? 0 : 1); - - if (dfa_alphabet_ > sm_traits::npos()) - { - // Overflow - throw runtime_error("The data type you have chosen cannot hold " - "the dfa alphabet."); - } - - internals_._dfa_alphabet[dfa_index_] = - static_cast(dfa_alphabet_); - // 'jam' state - dfa_.resize(dfa_alphabet_, 0); - closure(followpos_, seen_sets_, seen_vectors_, hash_vector_, - static_cast(dfa_alphabet_), dfa_); - - // Loop over states - for (id_type index_ = 0; index_ < static_cast - (seen_vectors_.size()); ++index_) - { - equivset_list equiv_list_; - - // Intersect charsets - build_equiv_list(*seen_vectors_[index_].get(), set_mapping_, - equiv_list_, is_dfa()); - - for (auto &equivset_ : equiv_list_) - { - const id_type transition_ = closure - (equivset_->_followpos, seen_sets_, seen_vectors_, - hash_vector_, static_cast(dfa_alphabet_), dfa_); - - if (transition_ != sm_traits::npos()) - { - observer_ptr ptr_ = &dfa_.front() + - ((index_ + 1) * dfa_alphabet_); - - // Prune abstemious transitions from end states. 
- if (*ptr_ && !equivset_->_greedy) continue; - - set_transitions(transition_, equivset_.get(), dfa_, ptr_, - index_, eol_set_); - } - } - } - - fix_clashes(eol_set_, nl_id_, zero_id_, dfa_, dfa_alphabet_, - compressed()); - append_dfa(charset_list_, internals_, sm_, dfa_index_, lookup()); - } - - static void set_transitions(const id_type transition_, equivset *equivset_, - typename internals::id_type_vector &dfa_, id_type *ptr_, - const id_type index_, id_type_set &eol_set_) - { - for (typename equivset::index_vector::const_iterator - equiv_iter_ = equivset_->_index_vector.begin(), - equiv_end_ = equivset_->_index_vector.end(); - equiv_iter_ != equiv_end_; ++equiv_iter_) - { - const id_type i_ = *equiv_iter_; - - if (i_ == parser::bol_token()) - { - dfa_.front() = transition_; - } - else if (i_ == parser::eol_token()) - { - ptr_[eol_index] = transition_; - eol_set_.insert(index_ + 1); - } - else - { - ptr_[i_ + transitions_index] = transition_; - } - } - } - - // Uncompressed - static void fix_clashes(const id_type_set &eol_set_, - const id_type nl_id_, const id_type /*zero_id_*/, - typename internals::id_type_vector &dfa_, - const std::size_t dfa_alphabet_, const std::false_type &) - { - for (const auto &eol_ : eol_set_) - { - observer_ptr ptr_ = &dfa_.front() + eol_ * dfa_alphabet_; - const id_type eol_state_ = ptr_[eol_index]; - const id_type nl_state_ = ptr_[nl_id_ + transitions_index]; - - if (nl_state_) - { - ptr_[transitions_index + nl_id_] = 0; - ptr_ = &dfa_.front() + eol_state_ * dfa_alphabet_; - - if (ptr_[transitions_index + nl_id_] == 0) - { - ptr_[transitions_index + nl_id_] = nl_state_; - } - } - } - } - - // Compressed - static void fix_clashes(const id_type_set &eol_set_, - const id_type nl_id_, const id_type zero_id_, - typename internals::id_type_vector &dfa_, - const std::size_t dfa_alphabet_, const std::true_type &) - { - std::size_t i_ = 0; - - for (const auto &eol_ : eol_set_) - { - observer_ptr ptr_ = &dfa_.front() + eol_ * dfa_alphabet_; - 
const id_type eol_state_ = ptr_[eol_index]; - id_type nl_state_ = 0; - - for (; i_ < (sm_traits::char_24_bit ? 2 : 1); ++i_) - { - ptr_ = &dfa_.front() + ptr_[transitions_index + zero_id_] * - dfa_alphabet_; - } - - nl_state_ = ptr_[transitions_index + nl_id_]; - - if (nl_state_) - { - ptr_ = &dfa_.front() + eol_state_ * dfa_alphabet_; - - if (ptr_[transitions_index + zero_id_] != 0) continue; - - ptr_[transitions_index + zero_id_] = - static_cast(dfa_.size() / dfa_alphabet_); - dfa_.resize(dfa_.size() + dfa_alphabet_, 0); - - for (i_ = 0; i_ < (sm_traits::char_24_bit ? 1 : 0); ++i_) - { - ptr_ = &dfa_.front() + dfa_.size() - dfa_alphabet_; - ptr_[transitions_index + zero_id_] = - static_cast(dfa_.size() / dfa_alphabet_); - dfa_.resize(dfa_.size() + dfa_alphabet_, 0); - } - - ptr_ = &dfa_.front() + dfa_.size() - dfa_alphabet_; - ptr_[transitions_index + nl_id_] = nl_state_; - } - } - } - - // char_state_machine version - static void append_dfa(const charset_list &charset_list_, - const internals &internals_, sm &sm_, const id_type dfa_index_, - const std::false_type &) - { - std::size_t size_ = charset_list_.size(); - typename sm::string_token_vector token_vector_; - - token_vector_.reserve(size_); - - for (const auto &charset_ : charset_list_) - { - token_vector_.push_back(charset_->_token); - } - - sm_.append(token_vector_, internals_, dfa_index_); - } - - // state_machine version - static void append_dfa(const charset_list &, const internals &, sm &, - const id_type, const std::true_type &) - { - // Nothing to do - will use create() instead - } - - // char_state_machine version - static void create(internals &, sm &, const id_type_vector &, - const std::false_type &) - { - // Nothing to do - will use append_dfa() instead - } - - // state_machine version - static void create(internals &internals_, sm &sm_, - const id_type_vector &features_, const std::true_type &) - { - for (std::size_t i_ = 0, size_ = internals_._dfa.size(); - i_ < size_; ++i_) - { - 
internals_._features |= features_[i_]; - } - - if (internals_._dfa.size() > 1) - { - internals_._features |= multi_state_bit; - } - - sm_.data().swap(internals_); - } - - // NFA version - static void partition_charsets(const charset_map &map_, - charset_list &lhs_, const std::false_type &) - { - fill_rhs_list(map_, lhs_); - } - - // DFA version - static void partition_charsets(const charset_map &map_, - charset_list &lhs_, const std::true_type &) - { - charset_list rhs_; - - fill_rhs_list(map_, rhs_); - - if (!rhs_.empty()) - { - typename charset_list::iterator iter_; - typename charset_list::iterator end_; - charset_ptr overlap_ = std::make_unique(); - - lhs_.emplace_back(std::move(rhs_.front())); - rhs_.pop_front(); - - while (!rhs_.empty()) - { - charset_ptr r_(rhs_.front().release()); - - rhs_.pop_front(); - iter_ = lhs_.begin(); - end_ = lhs_.end(); - - while (!r_->empty() && iter_ != end_) - { - auto l_iter_ = iter_; - - (*l_iter_)->intersect(*r_.get(), *overlap_.get()); - - if (overlap_->empty()) - { - ++iter_; - } - else if ((*l_iter_)->empty()) - { - l_iter_->reset(overlap_.release()); - overlap_ = std::make_unique(); - ++iter_; - } - else if (r_->empty()) - { - r_.reset(overlap_.release()); - overlap_ = std::make_unique(); - break; - } - else - { - iter_ = lhs_.insert(++iter_, charset_ptr()); - iter_->reset(overlap_.release()); - overlap_ = std::make_unique(); - ++iter_; - end_ = lhs_.end(); - } - } - - if (!r_->empty()) - { - lhs_.emplace_back(std::move(r_)); - } - } - } - } - - static void fill_rhs_list(const charset_map &map_, charset_list &list_) - { - for (const auto &pair_ : map_) - { - list_.emplace_back(std::make_unique - (pair_.first, pair_.second)); - } - } - - static void build_set_mapping(const charset_list &charset_list_, - internals &internals_, const id_type dfa_index_, - index_set_vector &set_mapping_) - { - auto iter_ = charset_list_.cbegin(); - auto end_ = charset_list_.cend(); - - for (id_type index_ = 0; iter_ != end_; ++iter_, 
++index_) - { - observer_ptr cs_ = iter_->get(); - - fill_lookup(cs_->_token, &internals_._lookup[dfa_index_], - index_, lookup()); - - for (const id_type i_ : cs_->_index_set) - { - set_mapping_[i_].insert(index_); - } - } - } - - // char_state_machine version - static void fill_lookup(const string_token &, observer_ptr , - const id_type, const std::false_type &) - { - // Do nothing (lookup not used) - } - - // state_machine version - static void fill_lookup(const string_token &charset_, - observer_ptr lookup_, const id_type index_, - const std::true_type &) - { - observer_ptr ptr_ = &lookup_->front(); - - for (const auto &range_ : charset_._ranges) - { - for (typename char_traits::index_type char_ = range_.first; - char_ < range_.second; ++char_) - { - // Note char_ must be unsigned - ptr_[char_] = index_ + transitions_index; - } - - // Note range_.second must be unsigned - ptr_[range_.second] = index_ + transitions_index; - } - } - - static id_type closure(const node_vector &followpos_, - node_set_vector &seen_sets_, node_vector_vector &seen_vectors_, - size_t_vector &hash_vector_, const id_type size_, id_type_vector &dfa_) - { - bool end_state_ = false; - id_type id_ = 0; - id_type user_id_ = sm_traits::npos(); - id_type next_dfa_ = 0; - id_type push_dfa_ = sm_traits::npos(); - bool pop_dfa_ = false; - std::size_t hash_ = 0; - - if (followpos_.empty()) return sm_traits::npos(); - - id_type index_ = 0; - std::unique_ptr set_ptr_ = std::make_unique(); - std::unique_ptr vector_ptr_ = - std::make_unique(); - - for (observer_ptr node_ : followpos_) - { - closure_ex(node_, end_state_, id_, user_id_, next_dfa_, - push_dfa_, pop_dfa_, *set_ptr_.get(), - *vector_ptr_.get(), hash_); - } - - bool found_ = false; - auto hash_iter_ = hash_vector_.cbegin(); - auto hash_end_ = hash_vector_.cend(); - auto set_iter_ = seen_sets_.cbegin(); - - for (; hash_iter_ != hash_end_; ++hash_iter_, ++set_iter_) - { - found_ = *hash_iter_ == hash_ && *(*set_iter_) == *set_ptr_; - ++index_; 
- - if (found_) break; - } - - if (!found_) - { - seen_sets_.emplace_back(std::move(set_ptr_)); - seen_vectors_.emplace_back(std::move(vector_ptr_)); - hash_vector_.push_back(hash_); - // State 0 is the jam state... - index_ = static_cast(seen_sets_.size()); - - const std::size_t old_size_ = dfa_.size(); - - dfa_.resize(old_size_ + size_, 0); - - if (end_state_) - { - dfa_[old_size_] |= end_state_bit; - - if (pop_dfa_) - { - dfa_[old_size_] |= pop_dfa_bit; - } - - dfa_[old_size_ + id_index] = id_; - dfa_[old_size_ + user_id_index] = user_id_; - dfa_[old_size_ + push_dfa_index] = push_dfa_; - dfa_[old_size_ + next_dfa_index] = next_dfa_; - } - } - - return index_; - } - - static void closure_ex(observer_ptr node_, bool &end_state_, - id_type &id_, id_type &user_id_, id_type &next_dfa_, - id_type &push_dfa_, bool &pop_dfa_, node_set &set_ptr_, - node_vector &vector_ptr_, std::size_t &hash_) - { - const bool temp_end_state_ = node_->end_state(); - - if (temp_end_state_) - { - if (!end_state_) - { - end_state_ = true; - id_ = node_->id(); - user_id_ = node_->user_id(); - next_dfa_ = node_->next_dfa(); - push_dfa_ = node_->push_dfa(); - pop_dfa_ = node_->pop_dfa(); - } - } - - if (set_ptr_.insert(node_).second) - { - vector_ptr_.push_back(node_); - hash_ += reinterpret_cast(node_); - } - } - - // NFA version - static void build_equiv_list(const node_vector &vector_, - const index_set_vector &set_mapping_, equivset_list &lhs_, - const std::false_type &) - { - fill_rhs_list(vector_, set_mapping_, lhs_); - } - - // DFA version - static void build_equiv_list(const node_vector &vector_, - const index_set_vector &set_mapping_, equivset_list &lhs_, - const std::true_type &) - { - equivset_list rhs_; - - fill_rhs_list(vector_, set_mapping_, rhs_); - - if (!rhs_.empty()) - { - typename equivset_list::iterator iter_; - typename equivset_list::iterator end_; - equivset_ptr overlap_ = std::make_unique(); - - lhs_.emplace_back(std::move(rhs_.front())); - rhs_.pop_front(); - - while 
(!rhs_.empty()) - { - equivset_ptr r_(rhs_.front().release()); - - rhs_.pop_front(); - iter_ = lhs_.begin(); - end_ = lhs_.end(); - - while (!r_->empty() && iter_ != end_) - { - auto l_iter_ = iter_; - - (*l_iter_)->intersect(*r_.get(), *overlap_.get()); - - if (overlap_->empty()) - { - ++iter_; - } - else if ((*l_iter_)->empty()) - { - l_iter_->reset(overlap_.release()); - overlap_ = std::make_unique(); - ++iter_; - } - else if (r_->empty()) - { - r_.reset(overlap_.release()); - overlap_ = std::make_unique(); - break; - } - else - { - iter_ = lhs_.insert(++iter_, equivset_ptr()); - iter_->reset(overlap_.release()); - overlap_ = std::make_unique(); - ++iter_; - end_ = lhs_.end(); - } - } - - if (!r_->empty()) - { - lhs_.emplace_back(std::move(r_)); - } - } - } - } - - static void fill_rhs_list(const node_vector &vector_, - const index_set_vector &set_mapping_, equivset_list &list_) - { - for (observer_ptr node_ : vector_) - { - if (!node_->end_state()) - { - const id_type token_ = node_->token(); - - if (token_ != node::null_token()) - { - if (token_ == parser::bol_token() || - token_ == parser::eol_token()) - { - std::set index_set_; - - index_set_.insert(token_); - list_.emplace_back - (std::make_unique(index_set_, - token_, node_->greedy(), node_->followpos())); - } - else - { - list_.emplace_back(std::make_unique - (set_mapping_[token_], token_, node_->greedy(), - node_->followpos())); - } - } - } - } - } -}; - -using generator = basic_generator; -using wgenerator = basic_generator; -using u32generator = basic_generator; -using char_generator = basic_generator; -using wchar_generator = basic_generator; -using u32char_generator = basic_generator; -} - -#endif diff --git a/YACReaderLibrary/lexertl/internals.hpp b/YACReaderLibrary/lexertl/internals.hpp deleted file mode 100644 index a5e1dfe0..00000000 --- a/YACReaderLibrary/lexertl/internals.hpp +++ /dev/null @@ -1,75 +0,0 @@ -// internals.hpp -// Copyright (c) 2009-2018 Ben Hanson (http://www.benhanson.net/) -// 
-// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_INTERNALS_HPP -#define LEXERTL_INTERNALS_HPP - -#include "enums.hpp" -#include -#include - -namespace lexertl -{ -namespace detail -{ -template -struct basic_internals -{ - using id_type_vector = std::vector; - using id_type_vector_vector = std::vector; - - id_type _eoi; - id_type_vector_vector _lookup; - id_type_vector _dfa_alphabet; - id_type _features; - id_type_vector_vector _dfa; - - basic_internals() : - _eoi(0), - _lookup(), - _dfa_alphabet(), - _features(0), - _dfa() - { - } - - void clear() - { - _eoi = 0; - _lookup.clear(); - _dfa_alphabet.clear(); - _features = 0; - _dfa.clear(); - } - - bool empty() const - { - return _dfa.empty(); - } - - void add_states(const std::size_t num_) - { - for (std::size_t index_ = 0; index_ < num_; ++index_) - { - // lookup *always* has a size 256 now. - _lookup.push_back(id_type_vector(256, dead_state_index)); - _dfa_alphabet.push_back(0); - _dfa.push_back(id_type_vector()); - } - } - - void swap(basic_internals &internals_) - { - std::swap(_eoi, internals_._eoi); - _lookup.swap(internals_._lookup); - _dfa_alphabet.swap(internals_._dfa_alphabet); - std::swap(_features, internals_._features); - _dfa.swap(internals_._dfa); - } -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/iterator.hpp b/YACReaderLibrary/lexertl/iterator.hpp deleted file mode 100644 index 5820ee02..00000000 --- a/YACReaderLibrary/lexertl/iterator.hpp +++ /dev/null @@ -1,135 +0,0 @@ -// iterator.hpp -// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#ifndef LEXERTL_ITERATOR_HPP -#define LEXERTL_ITERATOR_HPP - -#include -#include "lookup.hpp" -#include "state_machine.hpp" - -namespace lexertl -{ -template -class iterator -{ -public: - using value_type = results; - using difference_type = ptrdiff_t; - using pointer = const value_type *; - using reference = const value_type &; - using iterator_category = std::forward_iterator_tag; - - iterator() : - _results(iter(), iter()), - _sm(nullptr) - { - } - - iterator(const iter &start_, const iter &end_, const sm_type &sm) : - _results(start_, end_), - _sm(&sm) - { - lookup(); - } - - // Only need this because of warnings with gcc with -Weffc++ - iterator(const iterator &rhs_) - { - _results = rhs_._results; - _sm = rhs_._sm; - } - - // Only need this because of warnings with gcc with -Weffc++ - iterator &operator =(const iterator &rhs_) - { - if (&rhs_ != this) - { - _results = rhs_._results; - _sm = rhs_._sm; - } - - return *this; - } - - iterator &operator ++() - { - lookup(); - return *this; - } - - iterator operator ++(int) - { - iterator iter_ = *this; - - lookup(); - return iter_; - } - - const value_type &operator *() const - { - return _results; - } - - const value_type *operator ->() const - { - return &_results; - } - - bool operator ==(const iterator &rhs_) const - { - return _sm == rhs_._sm && (_sm == nullptr ? 
true : - _results == rhs_._results); - } - - bool operator !=(const iterator &rhs_) const - { - return !(*this == rhs_); - } - - const sm_type &sm() const - { - return *_sm; - } - -private: - value_type _results; - const sm_type *_sm; - - void lookup() - { - lexertl::lookup(*_sm, _results); - - if (_results.first == _results.eoi) - { - _sm = nullptr; - } - } -}; - -using siterator = - iterator; -using citerator = iterator; -using wsiterator = - iterator; -using wciterator = iterator; -using u32siterator = iterator; -using u32citerator = iterator; - -using sriterator = - iterator; -using criterator = iterator; -using wsriterator = - iterator; -using wcriterator = - iterator; -using u32sriterator = iterator; -using u32criterator = iterator; -} - -#endif diff --git a/YACReaderLibrary/lexertl/licence_1_0.txt b/YACReaderLibrary/lexertl/licence_1_0.txt deleted file mode 100644 index 7925d62e..00000000 --- a/YACReaderLibrary/lexertl/licence_1_0.txt +++ /dev/null @@ -1,24 +0,0 @@ -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. 
- -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. - diff --git a/YACReaderLibrary/lexertl/lookup.hpp b/YACReaderLibrary/lexertl/lookup.hpp deleted file mode 100644 index 903413cd..00000000 --- a/YACReaderLibrary/lexertl/lookup.hpp +++ /dev/null @@ -1,491 +0,0 @@ -// lookup.hpp -// Copyright (c) 2009-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_LOOKUP_HPP -#define LEXERTL_LOOKUP_HPP - -#include -#include "match_results.hpp" -#include - -namespace lexertl -{ -namespace detail -{ -template -struct bol_state -{ - bol_state(const bool) - { - } -}; - -template<> -struct bol_state -{ - bool _bol; - bool _end_bol; - - bol_state(const bool bol_) : - _bol(bol_), - _end_bol(bol_) - { - } -}; - -template -struct eol_state -{ -}; - -template -struct eol_state -{ - id_type _EOL_state; - - eol_state() : - _EOL_state(0) - { - } -}; - -template -struct multi_state_state -{ - multi_state_state(const id_type) - { - } -}; - -template -struct multi_state_state -{ - id_type _start_state; - - multi_state_state(const id_type state_) : - _start_state(state_) - { - } -}; - -template -struct recursive_state -{ - recursive_state(const id_type *) - { - } -}; - -template -struct recursive_state -{ - bool _pop; - id_type _push_dfa; - - recursive_state(const id_type *ptr_) : - _pop((*ptr_ & pop_dfa_bit) != 0), - _push_dfa(*(ptr_ + push_dfa_index)) - { - } -}; - -template -struct lookup_state -{ - 
const id_type *_lookup; - id_type _dfa_alphabet; - const id_type *_dfa; - const id_type *_ptr; - bool _end_state; - id_type _id; - id_type _uid; - bol_state<(flags & bol_bit) != 0> _bol_state; - eol_state _eol_state; - multi_state_state - _multi_state_state; - recursive_state _recursive_state; - - lookup_state(const internals &internals_, const bool bol_, - const id_type state_) : - _lookup(&internals_._lookup[state_][0]), - _dfa_alphabet(internals_._dfa_alphabet[state_]), - _dfa(&internals_._dfa[state_][0]), - _ptr(_dfa + _dfa_alphabet), - _end_state(*_ptr != 0), - _id(*(_ptr + id_index)), - _uid(*(_ptr + user_id_index)), - _bol_state(bol_), - _eol_state(), - _multi_state_state(state_), - _recursive_state(_ptr) - { - } - - void reset_recursive(const std::false_type &) - { - // Do nothing - } - - void reset_recursive(const std::true_type &) - { - _recursive_state._pop = (*_ptr & pop_dfa_bit) != 0; - _recursive_state._push_dfa = *(_ptr + push_dfa_index); - } - - void bol_start_state(const std::false_type &) - { - // Do nothing - } - - void bol_start_state(const std::true_type &) - { - if (_bol_state._bol) - { - const id_type state_ = *_dfa; - - if (state_) - { - _ptr = &_dfa[state_ * _dfa_alphabet]; - } - } - } - - template - bool is_eol(const char_type, const std::false_type &) - { - return false; - } - - template - bool is_eol(const char_type curr_, const std::true_type &) - { - bool ret_ = false; - - _eol_state._EOL_state = _ptr[eol_index]; - ret_ = _eol_state._EOL_state && (curr_ == '\r' || curr_ == '\n'); - - if (ret_) - { - _ptr = &_dfa[_eol_state._EOL_state * _dfa_alphabet]; - } - - return ret_; - } - - template - id_type next_char(const char_type prev_char_, const std::false_type &) - { - const id_type state_= _ptr[_lookup - [static_cast(prev_char_)]]; - - if (state_ != 0) - { - _ptr = &_dfa[state_ * _dfa_alphabet]; - } - - return state_; - } - - template - id_type next_char(const char_type prev_char_, const std::true_type &) - { - const std::size_t bytes_ = 
sizeof(char_type) < 3 ? - sizeof(char_type) : 3; - const std::size_t shift_[] = {0, 8, 16}; - id_type state_= 0; - - for (std::size_t i_ = 0; i_ < bytes_; ++i_) - { - state_ = _ptr[_lookup[static_cast((prev_char_ >> - shift_[bytes_ - 1 - i_]) & 0xff)]]; - - if (state_ == 0) - { - break; - } - - _ptr = &_dfa[state_ * _dfa_alphabet]; - } - - return state_; - } - - template - void bol(const char_type, const std::false_type &) - { - // Do nothing - } - - template - void bol(const char_type prev_char_, const std::true_type &) - { - _bol_state._bol = prev_char_ == '\n'; - } - - void eol(const id_type, const std::false_type &) - { - // Do nothing - } - - void eol(const id_type err_val_, const std::true_type &) - { - _eol_state._EOL_state = err_val_; - } - - void reset_start_state(const std::false_type &) - { - // Do nothing - } - - void reset_start_state(const std::true_type &) - { - _multi_state_state._start_state = *(_ptr + next_dfa_index); - } - - void reset_end_bol(const std::false_type &) - { - // Do nothing - } - - void reset_end_bol(const std::true_type &) - { - _bol_state._end_bol = _bol_state._bol; - } - - template - void end_state(iter_type &end_token_, iter_type &curr_) - { - if (*_ptr) - { - _end_state = true; - reset_end_bol - (std::integral_constant()); - _id = *(_ptr + id_index); - _uid = *(_ptr + user_id_index); - reset_recursive - (std::integral_constant()); - reset_start_state(std::integral_constant()); - end_token_ = curr_; - } - } - - template - void check_eol(iter_type &, iter_type &, const id_type, - const char_type, const std::false_type &) - { - // Do nothing - } - - template - void check_eol(iter_type &end_token_, iter_type &curr_, - const id_type npos, const char_type eoi_, const std::true_type &) - { - if (_eol_state._EOL_state != npos && curr_ == eoi_) - { - _eol_state._EOL_state = _ptr[eol_index]; - - if (_eol_state._EOL_state) - { - _ptr = &_dfa[_eol_state._EOL_state * _dfa_alphabet]; - end_state(end_token_, curr_); - } - } - } - - template - 
void pop(results &, const std::false_type &) - { - // Nothing to do - } - - template - void pop(results &results_, const std::true_type &) - { - if (_recursive_state._pop) - { - _multi_state_state._start_state = results_.stack.top().first; - results_.stack.pop(); - } - else if (_recursive_state._push_dfa != results::npos()) - { - results_.stack.push(typename results::id_type_pair - (_recursive_state._push_dfa, _id)); - } - } - - template - bool is_id_eoi(const id_type eoi_, const results &, const std::false_type &) - { - return _id == eoi_; - } - - template - bool is_id_eoi(const id_type eoi_, const results &results_, - const std::true_type &) - { - return _id == eoi_ || (_recursive_state._pop && - !results_.stack.empty() && results_.stack.top().second == eoi_); - } - - void start_state(id_type &, const std::false_type &) - { - // Do nothing - } - - void start_state(id_type &start_state_, const std::true_type &) - { - start_state_ = _multi_state_state._start_state; - } - - void bol(bool &, const std::false_type &) - { - // Do nothing - } - - void bol(bool &end_bol_, const std::true_type &) - { - end_bol_ = _bol_state._end_bol; - } -}; - -template -void inc_end(results &, const std::false_type &) -{ - // Do nothing -} - -template -void inc_end(results &results_, const std::true_type &) -{ - ++results_.second; -} - -template -void next(const sm_type &sm_, results &results_, - const std::integral_constant &compressed_, - const std::integral_constant &recursive_, - const std::forward_iterator_tag &) -{ - using id_type = typename sm_type::id_type; - const auto &internals_ = sm_.data(); - auto end_token_ = results_.second; - -skip: - auto curr_ = results_.second; - - results_.first = curr_; - -again: - if (curr_ == results_.eoi) - { - results_.id = internals_._eoi; - results_.user_id = results::npos(); - return; - } - - lookup_state lu_state_ - (internals_, results_.bol, results_.state); - lu_state_.bol_start_state - (std::integral_constant()); - - while (curr_ != 
results_.eoi) - { - if (!lu_state_.is_eol(*curr_, - std::integral_constant())) - { - const auto prev_char_ = *curr_; - const id_type state_ = lu_state_.next_char(prev_char_, - compressed_); - - ++curr_; - lu_state_.bol(prev_char_, - std::integral_constant()); - - if (state_ == 0) - { - lu_state_.is_eol(results::npos(), - std::integral_constant()); - break; - } - } - - lu_state_.end_state(end_token_, curr_); - } - - lu_state_.check_eol(end_token_, curr_, results::npos(), results_.eoi, - std::integral_constant()); - - if (lu_state_._end_state) - { - // Return longest match - lu_state_.pop(results_, recursive_); - - lu_state_.start_state(results_.state, - std::integral_constant()); - lu_state_.bol(results_.bol, - std::integral_constant()); - results_.second = end_token_; - - if (lu_state_._id == sm_.skip()) goto skip; - - if (lu_state_.is_id_eoi(internals_._eoi, results_, recursive_)) - { - curr_ = end_token_; - goto again; - } - } - else - { - results_.second = end_token_; - results_.bol = *results_.second == '\n'; - results_.first = results_.second; - // No match causes char to be skipped - inc_end(results_, - std::integral_constant()); - lu_state_._id = results::npos(); - lu_state_._uid = results::npos(); - } - - results_.id = lu_state_._id; - results_.user_id = lu_state_._uid; -} -} - -template -void lookup(const sm_type &sm_, match_results &results_) -{ - using value_type = typename std::iterator_traits::value_type; - using cat = typename std::iterator_traits::iterator_category; - - // If this asserts, you have either not defined all the correct - // flags, or you should be using recursive_match_results instead - // of match_results. 
- assert((sm_.data()._features & flags) == sm_.data()._features); - detail::next(sm_, results_, - std::integral_constant 1)>(), - std::false_type(), cat()); -} - -template -void lookup(const sm_type &sm_, recursive_match_results &results_) -{ - using value_type = typename std::iterator_traits::value_type; - using cat = typename std::iterator_traits::iterator_category; - - // If this asserts, you have not defined all the correct flags - assert((sm_.data()._features & flags) == sm_.data()._features); - detail::next(sm_, results_, - std::integral_constant 1)>(), - std::true_type(), cat()); -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/match_results.hpp b/YACReaderLibrary/lexertl/match_results.hpp deleted file mode 100644 index 078d8df1..00000000 --- a/YACReaderLibrary/lexertl/match_results.hpp +++ /dev/null @@ -1,171 +0,0 @@ -// match_results.hpp -// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_MATCH_RESULTS_HPP -#define LEXERTL_MATCH_RESULTS_HPP - -#include "char_traits.hpp" -#include "enums.hpp" -#include -#include -#include - -namespace lexertl -{ -template -struct match_results -{ - using iter_type = iter; - using char_type = typename std::iterator_traits::value_type; - using index_type = typename basic_char_traits::index_type; - using string = std::basic_string; - - id_type id; - id_type user_id; - iter_type first; - iter_type second; - iter_type eoi; - bool bol; - id_type state; - - match_results() : - id(0), - user_id(npos()), - first(iter_type()), - second(iter_type()), - eoi(iter_type()), - bol(true), - state(0) - { - } - - match_results(const iter_type &start_, const iter_type &end_) : - id(0), - user_id(npos()), - first(start_), - second(start_), - eoi(end_), - bol(true), - state(0) - { - } - - virtual ~match_results() - { - } - - string str() const - { - 
return string(first, second); - } - - string substr(const std::size_t soffset_, const std::size_t eoffset_) const - { - return string(first + soffset_, second - eoffset_); - } - - virtual void clear() - { - id = 0; - user_id = npos(); - first = eoi; - second = eoi; - bol = true; - state = 0; - } - - virtual void reset(const iter_type &start_, const iter_type &end_) - { - id = 0; - user_id = npos(); - first = start_; - second = start_; - eoi = end_; - bol = true; - state = 0; - } - - static id_type npos() - { - return static_cast(~0); - } - - static id_type skip() - { - return static_cast(~1); - } - - bool operator ==(const match_results &rhs_) const - { - return id == rhs_.id && - user_id == rhs_.user_id && - first == rhs_.first && - second == rhs_.second && - eoi == rhs_.eoi && - bol == rhs_.bol && - state == rhs_.state; - } -}; - -template -struct recursive_match_results : public match_results -{ - using id_type_pair = std::pair; - std::stack stack; - - recursive_match_results() : - match_results(), - stack() - { - } - - recursive_match_results(const iter &start_, const iter &end_) : - match_results(start_, end_), - stack() - { - } - - virtual ~recursive_match_results() override - { - } - - virtual void clear() override - { - match_results::clear(); - - while (!stack.empty()) stack.pop(); - } - - virtual void reset(const iter &start_, const iter &end_) override - { - match_results::reset(start_, end_); - - while (!stack.empty()) stack.pop(); - } -}; - -using smatch = match_results; -using cmatch = match_results; -using wsmatch = match_results; -using wcmatch = match_results; -using u32smatch = match_results; -using u32cmatch = match_results; - -using srmatch = - recursive_match_results; -using crmatch = recursive_match_results; -using wsrmatch = - recursive_match_results; -using wcrmatch = recursive_match_results; -using u32srmatch = - recursive_match_results; -using u32crmatch = recursive_match_results; -} - -#endif diff --git 
a/YACReaderLibrary/lexertl/memory_file.hpp b/YACReaderLibrary/lexertl/memory_file.hpp deleted file mode 100644 index 4ea42a4d..00000000 --- a/YACReaderLibrary/lexertl/memory_file.hpp +++ /dev/null @@ -1,138 +0,0 @@ -// memory_file.hpp -// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) -// Inspired by http://en.wikibooks.org/wiki/Optimizing_C%2B%2B/ -// General_optimization_techniques/Input/Output#Memory-mapped_file -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#ifndef LEXERTL_MEMORY_FILE_HPP -#define LEXERTL_MEMORY_FILE_HPP - -#include - -#ifdef _WIN32 -#include -#else -#include -#include -#include -#include -#endif - -// Only files small enough to fit into memory are supported. -namespace lexertl -{ -template -class basic_memory_file -{ -public: - basic_memory_file() - { - } - - basic_memory_file(const char *pathname_) - { - open(pathname_); - } - - ~basic_memory_file() - { - close(); - } - - void open(const char *pathname_) - { - if (_data) close(); - -#ifdef _WIN32 - _fh = ::CreateFileA(pathname_, GENERIC_READ, FILE_SHARE_READ, 0, - OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); - _fmh = 0; - - if (_fh != INVALID_HANDLE_VALUE) - { - _fmh = ::CreateFileMapping(_fh, 0, PAGE_READONLY, 0, 0, 0); - - if (_fmh != 0) - { - _data = static_cast(::MapViewOfFile - (_fmh, FILE_MAP_READ, 0, 0, 0)); - - if (_data) _size = ::GetFileSize(_fh, 0) / sizeof(char_type); - } - } -#else - _fh = ::open(pathname_, O_RDONLY); - - if (_fh > -1) - { - struct stat sbuf_; - - if (::fstat(_fh, &sbuf_) > -1) - { - _data = static_cast - (::mmap(0, sbuf_.st_size, PROT_READ, MAP_SHARED, _fh, 0)); - - if (_data == MAP_FAILED) - { - _data = nullptr; - } - else - { - _size = sbuf_.st_size / sizeof(char_type); - } - } - } -#endif - } - - const char_type *data() const - { - return _data; - } - - std::size_t size() const - { - return _size; - } - - void close() - { -#ifdef 
_WIN32 - ::UnmapViewOfFile(_data); - ::CloseHandle(_fmh); - ::CloseHandle(_fh); -#else - ::munmap(const_cast(_data), _size); - ::close(_fh); -#endif - _data = nullptr; - _size = 0; - _fh = 0; -#ifdef _WIN32 - _fmh = 0; -#endif - } - -private: - const char_type *_data = nullptr; - std::size_t _size = 0; -#ifdef _WIN32 - HANDLE _fh = 0; - HANDLE _fmh = 0; -#else - int _fh = 0; -#endif - - // No copy construction. - basic_memory_file(const basic_memory_file &) = delete; - // No assignment. - basic_memory_file &operator =(const basic_memory_file &) = delete; -}; - -using memory_file = basic_memory_file; -using wmemory_file = basic_memory_file; -} - -#endif diff --git a/YACReaderLibrary/lexertl/narrow.hpp b/YACReaderLibrary/lexertl/narrow.hpp deleted file mode 100644 index 94b2f6c2..00000000 --- a/YACReaderLibrary/lexertl/narrow.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// narrow.hpp -// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_NARROW_HPP -#define LEXERTL_NARROW_HPP - -#include - -namespace lexertl -{ -template -void narrow(const char_type *str_, std::ostringstream &ss_) -{ - while (*str_) - { - // Safe to simply cast to char. - // when string only contains ASCII. - ss_ << static_cast(*str_++); - } -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/observer_ptr.hpp b/YACReaderLibrary/lexertl/observer_ptr.hpp deleted file mode 100644 index 9ecd8255..00000000 --- a/YACReaderLibrary/lexertl/observer_ptr.hpp +++ /dev/null @@ -1,16 +0,0 @@ -// observer_ptr.hpp -// Copyright (c) 2017-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#ifndef LEXERTL_OBSERVER_PTR_HPP -#define LEXERTL_OBSERVER_PTR_HPP - -namespace lexertl -{ - template - using observer_ptr = T *; -} - -#endif diff --git a/YACReaderLibrary/lexertl/parser/parser.hpp b/YACReaderLibrary/lexertl/parser/parser.hpp deleted file mode 100644 index 002f8d87..00000000 --- a/YACReaderLibrary/lexertl/parser/parser.hpp +++ /dev/null @@ -1,926 +0,0 @@ -// parser.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_PARSER_HPP -#define LEXERTL_PARSER_HPP - -#include -#include -#include "tree/end_node.hpp" -#include "tree/iteration_node.hpp" -#include "tree/leaf_node.hpp" -#include -#include "tokeniser/re_tokeniser.hpp" -#include "../runtime_error.hpp" -#include "tree/selection_node.hpp" -#include "tree/sequence_node.hpp" -#include -#include - -namespace lexertl -{ -namespace detail -{ -/* - General principles of regex parsing: - - Every regex is a sequence of sub-regexes. - - Regexes consist of operands and operators - - All operators decompose to sequence, selection ('|') and iteration ('*') - - Regex tokens are stored on a stack. - - When a complete sequence of regex tokens is on the stack it is processed. - -Grammar: - - -> - -> | '|' - -> - -> | - -> - -> charset | macro | '('')' | - -> '?' | '??' | '*' | '*?' | '+' | '+?' | '{n[,[m]]}' | - '{n[,[m]]}?' 
-*/ - -template -class basic_parser -{ -public: - enum {char_24_bit = sm_traits::char_24_bit}; - using char_type = typename sm_traits::char_type; - using id_type = typename sm_traits::id_type; - using end_node = basic_end_node; - using input_char_type = typename sm_traits::input_char_type; - using input_string_token = basic_string_token; - using iteration_node = basic_iteration_node; - using leaf_node = basic_leaf_node; - using tokeniser = - basic_re_tokeniser; - using node = basic_node; - using node_ptr_vector = typename node::node_ptr_vector; - using string = std::basic_string; - using string_token = basic_string_token; - using selection_node = basic_selection_node; - using sequence_node = basic_sequence_node; - using charset_map = std::map; - using charset_pair = std::pair; - using compressed = std::integral_constant; - using token = basic_re_token; - static_assert(std::is_move_assignable::value && - std::is_move_constructible::value, - "token is not movable."); - using token_vector = std::vector; - - basic_parser(const std::locale &locale_, - node_ptr_vector &node_ptr_vector_, - charset_map &charset_map_, const id_type eoi_) : - _locale(locale_), - _node_ptr_vector(node_ptr_vector_), - _charset_map(charset_map_), - _eoi(eoi_), - _token_stack(), - _tree_node_stack() - { - } - - observer_ptr parse(const token_vector ®ex_, const id_type id_, - const id_type user_id_, const id_type next_dfa_, - const id_type push_dfa_, const bool pop_dfa_, - const std::size_t flags_, id_type &nl_id_, const bool seen_bol_) - { - auto iter_ = regex_.cbegin(); - auto end_ = regex_.cend(); - observer_ptr root_ = nullptr; - observer_ptr lhs_token_ = nullptr; - // There cannot be less than 2 tokens - auto rhs_token_ = std::make_unique(*iter_++); - char action_ = 0; - - _token_stack.emplace(std::move(rhs_token_)); - rhs_token_ = std::make_unique(*iter_); - - if (iter_ + 1 != end_) ++iter_; - - do - { - lhs_token_ = _token_stack.top().get(); - action_ = 
lhs_token_->precedence(rhs_token_->_type); - - switch (action_) - { - case '<': - case '=': - _token_stack.emplace(std::move(rhs_token_)); - rhs_token_ = std::make_unique(*iter_); - - if (iter_ + 1 != end_) ++iter_; - - break; - case '>': - reduce(nl_id_); - break; - default: - { - std::ostringstream ss_; - - ss_ << "A syntax error occurred: '" << - lhs_token_->precedence_string() << - "' against '" << rhs_token_->precedence_string() << - " in rule id " << id_ << '.'; - throw runtime_error(ss_.str()); - break; - } - } - } while (!_token_stack.empty()); - - if (_tree_node_stack.empty()) - { - std::ostringstream ss_; - - ss_ << "Empty rules are not allowed in rule id " << - id_ << '.'; - throw runtime_error(ss_.str()); - } - - assert(_tree_node_stack.size() == 1); - - observer_ptr lhs_node_ = _tree_node_stack.top(); - - _tree_node_stack.pop(); - _node_ptr_vector.emplace_back(std::make_unique - (id_, user_id_, next_dfa_, push_dfa_, pop_dfa_)); - - observer_ptr rhs_node_ = _node_ptr_vector.back().get(); - - _node_ptr_vector.emplace_back(std::make_unique - (lhs_node_, rhs_node_)); - root_ = _node_ptr_vector.back().get(); - - if (seen_bol_) - { - fixup_bol(root_); - } - - if ((flags_ & match_zero_len) == 0) - { - const auto &firstpos_ = root_->firstpos(); - - for (observer_ptr node_ : firstpos_) - { - if (node_->end_state()) - { - std::ostringstream ss_; - - ss_ << "Rules that match zero characters are not allowed " - "as this can cause an infinite loop in user code. The " - "match_zero_len flag overrides this check. 
Rule id " << - id_ << '.'; - throw runtime_error(ss_.str()); - } - } - } - - return root_; - } - - static id_type bol_token() - { - return static_cast(~1); - } - - static id_type eol_token() - { - return static_cast(~2); - } - -private: - using input_range = typename input_string_token::range; - using range = typename string_token::range; - using string_token_vector = std::vector>; - using token_stack = std::stack>; - using tree_node_stack = typename node::node_stack; - - const std::locale &_locale; - node_ptr_vector &_node_ptr_vector; - charset_map &_charset_map; - id_type _eoi; - token_stack _token_stack; - tree_node_stack _tree_node_stack; - - void reduce(id_type &nl_id_) - { - observer_ptr lhs_ = nullptr; - observer_ptr rhs_ = nullptr; - token_stack handle_; - char action_ = 0; - - do - { - handle_.emplace(); - rhs_ = _token_stack.top().release(); - handle_.top().reset(rhs_); - _token_stack.pop(); - - if (!_token_stack.empty()) - { - lhs_ = _token_stack.top().get(); - action_ = lhs_->precedence(rhs_->_type); - } - } while (!_token_stack.empty() && action_ == '='); - - assert(_token_stack.empty() || action_ == '<'); - - switch (rhs_->_type) - { - case BEGIN: - // finished processing so exit - break; - case REGEX: - // finished parsing, nothing to do - break; - case OREXP: - orexp(handle_); - break; - case SEQUENCE: - _token_stack.emplace(std::make_unique(OREXP)); - break; - case SUB: - sub(handle_); - break; - case EXPRESSION: - _token_stack.emplace(std::make_unique(SUB)); - break; - case REPEAT: - repeat(handle_); - break; - case BOL: - bol(handle_); - break; - case EOL: - eol(handle_, nl_id_); - break; - case CHARSET: - charset(handle_, compressed()); - break; - case OPENPAREN: - openparen(handle_); - break; - case OPT: - case AOPT: - optional(rhs_->_type == OPT); - _token_stack.emplace(std::make_unique(DUP)); - break; - case ZEROORMORE: - case AZEROORMORE: - zero_or_more(rhs_->_type == ZEROORMORE); - _token_stack.emplace(std::make_unique(DUP)); - break; - 
case ONEORMORE: - case AONEORMORE: - one_or_more(rhs_->_type == ONEORMORE); - _token_stack.emplace(std::make_unique(DUP)); - break; - case REPEATN: - case AREPEATN: - repeatn(rhs_->_type == REPEATN, handle_.top().get()); - _token_stack.emplace(std::make_unique(DUP)); - break; - default: - throw runtime_error - ("Internal error in regex_parser::reduce."); - break; - } - } - - void orexp(token_stack &handle_) - { - assert(handle_.top()->_type == OREXP && - (handle_.size() == 1 || handle_.size() == 3)); - - if (handle_.size() == 1) - { - _token_stack.emplace(std::make_unique(REGEX)); - } - else - { - handle_.pop(); - assert(handle_.top()->_type == OR); - handle_.pop(); - assert(handle_.top()->_type == SEQUENCE); - perform_or(); - _token_stack.emplace(std::make_unique(OREXP)); - } - } - - void perform_or() - { - // perform or - observer_ptr rhs_ = _tree_node_stack.top(); - - _tree_node_stack.pop(); - - observer_ptr lhs_ = _tree_node_stack.top(); - - _node_ptr_vector.emplace_back - (std::make_unique(lhs_, rhs_)); - _tree_node_stack.top() = _node_ptr_vector.back().get(); - } - - void sub(token_stack &handle_) - { - assert((handle_.top()->_type == SUB && - handle_.size() == 1) || handle_.size() == 2); - - if (handle_.size() == 1) - { - _token_stack.emplace(std::make_unique(SEQUENCE)); - } - else - { - handle_.pop(); - assert(handle_.top()->_type == EXPRESSION); - // perform join - sequence(); - _token_stack.emplace(std::make_unique(SUB)); - } - } - - void repeat(token_stack &handle_) - { - assert(handle_.top()->_type == REPEAT && - handle_.size() >= 1 && handle_.size() <= 3); - - if (handle_.size() == 1) - { - _token_stack.emplace(std::make_unique(EXPRESSION)); - } - else - { - handle_.pop(); - assert(handle_.top()->_type == DUP); - _token_stack.emplace(std::make_unique(REPEAT)); - } - } - -#ifndef NDEBUG - void bol(token_stack &handle_) -#else - void bol(token_stack &) -#endif - { - assert(handle_.top()->_type == BOL && - handle_.size() == 1); - - // store charset - 
_node_ptr_vector.emplace_back - (std::make_unique(bol_token(), true)); - _tree_node_stack.push(_node_ptr_vector.back().get()); - _token_stack.emplace(std::make_unique(REPEAT)); - } - -#ifndef NDEBUG - void eol(token_stack &handle_, id_type &nl_id_) -#else - void eol(token_stack &, id_type &nl_id_) -#endif - { - const string_token nl_('\n'); - const id_type temp_nl_id_ = lookup(nl_); - - assert(handle_.top()->_type == EOL && - handle_.size() == 1); - - if (temp_nl_id_ != ~static_cast(0)) - { - nl_id_ = temp_nl_id_; - } - - // store charset - _node_ptr_vector.emplace_back - (std::make_unique(eol_token(), true)); - _tree_node_stack.push(_node_ptr_vector.back().get()); - _token_stack.emplace(std::make_unique(REPEAT)); - } - - // Uncompressed - void charset(token_stack &handle_, const std::false_type &) - { - assert(handle_.top()->_type == CHARSET && - handle_.size() == 1); - - const id_type id_ = lookup(handle_.top()->_str); - - // store charset - _node_ptr_vector.emplace_back(std::make_unique(id_, true)); - _tree_node_stack.push(_node_ptr_vector.back().get()); - _token_stack.emplace(std::make_unique(REPEAT)); - } - - // Compressed - void charset(token_stack &handle_, const std::true_type &) - { - assert(handle_.top()->_type == CHARSET && - handle_.size() == 1); - - std::unique_ptr token_(handle_.top().release()); - - handle_.pop(); - create_sequence(token_); - } - - // Slice wchar_t into sequence of char. - void create_sequence(std::unique_ptr &token_) - { - string_token_vector data_[char_24_bit ? 
3 : 2]; - - for (const input_range &range_ : token_->_str._ranges) - { - slice_range(range_, data_, - std::integral_constant()); - } - - push_ranges(data_, std::integral_constant()); - - _token_stack.emplace(std::make_unique(OPENPAREN)); - _token_stack.emplace(std::make_unique(REGEX)); - _token_stack.emplace(std::make_unique(CLOSEPAREN)); - } - - // 16 bit unicode - void slice_range(const input_range &range_, string_token_vector data_[2], - const std::false_type &) - { - const unsigned char first_msb_ = static_cast - ((range_.first >> 8) & 0xff); - const unsigned char first_lsb_ = static_cast - (range_.first & 0xff); - const unsigned char second_msb_ = static_cast - ((range_.second >> 8) & 0xff); - const unsigned char second_lsb_ = static_cast - (range_.second & 0xff); - - if (first_msb_ == second_msb_) - { - insert_range(first_msb_, first_msb_, first_lsb_, - second_lsb_, data_); - } - else - { - insert_range(first_msb_, first_msb_, first_lsb_, 0xff, data_); - - if (second_msb_ > first_msb_ + 1) - { - insert_range(first_msb_ + 1, second_msb_ - 1, 0, 0xff, data_); - } - - insert_range(second_msb_, second_msb_, 0, second_lsb_, data_); - } - } - - // 24 bit unicode - void slice_range(const input_range &range_, string_token_vector data_[3], - const std::true_type &) - { - const unsigned char first_msb_ = static_cast - ((range_.first >> 16) & 0xff); - const unsigned char first_mid_ = static_cast - ((range_.first >> 8) & 0xff); - const unsigned char first_lsb_ = static_cast - (range_.first & 0xff); - const unsigned char second_msb_ = static_cast - ((range_.second >> 16) & 0xff); - const unsigned char second_mid_ = static_cast - ((range_.second >> 8) & 0xff); - const unsigned char second_lsb_ = static_cast - (range_.second & 0xff); - - if (first_msb_ == second_msb_) - { - string_token_vector data2_[2]; - - // Re-use 16 bit slice function - slice_range(range_, data2_, std::false_type()); - - for (std::size_t i_ = 0, size_ = data2_[0].size(); - i_ < size_; ++i_) - { - 
insert_range(string_token(first_msb_, first_msb_), - *data2_[0][i_], *data2_[1][i_], data_); - } - } - else - { - insert_range(first_msb_, first_msb_, - first_mid_, first_mid_, - first_lsb_, 0xff, data_); - - if (first_mid_ != 0xff) - { - insert_range(first_msb_, first_msb_, - first_mid_ + 1, 0xff, - 0, 0xff, data_); - } - - if (second_msb_ > first_msb_ + 1) - { - insert_range(first_mid_ + 1, second_mid_ - 1, - 0, 0xff, - 0, 0xff, data_); - } - - if (second_mid_ != 0) - { - insert_range(second_msb_, second_msb_, - 0, second_mid_ - 1, - 0, 0xff, data_); - insert_range(second_msb_, second_msb_, - second_mid_, second_mid_, - 0, second_lsb_, data_); - } - else - { - insert_range(second_msb_, second_msb_, - 0, second_mid_, - 0, second_lsb_, data_); - } - } - } - - // 16 bit unicode - void insert_range(const unsigned char first_, const unsigned char second_, - const unsigned char first2_, const unsigned char second2_, - string_token_vector data_[2]) - { - const string_token token_(first_ > second_ ? second_ : first_, - first_ > second_ ? first_ : second_); - const string_token token2_(first2_ > second2_ ? second2_ : first2_, - first2_ > second2_ ? 
first2_ : second2_); - - insert_range(token_, token2_, data_); - } - - void insert_range(const string_token &token_, const string_token &token2_, - string_token_vector data_[2]) - { - typename string_token_vector::const_iterator iter_ = - std::find_if(data_[0].begin(), data_[0].end(), - [&token_](const std::unique_ptr &rhs_) - { - return token_ == *rhs_.get(); - }); - - if (iter_ == data_[0].end()) - { - data_[0].emplace_back(std::make_unique(token_)); - data_[1].emplace_back(std::make_unique(token2_)); - } - else - { - const std::size_t index_ = iter_ - data_[0].begin(); - - data_[1][index_]->insert(token2_); - } - } - - // 24 bit unicode - void insert_range(const unsigned char first_, const unsigned char second_, - const unsigned char first2_, const unsigned char second2_, - const unsigned char first3_, const unsigned char second3_, - string_token_vector data_[3]) - { - const string_token token_(first_ > second_ ? second_ : first_, - first_ > second_ ? first_ : second_); - const string_token token2_(first2_ > second2_ ? second2_ : first2_, - first2_ > second2_ ? first2_ : second2_); - const string_token token3_(first3_ > second3_ ? second3_ : first3_, - first3_ > second3_ ? 
first3_ : second3_); - - insert_range(token_, token2_, token3_, data_); - } - - void insert_range(const string_token &token_, const string_token &token2_, - const string_token &token3_, string_token_vector data_[3]) - { - auto iter_ = data_[0].cbegin(); - auto end_ = data_[0].cend(); - bool finished_ = false; - - do - { - iter_ = std::find_if(iter_, end_, - [&token_](const std::unique_ptr &rhs_) - { - return token_ == *rhs_.get(); - }); - - if (iter_ == end_) - { - data_[0].emplace_back(std::make_unique(token_)); - data_[1].emplace_back(std::make_unique(token2_)); - data_[2].emplace_back(std::make_unique(token3_)); - finished_ = true; - } - else - { - const std::size_t index_ = iter_ - data_[0].begin(); - - if (*data_[1][index_] == token2_) - { - data_[2][index_]->insert(token3_); - finished_ = true; - } - else - { - ++iter_; - } - } - } while (!finished_); - } - - // 16 bit unicode - void push_ranges(string_token_vector data_[2], const std::false_type &) - { - auto viter_ = data_[0].cbegin(); - auto vend_ = data_[0].cend(); - auto viter2_ = data_[1].cbegin(); - - push_range(viter_++->get()); - push_range(viter2_++->get()); - sequence(); - - while (viter_ != vend_) - { - push_range(viter_++->get()); - push_range(viter2_++->get()); - sequence(); - perform_or(); - } - } - - // 24 bit unicode - void push_ranges(string_token_vector data_[3], const std::true_type &) - { - auto viter_ = data_[0].cbegin(); - auto vend_ = data_[0].cend(); - auto viter2_ = data_[1].cbegin(); - auto viter3_ = data_[2].cbegin(); - - push_range(viter_++->get()); - push_range(viter2_++->get()); - sequence(); - push_range(viter3_++->get()); - sequence(); - - while (viter_ != vend_) - { - push_range(viter_++->get()); - push_range(viter2_++->get()); - sequence(); - push_range(viter3_++->get()); - sequence(); - perform_or(); - } - } - - void push_range(observer_ptr token_) - { - const id_type id_ = lookup(*token_); - - _node_ptr_vector.emplace_back(std::make_unique(id_, true)); - 
_tree_node_stack.push(_node_ptr_vector.back().get()); - } - - id_type lookup(const string_token &charset_) - { - // Converted to id_type below. - std::size_t id_ = sm_traits::npos(); - - if (static_cast(id_) < id_) - { - throw runtime_error("id_type is not large enough " - "to hold all ids."); - } - - typename charset_map::const_iterator iter_ = - _charset_map.find(charset_); - - if (iter_ == _charset_map.end()) - { - id_ = _charset_map.size(); - _charset_map.insert(charset_pair(charset_, - static_cast(id_))); - } - else - { - id_ = iter_->second; - } - - return static_cast(id_); - } - - void openparen(token_stack &handle_) - { - assert(handle_.top()->_type == OPENPAREN && - handle_.size() == 3); - - handle_.pop(); - assert(handle_.top()->_type == REGEX); - handle_.pop(); - assert(handle_.top()->_type == CLOSEPAREN); - _token_stack.emplace(std::make_unique(REPEAT)); - } - - void sequence() - { - observer_ptr rhs_ = _tree_node_stack.top(); - - _tree_node_stack.pop(); - - observer_ptr lhs_ = _tree_node_stack.top(); - - _node_ptr_vector.emplace_back - (std::make_unique(lhs_, rhs_)); - _tree_node_stack.top() = _node_ptr_vector.back().get(); - } - - void optional(const bool greedy_) - { - // perform ? - observer_ptr lhs_ = _tree_node_stack.top(); - // Don't know if lhs_ is a leaf_node, so get firstpos. - auto &firstpos_ = lhs_->firstpos(); - - for (observer_ptr node_ : firstpos_) - { - // These are leaf_nodes! 
- node_->greedy(greedy_); - } - - _node_ptr_vector.emplace_back(std::make_unique - (node::null_token(), greedy_)); - - observer_ptr rhs_ = _node_ptr_vector.back().get(); - - _node_ptr_vector.emplace_back - (std::make_unique(lhs_, rhs_)); - _tree_node_stack.top() = _node_ptr_vector.back().get(); - } - - void zero_or_more(const bool greedy_) - { - // perform * - observer_ptr ptr_ = _tree_node_stack.top(); - - _node_ptr_vector.emplace_back - (std::make_unique(ptr_, greedy_)); - _tree_node_stack.top() = _node_ptr_vector.back().get(); - } - - void one_or_more(const bool greedy_) - { - // perform + - observer_ptr lhs_ = _tree_node_stack.top(); - observer_ptr copy_ = lhs_->copy(_node_ptr_vector); - - _node_ptr_vector.emplace_back(std::make_unique - (copy_, greedy_)); - - observer_ptr rhs_ = _node_ptr_vector.back().get(); - - _node_ptr_vector.emplace_back - (std::make_unique(lhs_, rhs_)); - _tree_node_stack.top() = _node_ptr_vector.back().get(); - } - - // perform {n[,[m]]} - // Semantic checks have already been performed. - // {0,} = * - // {0,1} = ? - // {1,} = + - // therefore we do not check for these cases. - void repeatn(const bool greedy_, observer_ptr token_) - { - const rules_char_type *str_ = token_->_extra.c_str(); - std::size_t min_ = 0; - bool comma_ = false; - std::size_t max_ = 0; - - while (*str_>= '0' && *str_ <= '9') - { - min_ *= 10; - min_ += *str_ - '0'; - ++str_; - } - - comma_ = *str_ == ','; - - if (comma_) ++str_; - - while (*str_>= '0' && *str_ <= '9') - { - max_ *= 10; - max_ += *str_ - '0'; - ++str_; - } - - if (!(min_ == 1 && !comma_)) - { - const std::size_t top_ = min_ > 0 ? 
min_ : max_; - - if (min_ == 0) - { - optional(greedy_); - } - - observer_ptr prev_ = _tree_node_stack.top()-> - copy(_node_ptr_vector); - observer_ptr curr_ = nullptr; - - for (std::size_t i_ = 2; i_ < top_; ++i_) - { - curr_ = prev_->copy(_node_ptr_vector); - _tree_node_stack.push(prev_); - sequence(); - prev_ = curr_; - } - - if (comma_ && min_ > 0) - { - if (min_ > 1) - { - curr_ = prev_->copy(_node_ptr_vector); - _tree_node_stack.push(prev_); - sequence(); - prev_ = curr_; - } - - if (comma_ && max_) - { - _tree_node_stack.push(prev_); - optional(greedy_); - prev_ = _tree_node_stack.top(); - _tree_node_stack.pop(); - - const std::size_t count_ = max_ - min_; - - for (std::size_t i_ = 1; i_ < count_; ++i_) - { - curr_ = prev_->copy(_node_ptr_vector); - _tree_node_stack.push(prev_); - sequence(); - prev_ = curr_; - } - } - else - { - _tree_node_stack.push(prev_); - zero_or_more(greedy_); - prev_ = _tree_node_stack.top(); - _tree_node_stack.pop(); - } - } - - _tree_node_stack.push(prev_); - sequence(); - } - } - - void fixup_bol(observer_ptr &root_)const - { - const auto &first_ = root_->firstpos(); - bool found_ = false; - - for (observer_ptr node_ : first_) - { - found_ = !node_->end_state() && node_->token() == bol_token(); - - if (found_) break; - } - - if (!found_) - { - _node_ptr_vector.emplace_back - (std::make_unique(bol_token(), true)); - - observer_ptr lhs_ = _node_ptr_vector.back().get(); - - _node_ptr_vector.emplace_back - (std::make_unique(node::null_token(), true)); - - observer_ptr rhs_ = _node_ptr_vector.back().get(); - - _node_ptr_vector.emplace_back - (std::make_unique(lhs_, rhs_)); - lhs_ = _node_ptr_vector.back().get(); - - _node_ptr_vector.emplace_back - (std::make_unique(lhs_, root_)); - root_ = _node_ptr_vector.back().get(); - } - } -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/parser/tokeniser/re_token.hpp b/YACReaderLibrary/lexertl/parser/tokeniser/re_token.hpp deleted file mode 100644 index 271a7b1a..00000000 --- 
a/YACReaderLibrary/lexertl/parser/tokeniser/re_token.hpp +++ /dev/null @@ -1,100 +0,0 @@ -// re_token.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_RE_TOKEN_HPP -#define LEXERTL_RE_TOKEN_HPP - -#include "../../string_token.hpp" - -namespace lexertl -{ -namespace detail -{ -// Note that tokens following END are never seen by parser.hpp. -enum token_type {BEGIN, REGEX, OREXP, SEQUENCE, SUB, EXPRESSION, REPEAT, - DUP, OR, CHARSET, BOL, EOL, MACRO, OPENPAREN, CLOSEPAREN, OPT, AOPT, - ZEROORMORE, AZEROORMORE, ONEORMORE, AONEORMORE, REPEATN, AREPEATN, - END, DIFF}; - -template -struct basic_re_token -{ - using string_token = basic_string_token; - using string = std::basic_string; - - token_type _type; - string _extra; - string_token _str; - - basic_re_token(const token_type type_ = BEGIN) : - _type(type_), - _extra(), - _str() - { - } - - void clear() - { - _type = BEGIN; - _extra.clear(); - _str.clear(); - } - - void swap(basic_re_token &rhs_) - { - std::swap(_type, rhs_._type); - _extra.swap(rhs_._extra); - _str.swap(rhs_._str); - } - - char precedence(const token_type type_) const - { - // Moved in here for Solaris compiler. - static const char precedence_table_[END + 1][END + 1] = { -// BEG, REG, ORE, SEQ, SUB, EXP, RPT, DUP, | , CHR, BOL, EOL, MCR, ( , ) , ? , ?? , * , *? 
, + , +?, {n}?, {n}, END -/*BEGIN*/{ ' ', '<', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/*REGEX*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/*OREXP*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/* SEQ */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', ' ', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/* SUB */{ ' ', ' ', ' ', ' ', ' ', '=', '<', ' ', '>', '<', '<', '<', '<', '<', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/*EXPRE*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/* RPT */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', '=', '>', '>', '>', '>', '>', '>', '>', '<', '<', '<', '<', '<', '<', '<', '<', '>' }, -/*DUPLI*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/* | */{ ' ', ' ', ' ', '=', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }, -/*CHARA*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, -/* BOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, -/* EOL */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, -/*MACRO*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>' }, -/* ( */{ ' ', '=', '<', '<', '<', '<', '<', ' ', ' ', '<', '<', '<', '<', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' }, -/* ) */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '>', '>', '>', 
'>', '>', '>', '>', '>', '>' }, -/* ? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/* ?? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/* * */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/* *? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/* + */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/* +? */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/*{n,m}*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', '<', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/*{nm}?*/{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>', '>', '>', '>', '>', '>', '>', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '>' }, -/* END */{ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' } -}; - - return precedence_table_[_type][type_]; - } - - const char *precedence_string() const - { - // Moved in here for Solaris compiler. 
- static const char *precedence_strings_[END + 1] = - {"BEGIN", "REGEX", "OREXP", "SEQUENCE", "SUB", "EXPRESSION", - "REPEAT", "DUPLICATE", "|", "CHARSET", "^", "$", "MACRO", "(", ")", - "?", "??", "*", "*?", "+", "+?", "{n[,[m]]}", "{n[,[m]]}?", "END"}; - - return precedence_strings_[_type]; - } -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser.hpp b/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser.hpp deleted file mode 100644 index c7e1e52d..00000000 --- a/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser.hpp +++ /dev/null @@ -1,778 +0,0 @@ -// tokeniser.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_RE_TOKENISER_HPP -#define LEXERTL_RE_TOKENISER_HPP - -#include -#include "re_token.hpp" -#include "../../runtime_error.hpp" -#include -#include "../../string_token.hpp" -#include "re_tokeniser_helper.hpp" - -namespace lexertl -{ -namespace detail -{ -template -class basic_re_tokeniser -{ -public: - using re_token = basic_re_token; - using tokeniser_helper = - basic_re_tokeniser_helper; - using char_state = typename tokeniser_helper::char_state; - using state = typename tokeniser_helper::state; - using string_token = basic_string_token; - - static void next(re_token &lhs_, state &state_, re_token &token_) - { - rules_char_type ch_ = 0; - bool eos_ = state_.next(ch_); - bool skipped_ = false; - - token_.clear(); - - do - { - // string begin/end - while (!eos_ && ch_ == '"') - { - state_._in_string ^= 1; - eos_ = state_.next(ch_); - } - - if (eos_) break; - - // (?# ...) 
- skipped_ = comment(eos_, ch_, state_); - - if (eos_) break; - - // skip_ws set - skipped_ |= skip(eos_, ch_, state_); - } while (!eos_ && skipped_); - - if (eos_) - { - if (state_._in_string) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (missing '\"')"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - if (state_._paren_count) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (missing ')')"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - token_._type = END; - } - else - { - if (ch_ == '\\') - { - // Even if we are in a string, respect escape sequences... - token_._type = CHARSET; - escape(state_, token_._str); - } - else if (state_._in_string) - { - // All other meta characters lose their special meaning - // inside a string. - token_._type = CHARSET; - add_char(ch_, state_, token_._str); - } - else - { - // Not an escape sequence and not inside a string, so - // check for meta characters. 
- switch (ch_) - { - case '(': - token_._type = OPENPAREN; - ++state_._paren_count; - read_options(state_); - break; - case ')': - --state_._paren_count; - - if (state_._paren_count < 0) - { - std::ostringstream ss_; - - ss_ << "Number of open parenthesis < 0 " - "at index " << state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - token_._type = CLOSEPAREN; - - if (!state_._flags_stack.empty()) - { - state_._flags = state_._flags_stack.top(); - state_._flags_stack.pop(); - } - - break; - case '?': - if (!state_.eos() && *state_._curr == '?') - { - token_._type = AOPT; - state_.increment(); - } - else - { - token_._type = OPT; - } - - break; - case '*': - if (!state_.eos() && *state_._curr == '?') - { - token_._type = AZEROORMORE; - state_.increment(); - } - else - { - token_._type = ZEROORMORE; - } - - break; - case '+': - if (!state_.eos() && *state_._curr == '?') - { - token_._type = AONEORMORE; - state_.increment(); - } - else - { - token_._type = ONEORMORE; - } - - break; - case '{': - open_curly(lhs_, state_, token_); - break; - case '|': - token_._type = OR; - break; - case '^': - if (!state_._macro_name && - state_._curr - 1 == state_._start) - { - token_._type = BOL; - } - else - { - token_._type = CHARSET; - token_._str.insert(range(ch_, ch_)); - } - - break; - case '$': - if (!state_._macro_name && state_._curr == state_._end) - { - token_._type = EOL; - } - else - { - token_._type = CHARSET; - token_._str.insert(range(ch_, ch_)); - } - - break; - case '.': - { - token_._type = CHARSET; - - if (state_._flags & dot_not_newline) - { - token_._str.insert(range('\n', '\n')); - } - else if (state_._flags & dot_not_cr_lf) - { - token_._str.insert(range('\n', '\n')); - token_._str.insert(range('\r', '\r')); - } - - token_._str.negate(); - break; - } - case '[': - { - token_._type = CHARSET; - tokeniser_helper::charset(state_, token_._str); - break; - } - case '/': - { - std::ostringstream ss_; - - ss_ << "Lookahead ('/') is not 
supported yet"; - state_.error(ss_); - throw runtime_error(ss_.str()); - break; - } - default: - token_._type = CHARSET; - add_char(ch_, state_, token_._str); - break; - } - } - } - } - -private: - using range = typename string_token::range; - - static bool comment(bool &eos_, rules_char_type &ch_, state &state_) - { - bool skipped_ = false; - - if (!state_._in_string && ch_ == '(' && !state_.eos() && - *state_._curr == '?' && state_._curr + 1 < state_._end && - *(state_._curr + 1) == '#') - { - std::size_t paren_count_ = 1; - - state_.increment(); - state_.increment(); - - do - { - eos_ = state_.next(ch_); - - if (ch_ == '(') - { - ++paren_count_; - } - else if (ch_ == ')') - { - --paren_count_; - } - } while (!eos_ && !(ch_ == ')' && paren_count_ == 0)); - - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (unterminated comment)"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - else - { - eos_ = state_.next(ch_); - } - - skipped_ = true; - } - - return skipped_; - } - - static bool skip(bool &eos_, rules_char_type &ch_, state &state_) - { - bool skipped_ = false; - - if ((state_._flags & skip_ws) && !state_._in_string) - { - bool c_comment_ = false; - bool skip_ws_ = false; - - do - { - c_comment_ = ch_ == '/' && !state_.eos() && - *state_._curr == '*'; - skip_ws_ = !c_comment_ && (ch_ == ' ' || ch_ == '\t' || - ch_ == '\n' || ch_ == '\r' || ch_ == '\f' || ch_ == '\v'); - - if (c_comment_) - { - state_.increment(); - eos_ = state_.next(ch_); - - while (!eos_ && !(ch_ == '*' && !state_.eos() && - *state_._curr == '/')) - { - eos_ = state_.next(ch_); - } - - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (unterminated C style comment)"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - else - { - state_.increment(); - eos_ = state_.next(ch_); - } - - skipped_ = true; - } - 
else if (skip_ws_) - { - eos_ = state_.next(ch_); - skipped_ = true; - } - } while (!eos_ && (c_comment_ || skip_ws_)); - } - - return skipped_; - } - - static void read_options(state &state_) - { - if (!state_.eos() && *state_._curr == '?') - { - rules_char_type ch_ = 0; - bool eos_ = false; - bool negate_ = false; - - state_.increment(); - eos_ = state_.next(ch_); - state_._flags_stack.push(state_._flags); - - while (!eos_ && ch_ != ':') - { - switch (ch_) - { - case '-': - negate_ ^= 1; - break; - case 'i': - if (negate_) - { - state_._flags = state_._flags & ~icase; - } - else - { - state_._flags = state_._flags | icase; - } - - negate_ = false; - break; - case 's': - if (negate_) - { -#ifdef _WIN32 - state_._flags = state_._flags | dot_not_cr_lf; -#else - state_._flags = state_._flags | dot_not_newline; -#endif - } - else - { -#ifdef _WIN32 - state_._flags = state_._flags & ~dot_not_cr_lf; -#else - state_._flags = state_._flags & ~dot_not_newline; -#endif - } - - negate_ = false; - break; - case 'x': - if (negate_) - { - state_._flags = state_._flags & ~skip_ws; - } - else - { - state_._flags = state_._flags | skip_ws; - } - - negate_ = false; - break; - default: - { - std::ostringstream ss_; - - ss_ << "Unknown option at index " << - state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - eos_ = state_.next(ch_); - } - - // End of string handler will handle early termination - } - else if (!state_._flags_stack.empty()) - { - state_._flags_stack.push(state_._flags); - } - } - - static void escape(state &state_, string_token &token_) - { - char_type ch_ = 0; - std::size_t str_len_ = 0; - const char *str_ = tokeniser_helper::escape_sequence(state_, - ch_, str_len_); - - if (str_) - { - char_state state2_(str_ + 1, str_ + str_len_, state_._id, - state_._flags, state_._locale, 0); - - tokeniser_helper::charset(state2_, token_); - } - else - { - add_char(ch_, state_, token_); - } - } - - static void add_char(const char_type ch_, const 
state &state_, - string_token &token_) - { - range range_(ch_, ch_); - - token_.insert(range_); - - if (state_._flags & icase) - { - string_token folded_; - - tokeniser_helper::fold(range_, state_._locale, - folded_, typename tokeniser_helper::template - size()); - - if (!folded_.empty()) - { - token_.insert(folded_); - } - } - } - - static void open_curly(re_token &lhs_, state &state_, - re_token &token_) - { - if (state_.eos()) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (missing '}')"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - else if (*state_._curr == '-' || *state_._curr == '+') - { - rules_char_type ch_ = 0; - - if (lhs_._type != CHARSET) - { - std::ostringstream ss_; - - ss_ << "CHARSET must precede {" << - state_._curr << "} at index " << - state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - state_.next(ch_); - token_._type = DIFF; - token_._extra = ch_; - - if (state_.next(ch_)) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (missing '}')"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - if (ch_ != '}') - { - std::ostringstream ss_; - - ss_ << "Missing '}' at index " << state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - else if (*state_._curr >= '0' && *state_._curr <= '9') - { - repeat_n(state_, token_); - } - else - { - macro(state_, token_); - } - } - - // SYNTAX: - // {n[,[n]]} - // SEMANTIC RULES: - // {0} - INVALID (throw exception) - // {0,} = * - // {0,0} - INVALID (throw exception) - // {0,1} = ? 
- // {1,} = + - // {min,max} where min == max - {min} - // {min,max} where max < min - INVALID (throw exception) - static void repeat_n(state &state_, re_token &token_) - { - rules_char_type ch_ = 0; - bool eos_ = state_.next(ch_); - std::size_t min_ = 0; - std::size_t max_ = 0; - - while (!eos_ && ch_ >= '0' && ch_ <= '9') - { - min_ *= 10; - min_ += ch_ - '0'; - token_._extra += ch_; - eos_ = state_.next(ch_); - } - - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (missing repeat terminator '}')"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - bool min_max_ = false; - bool repeatn_ = true; - - if (ch_ == ',') - { - token_._extra += ch_; - eos_ = state_.next(ch_); - - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (missing repeat terminator '}')"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - if (ch_ == '}') - { - // Small optimisation: Check for '*' equivalency. - if (min_ == 0) - { - token_._type = ZEROORMORE; - repeatn_ = false; - } - // Small optimisation: Check for '+' equivalency. - else if (min_ == 1) - { - token_._type = ONEORMORE; - repeatn_ = false; - } - } - else - { - if (ch_ < '0' || ch_ > '9') - { - std::ostringstream ss_; - - ss_ << "Missing repeat terminator '}' at index " << - state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - min_max_ = true; - - do - { - max_ *= 10; - max_ += ch_ - '0'; - token_._extra += ch_; - eos_ = state_.next(ch_); - } while (!eos_ && ch_ >= '0' && ch_ <= '9'); - - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (missing repeat terminator '}')"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - // Small optimisation: Check for '?' equivalency. 
- if (min_ == 0 && max_ == 1) - { - token_._type = OPT; - repeatn_ = false; - } - // Small optimisation: if min == max, then min. - else if (min_ == max_) - { - token_._extra.erase(token_._extra.find(',')); - min_max_ = false; - max_ = 0; - } - } - } - - if (ch_ != '}') - { - std::ostringstream ss_; - - ss_ << "Missing repeat terminator '}' at index " << - state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - if (repeatn_) - { - // SEMANTIC VALIDATION follows: - // NOTE: {0,} has already become * - // therefore we don't check for a comma. - if (min_ == 0 && max_ == 0) - { - std::ostringstream ss_; - - ss_ << "Cannot have exactly zero repeats preceding index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - if (min_max_ && max_ < min_) - { - std::ostringstream ss_; - - ss_ << "Max less than min preceding index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - if (!state_.eos() && *state_._curr == '?') - { - token_._type = AREPEATN; - state_.increment(); - } - else - { - token_._type = REPEATN; - } - } - else if (token_._type == ZEROORMORE) - { - if (!state_.eos() && *state_._curr == '?') - { - token_._type = AZEROORMORE; - state_.increment(); - } - } - else if (token_._type == ONEORMORE) - { - if (!state_.eos() && *state_._curr == '?') - { - token_._type = AONEORMORE; - state_.increment(); - } - } - else if (token_._type == OPT) - { - if (!state_.eos() && *state_._curr == '?') - { - token_._type = AOPT; - state_.increment(); - } - } - } - - static void macro(state &state_, re_token &token_) - { - rules_char_type ch_ = 0; - bool eos_ = false; - - state_.next(ch_); - - if (ch_ != '_' && !(ch_ >= 'A' && ch_ <= 'Z') && - !(ch_ >= 'a' && ch_ <= 'z')) - { - std::ostringstream ss_; - - ss_ << "Invalid MACRO name at index " << state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - do - { - token_._extra += ch_; - eos_ = state_.next(ch_); - - if 
(eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (missing MACRO name terminator '}')"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } while (ch_ == '_' || ch_ == '-' || (ch_ >= 'A' && ch_ <= 'Z') || - (ch_ >= 'a' && ch_ <= 'z') || (ch_ >= '0' && ch_ <= '9')); - - if (ch_ != '}') - { - std::ostringstream ss_; - - ss_ << "Missing MACRO name terminator '}' at index " << - state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - token_._type = MACRO; - } -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_helper.hpp b/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_helper.hpp deleted file mode 100644 index f9f40cfe..00000000 --- a/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_helper.hpp +++ /dev/null @@ -1,3157 +0,0 @@ -// tokeniser_helper.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_RE_TOKENISER_HELPER_HPP -#define LEXERTL_RE_TOKENISER_HELPER_HPP - -#include "../../char_traits.hpp" -// strlen() -#include -#include "re_tokeniser_state.hpp" -#include "../../runtime_error.hpp" -#include -#include "../../string_token.hpp" - -namespace lexertl -{ -namespace detail -{ -template > -class basic_re_tokeniser_helper -{ -public: - using char_state = basic_re_tokeniser_state; - using state = basic_re_tokeniser_state; - using string_token = basic_string_token; - using index_type = typename string_token::index_type; - using range = typename string_token::range; - - template - struct size - { - }; - - using one = size<1>; - using two = size<2>; - using four = size<4>; - - template - static const char *escape_sequence(state_type &state_, - char_type &ch_, std::size_t &str_len_) - { - bool eos_ = state_.eos(); - - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following '\\'"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - const char *str_ = charset_shortcut(state_, str_len_); - - if (str_) - { - state_.increment(); - } - else - { - ch_ = chr(state_); - } - - return str_; - } - - // This function can call itself. 
- template - static void charset(state_type &state_, string_token &token_) - { - bool negated_ = false; - typename state_type::char_type ch_ = 0; - bool eos_ = state_.next(ch_); - - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following '['"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - negated_ = ch_ == '^'; - - if (negated_) - { - eos_ = state_.next(ch_); - - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following '^'"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - bool chset_ = false; - typename string_token::char_type prev_ = 0; - - do - { - if (ch_ == '\\') - { - std::size_t str_len_ = 0; - const char *str_ = escape_sequence(state_, prev_, - str_len_); - - chset_ = str_ != 0; - - if (chset_) - { - char_state temp_state_(str_ + 1, str_ + str_len_, - state_._id, state_._flags, state_._locale, 0); - string_token temp_token_; - - charset(temp_state_, temp_token_); - token_.insert(temp_token_); - } - } - else if (ch_ == '[' && !state_.eos() && *state_._curr == ':') - { - state_.increment(); - posix(state_, token_); - chset_ = true; - } - else - { - chset_ = false; - prev_ = ch_; - } - - eos_ = state_.next(ch_); - - // Covers preceding if, else if and else - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (missing ']')"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - if (ch_ == '-' && *state_._curr != ']') - { - charset_range(chset_, state_, eos_, ch_, prev_, - token_); - } - else if (!chset_) - { - range range_(prev_, prev_); - - token_.insert(range_); - - if (state_._flags & icase) - { - string_token folded_; - - fold(range_, state_._locale, folded_, - size()); - - if (!folded_.empty()) - { - token_.insert(folded_); - } - } - } - } while (ch_ != ']'); 
- - if (negated_) - { - token_.negate(); - } - - if (token_.empty()) - { - std::ostringstream ss_; - - ss_ << "Empty charset not allowed preceding index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - static void fold(const range &range_, const std::locale &locale_, - string_token &out_, const one &) - { - // If string_token::char_type is 16 bit may overflow, - // so use std::size_t. - std::size_t start_ = range_.first; - std::size_t end_ = range_.second; - - // In 8 bit char mode, use locale and therefore consider every char - // individually. - for (; start_ <= end_; ++start_) - { - const input_char_type upper_ = std::toupper - (static_cast(start_), locale_); - const input_char_type lower_ = std::tolower - (static_cast(start_), locale_); - - if (upper_ != static_cast(start_)) - { - out_.insert(range(upper_, upper_)); - } - - if (lower_ != static_cast(start_)) - { - out_.insert(range(lower_, lower_)); - } - } - } - - // http://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt - static void fold(const range &range_, const std::locale &, - string_token &out_, const two &) - { - static const fold_pair mapping_[] = - {{{0x0041, 0x005a}, {0x0061, 0x007a}}, - {{0x0061, 0x007a}, {0x0041, 0x005a}}, - {{0x00b5, 0x00b5}, {0x039c, 0x039c}}, - {{0x00c0, 0x00d6}, {0x00e0, 0x00f6}}, - {{0x00d8, 0x00de}, {0x00f8, 0x00fe}}, - {{0x00e0, 0x00f6}, {0x00c0, 0x00d6}}, - {{0x00f8, 0x00fe}, {0x00d8, 0x00de}}, - {{0x00ff, 0x00ff}, {0x0178, 0x0178}}, - {{0x0100, 0x0101}, {0x0101, 0x0100}}, - {{0x0102, 0x0103}, {0x0103, 0x0102}}, - {{0x0104, 0x0105}, {0x0105, 0x0104}}, - {{0x0106, 0x0107}, {0x0107, 0x0106}}, - {{0x0108, 0x0109}, {0x0109, 0x0108}}, - {{0x010a, 0x010b}, {0x010b, 0x010a}}, - {{0x010c, 0x010d}, {0x010d, 0x010c}}, - {{0x010e, 0x010f}, {0x010f, 0x010e}}, - {{0x0110, 0x0111}, {0x0111, 0x0110}}, - {{0x0112, 0x0113}, {0x0113, 0x0112}}, - {{0x0114, 0x0115}, {0x0115, 0x0114}}, - {{0x0116, 0x0117}, {0x0117, 0x0116}}, - {{0x0118, 0x0119}, 
{0x0119, 0x0118}}, - {{0x011a, 0x011b}, {0x011b, 0x011a}}, - {{0x011c, 0x011d}, {0x011d, 0x011c}}, - {{0x011e, 0x011f}, {0x011f, 0x011e}}, - {{0x0120, 0x0121}, {0x0121, 0x0120}}, - {{0x0122, 0x0123}, {0x0123, 0x0122}}, - {{0x0124, 0x0125}, {0x0125, 0x0124}}, - {{0x0126, 0x0127}, {0x0127, 0x0126}}, - {{0x0128, 0x0129}, {0x0129, 0x0128}}, - {{0x012a, 0x012b}, {0x012b, 0x012a}}, - {{0x012c, 0x012d}, {0x012d, 0x012c}}, - {{0x012e, 0x012f}, {0x012f, 0x012e}}, - {{0x0130, 0x0130}, {0x0069, 0x0069}}, - {{0x0131, 0x0131}, {0x0049, 0x0049}}, - {{0x0132, 0x0133}, {0x0133, 0x0132}}, - {{0x0134, 0x0135}, {0x0135, 0x0134}}, - {{0x0136, 0x0137}, {0x0137, 0x0136}}, - {{0x0139, 0x013a}, {0x013a, 0x0139}}, - {{0x013b, 0x013c}, {0x013c, 0x013b}}, - {{0x013d, 0x013e}, {0x013e, 0x013d}}, - {{0x013f, 0x0140}, {0x0140, 0x013f}}, - {{0x0141, 0x0142}, {0x0142, 0x0141}}, - {{0x0143, 0x0144}, {0x0144, 0x0143}}, - {{0x0145, 0x0146}, {0x0146, 0x0145}}, - {{0x0147, 0x0148}, {0x0148, 0x0147}}, - {{0x014a, 0x014b}, {0x014b, 0x014a}}, - {{0x014c, 0x014d}, {0x014d, 0x014c}}, - {{0x014e, 0x014f}, {0x014f, 0x014e}}, - {{0x0150, 0x0151}, {0x0151, 0x0150}}, - {{0x0152, 0x0153}, {0x0153, 0x0152}}, - {{0x0154, 0x0155}, {0x0155, 0x0154}}, - {{0x0156, 0x0157}, {0x0157, 0x0156}}, - {{0x0158, 0x0159}, {0x0159, 0x0158}}, - {{0x015a, 0x015b}, {0x015b, 0x015a}}, - {{0x015c, 0x015d}, {0x015d, 0x015c}}, - {{0x015e, 0x015f}, {0x015f, 0x015e}}, - {{0x0160, 0x0161}, {0x0161, 0x0160}}, - {{0x0162, 0x0163}, {0x0163, 0x0162}}, - {{0x0164, 0x0165}, {0x0165, 0x0164}}, - {{0x0166, 0x0167}, {0x0167, 0x0166}}, - {{0x0168, 0x0169}, {0x0169, 0x0168}}, - {{0x016a, 0x016b}, {0x016b, 0x016a}}, - {{0x016c, 0x016d}, {0x016d, 0x016c}}, - {{0x016e, 0x016f}, {0x016f, 0x016e}}, - {{0x0170, 0x0171}, {0x0171, 0x0170}}, - {{0x0172, 0x0173}, {0x0173, 0x0172}}, - {{0x0174, 0x0175}, {0x0175, 0x0174}}, - {{0x0176, 0x0177}, {0x0177, 0x0176}}, - {{0x0178, 0x0178}, {0x00ff, 0x00ff}}, - {{0x0179, 0x017a}, {0x017a, 0x0179}}, - {{0x017b, 0x017c}, 
{0x017c, 0x017b}}, - {{0x017d, 0x017e}, {0x017e, 0x017d}}, - {{0x017f, 0x017f}, {0x0053, 0x0053}}, - {{0x0180, 0x0180}, {0x0243, 0x0243}}, - {{0x0181, 0x0181}, {0x0253, 0x0253}}, - {{0x0182, 0x0183}, {0x0183, 0x0182}}, - {{0x0184, 0x0185}, {0x0185, 0x0184}}, - {{0x0186, 0x0186}, {0x0254, 0x0254}}, - {{0x0187, 0x0188}, {0x0188, 0x0187}}, - {{0x0189, 0x018a}, {0x0256, 0x0257}}, - {{0x018b, 0x018c}, {0x018c, 0x018b}}, - {{0x018e, 0x018e}, {0x01dd, 0x01dd}}, - {{0x018f, 0x018f}, {0x0259, 0x0259}}, - {{0x0190, 0x0190}, {0x025b, 0x025b}}, - {{0x0191, 0x0192}, {0x0192, 0x0191}}, - {{0x0193, 0x0193}, {0x0260, 0x0260}}, - {{0x0194, 0x0194}, {0x0263, 0x0263}}, - {{0x0195, 0x0195}, {0x01f6, 0x01f6}}, - {{0x0196, 0x0196}, {0x0269, 0x0269}}, - {{0x0197, 0x0197}, {0x0268, 0x0268}}, - {{0x0198, 0x0199}, {0x0199, 0x0198}}, - {{0x019a, 0x019a}, {0x023d, 0x023d}}, - {{0x019c, 0x019c}, {0x026f, 0x026f}}, - {{0x019d, 0x019d}, {0x0272, 0x0272}}, - {{0x019e, 0x019e}, {0x0220, 0x0220}}, - {{0x019f, 0x019f}, {0x0275, 0x0275}}, - {{0x01a0, 0x01a1}, {0x01a1, 0x01a0}}, - {{0x01a2, 0x01a3}, {0x01a3, 0x01a2}}, - {{0x01a4, 0x01a5}, {0x01a5, 0x01a4}}, - {{0x01a6, 0x01a6}, {0x0280, 0x0280}}, - {{0x01a7, 0x01a8}, {0x01a8, 0x01a7}}, - {{0x01a9, 0x01a9}, {0x0283, 0x0283}}, - {{0x01ac, 0x01ad}, {0x01ad, 0x01ac}}, - {{0x01ae, 0x01ae}, {0x0288, 0x0288}}, - {{0x01af, 0x01b0}, {0x01b0, 0x01af}}, - {{0x01b1, 0x01b2}, {0x028a, 0x028b}}, - {{0x01b3, 0x01b4}, {0x01b4, 0x01b3}}, - {{0x01b5, 0x01b6}, {0x01b6, 0x01b5}}, - {{0x01b7, 0x01b7}, {0x0292, 0x0292}}, - {{0x01b8, 0x01b9}, {0x01b9, 0x01b8}}, - {{0x01bc, 0x01bd}, {0x01bd, 0x01bc}}, - {{0x01bf, 0x01bf}, {0x01f7, 0x01f7}}, - {{0x01c4, 0x01c4}, {0x01c6, 0x01c6}}, - {{0x01c6, 0x01c6}, {0x01c4, 0x01c4}}, - {{0x01c7, 0x01c7}, {0x01c9, 0x01c9}}, - {{0x01c9, 0x01c9}, {0x01c7, 0x01c7}}, - {{0x01ca, 0x01ca}, {0x01cc, 0x01cc}}, - {{0x01cc, 0x01cc}, {0x01ca, 0x01ca}}, - {{0x01cd, 0x01ce}, {0x01ce, 0x01cd}}, - {{0x01cf, 0x01d0}, {0x01d0, 0x01cf}}, - {{0x01d1, 0x01d2}, 
{0x01d2, 0x01d1}}, - {{0x01d3, 0x01d4}, {0x01d4, 0x01d3}}, - {{0x01d5, 0x01d6}, {0x01d6, 0x01d5}}, - {{0x01d7, 0x01d8}, {0x01d8, 0x01d7}}, - {{0x01d9, 0x01da}, {0x01da, 0x01d9}}, - {{0x01db, 0x01dc}, {0x01dc, 0x01db}}, - {{0x01dd, 0x01dd}, {0x018e, 0x018e}}, - {{0x01de, 0x01df}, {0x01df, 0x01de}}, - {{0x01e0, 0x01e1}, {0x01e1, 0x01e0}}, - {{0x01e2, 0x01e3}, {0x01e3, 0x01e2}}, - {{0x01e4, 0x01e5}, {0x01e5, 0x01e4}}, - {{0x01e6, 0x01e7}, {0x01e7, 0x01e6}}, - {{0x01e8, 0x01e9}, {0x01e9, 0x01e8}}, - {{0x01ea, 0x01eb}, {0x01eb, 0x01ea}}, - {{0x01ec, 0x01ed}, {0x01ed, 0x01ec}}, - {{0x01ee, 0x01ef}, {0x01ef, 0x01ee}}, - {{0x01f1, 0x01f1}, {0x01f3, 0x01f3}}, - {{0x01f3, 0x01f3}, {0x01f1, 0x01f1}}, - {{0x01f4, 0x01f5}, {0x01f5, 0x01f4}}, - {{0x01f6, 0x01f6}, {0x0195, 0x0195}}, - {{0x01f7, 0x01f7}, {0x01bf, 0x01bf}}, - {{0x01f8, 0x01f9}, {0x01f9, 0x01f8}}, - {{0x01fa, 0x01fb}, {0x01fb, 0x01fa}}, - {{0x01fc, 0x01fd}, {0x01fd, 0x01fc}}, - {{0x01fe, 0x01ff}, {0x01ff, 0x01fe}}, - {{0x0200, 0x0201}, {0x0201, 0x0200}}, - {{0x0202, 0x0203}, {0x0203, 0x0202}}, - {{0x0204, 0x0205}, {0x0205, 0x0204}}, - {{0x0206, 0x0207}, {0x0207, 0x0206}}, - {{0x0208, 0x0209}, {0x0209, 0x0208}}, - {{0x020a, 0x020b}, {0x020b, 0x020a}}, - {{0x020c, 0x020d}, {0x020d, 0x020c}}, - {{0x020e, 0x020f}, {0x020f, 0x020e}}, - {{0x0210, 0x0211}, {0x0211, 0x0210}}, - {{0x0212, 0x0213}, {0x0213, 0x0212}}, - {{0x0214, 0x0215}, {0x0215, 0x0214}}, - {{0x0216, 0x0217}, {0x0217, 0x0216}}, - {{0x0218, 0x0219}, {0x0219, 0x0218}}, - {{0x021a, 0x021b}, {0x021b, 0x021a}}, - {{0x021c, 0x021d}, {0x021d, 0x021c}}, - {{0x021e, 0x021f}, {0x021f, 0x021e}}, - {{0x0220, 0x0220}, {0x019e, 0x019e}}, - {{0x0222, 0x0223}, {0x0223, 0x0222}}, - {{0x0224, 0x0225}, {0x0225, 0x0224}}, - {{0x0226, 0x0227}, {0x0227, 0x0226}}, - {{0x0228, 0x0229}, {0x0229, 0x0228}}, - {{0x022a, 0x022b}, {0x022b, 0x022a}}, - {{0x022c, 0x022d}, {0x022d, 0x022c}}, - {{0x022e, 0x022f}, {0x022f, 0x022e}}, - {{0x0230, 0x0231}, {0x0231, 0x0230}}, - {{0x0232, 0x0233}, 
{0x0233, 0x0232}}, - {{0x023a, 0x023a}, {0x2c65, 0x2c65}}, - {{0x023b, 0x023c}, {0x023c, 0x023b}}, - {{0x023d, 0x023d}, {0x019a, 0x019a}}, - {{0x023e, 0x023e}, {0x2c66, 0x2c66}}, - {{0x023f, 0x0240}, {0x2c7e, 0x2c7f}}, - {{0x0241, 0x0242}, {0x0242, 0x0241}}, - {{0x0243, 0x0243}, {0x0180, 0x0180}}, - {{0x0244, 0x0244}, {0x0289, 0x0289}}, - {{0x0245, 0x0245}, {0x028c, 0x028c}}, - {{0x0246, 0x0247}, {0x0247, 0x0246}}, - {{0x0248, 0x0249}, {0x0249, 0x0248}}, - {{0x024a, 0x024b}, {0x024b, 0x024a}}, - {{0x024c, 0x024d}, {0x024d, 0x024c}}, - {{0x024e, 0x024f}, {0x024f, 0x024e}}, - {{0x0250, 0x0250}, {0x2c6f, 0x2c6f}}, - {{0x0251, 0x0251}, {0x2c6d, 0x2c6d}}, - {{0x0252, 0x0252}, {0x2c70, 0x2c70}}, - {{0x0253, 0x0253}, {0x0181, 0x0181}}, - {{0x0254, 0x0254}, {0x0186, 0x0186}}, - {{0x0256, 0x0257}, {0x0189, 0x018a}}, - {{0x0259, 0x0259}, {0x018f, 0x018f}}, - {{0x025b, 0x025b}, {0x0190, 0x0190}}, - {{0x025c, 0x025c}, {0xa7ab, 0xa7ab}}, - {{0x0260, 0x0260}, {0x0193, 0x0193}}, - {{0x0261, 0x0261}, {0xa7ac, 0xa7ac}}, - {{0x0263, 0x0263}, {0x0194, 0x0194}}, - {{0x0265, 0x0265}, {0xa78d, 0xa78d}}, - {{0x0266, 0x0266}, {0xa7aa, 0xa7aa}}, - {{0x0268, 0x0269}, {0x0197, 0x0196}}, - {{0x026b, 0x026b}, {0x2c62, 0x2c62}}, - {{0x026c, 0x026c}, {0xa7ad, 0xa7ad}}, - {{0x026f, 0x026f}, {0x019c, 0x019c}}, - {{0x0271, 0x0271}, {0x2c6e, 0x2c6e}}, - {{0x0272, 0x0272}, {0x019d, 0x019d}}, - {{0x0275, 0x0275}, {0x019f, 0x019f}}, - {{0x027d, 0x027d}, {0x2c64, 0x2c64}}, - {{0x0280, 0x0280}, {0x01a6, 0x01a6}}, - {{0x0283, 0x0283}, {0x01a9, 0x01a9}}, - {{0x0287, 0x0287}, {0xa7b1, 0xa7b1}}, - {{0x0288, 0x0288}, {0x01ae, 0x01ae}}, - {{0x0289, 0x0289}, {0x0244, 0x0244}}, - {{0x028a, 0x028b}, {0x01b1, 0x01b2}}, - {{0x028c, 0x028c}, {0x0245, 0x0245}}, - {{0x0292, 0x0292}, {0x01b7, 0x01b7}}, - {{0x029d, 0x029d}, {0xa7b2, 0xa7b2}}, - {{0x029e, 0x029e}, {0xa7b0, 0xa7b0}}, - {{0x0370, 0x0371}, {0x0371, 0x0370}}, - {{0x0372, 0x0373}, {0x0373, 0x0372}}, - {{0x0376, 0x0377}, {0x0377, 0x0376}}, - {{0x037b, 0x037d}, 
{0x03fd, 0x03ff}}, - {{0x037f, 0x037f}, {0x03f3, 0x03f3}}, - {{0x0386, 0x0386}, {0x03ac, 0x03ac}}, - {{0x0388, 0x038a}, {0x03ad, 0x03af}}, - {{0x038c, 0x038c}, {0x03cc, 0x03cc}}, - {{0x038e, 0x038f}, {0x03cd, 0x03ce}}, - {{0x0391, 0x03a1}, {0x03b1, 0x03c1}}, - {{0x03a3, 0x03ab}, {0x03c3, 0x03cb}}, - {{0x03ac, 0x03ac}, {0x0386, 0x0386}}, - {{0x03ad, 0x03af}, {0x0388, 0x038a}}, - {{0x03b1, 0x03c1}, {0x0391, 0x03a1}}, - {{0x03c2, 0x03c2}, {0x03a3, 0x03a3}}, - {{0x03c3, 0x03cb}, {0x03a3, 0x03ab}}, - {{0x03cc, 0x03cc}, {0x038c, 0x038c}}, - {{0x03cd, 0x03ce}, {0x038e, 0x038f}}, - {{0x03cf, 0x03cf}, {0x03d7, 0x03d7}}, - {{0x03d0, 0x03d0}, {0x0392, 0x0392}}, - {{0x03d1, 0x03d1}, {0x0398, 0x0398}}, - {{0x03d5, 0x03d5}, {0x03a6, 0x03a6}}, - {{0x03d6, 0x03d6}, {0x03a0, 0x03a0}}, - {{0x03d7, 0x03d7}, {0x03cf, 0x03cf}}, - {{0x03d8, 0x03d9}, {0x03d9, 0x03d8}}, - {{0x03da, 0x03db}, {0x03db, 0x03da}}, - {{0x03dc, 0x03dd}, {0x03dd, 0x03dc}}, - {{0x03de, 0x03df}, {0x03df, 0x03de}}, - {{0x03e0, 0x03e1}, {0x03e1, 0x03e0}}, - {{0x03e2, 0x03e3}, {0x03e3, 0x03e2}}, - {{0x03e4, 0x03e5}, {0x03e5, 0x03e4}}, - {{0x03e6, 0x03e7}, {0x03e7, 0x03e6}}, - {{0x03e8, 0x03e9}, {0x03e9, 0x03e8}}, - {{0x03ea, 0x03eb}, {0x03eb, 0x03ea}}, - {{0x03ec, 0x03ed}, {0x03ed, 0x03ec}}, - {{0x03ee, 0x03ef}, {0x03ef, 0x03ee}}, - {{0x03f0, 0x03f0}, {0x039a, 0x039a}}, - {{0x03f1, 0x03f1}, {0x03a1, 0x03a1}}, - {{0x03f2, 0x03f2}, {0x03f9, 0x03f9}}, - {{0x03f3, 0x03f3}, {0x037f, 0x037f}}, - {{0x03f4, 0x03f4}, {0x03b8, 0x03b8}}, - {{0x03f5, 0x03f5}, {0x0395, 0x0395}}, - {{0x03f7, 0x03f8}, {0x03f8, 0x03f7}}, - {{0x03f9, 0x03f9}, {0x03f2, 0x03f2}}, - {{0x03fa, 0x03fb}, {0x03fb, 0x03fa}}, - {{0x03fd, 0x03ff}, {0x037b, 0x037d}}, - {{0x0400, 0x040f}, {0x0450, 0x045f}}, - {{0x0410, 0x042f}, {0x0430, 0x044f}}, - {{0x0430, 0x044f}, {0x0410, 0x042f}}, - {{0x0450, 0x045f}, {0x0400, 0x040f}}, - {{0x0460, 0x0461}, {0x0461, 0x0460}}, - {{0x0462, 0x0463}, {0x0463, 0x0462}}, - {{0x0464, 0x0465}, {0x0465, 0x0464}}, - {{0x0466, 0x0467}, 
{0x0467, 0x0466}}, - {{0x0468, 0x0469}, {0x0469, 0x0468}}, - {{0x046a, 0x046b}, {0x046b, 0x046a}}, - {{0x046c, 0x046d}, {0x046d, 0x046c}}, - {{0x046e, 0x046f}, {0x046f, 0x046e}}, - {{0x0470, 0x0471}, {0x0471, 0x0470}}, - {{0x0472, 0x0473}, {0x0473, 0x0472}}, - {{0x0474, 0x0475}, {0x0475, 0x0474}}, - {{0x0476, 0x0477}, {0x0477, 0x0476}}, - {{0x0478, 0x0479}, {0x0479, 0x0478}}, - {{0x047a, 0x047b}, {0x047b, 0x047a}}, - {{0x047c, 0x047d}, {0x047d, 0x047c}}, - {{0x047e, 0x047f}, {0x047f, 0x047e}}, - {{0x0480, 0x0481}, {0x0481, 0x0480}}, - {{0x048a, 0x048b}, {0x048b, 0x048a}}, - {{0x048c, 0x048d}, {0x048d, 0x048c}}, - {{0x048e, 0x048f}, {0x048f, 0x048e}}, - {{0x0490, 0x0491}, {0x0491, 0x0490}}, - {{0x0492, 0x0493}, {0x0493, 0x0492}}, - {{0x0494, 0x0495}, {0x0495, 0x0494}}, - {{0x0496, 0x0497}, {0x0497, 0x0496}}, - {{0x0498, 0x0499}, {0x0499, 0x0498}}, - {{0x049a, 0x049b}, {0x049b, 0x049a}}, - {{0x049c, 0x049d}, {0x049d, 0x049c}}, - {{0x049e, 0x049f}, {0x049f, 0x049e}}, - {{0x04a0, 0x04a1}, {0x04a1, 0x04a0}}, - {{0x04a2, 0x04a3}, {0x04a3, 0x04a2}}, - {{0x04a4, 0x04a5}, {0x04a5, 0x04a4}}, - {{0x04a6, 0x04a7}, {0x04a7, 0x04a6}}, - {{0x04a8, 0x04a9}, {0x04a9, 0x04a8}}, - {{0x04aa, 0x04ab}, {0x04ab, 0x04aa}}, - {{0x04ac, 0x04ad}, {0x04ad, 0x04ac}}, - {{0x04ae, 0x04af}, {0x04af, 0x04ae}}, - {{0x04b0, 0x04b1}, {0x04b1, 0x04b0}}, - {{0x04b2, 0x04b3}, {0x04b3, 0x04b2}}, - {{0x04b4, 0x04b5}, {0x04b5, 0x04b4}}, - {{0x04b6, 0x04b7}, {0x04b7, 0x04b6}}, - {{0x04b8, 0x04b9}, {0x04b9, 0x04b8}}, - {{0x04ba, 0x04bb}, {0x04bb, 0x04ba}}, - {{0x04bc, 0x04bd}, {0x04bd, 0x04bc}}, - {{0x04be, 0x04bf}, {0x04bf, 0x04be}}, - {{0x04c0, 0x04c0}, {0x04cf, 0x04cf}}, - {{0x04c1, 0x04c2}, {0x04c2, 0x04c1}}, - {{0x04c3, 0x04c4}, {0x04c4, 0x04c3}}, - {{0x04c5, 0x04c6}, {0x04c6, 0x04c5}}, - {{0x04c7, 0x04c8}, {0x04c8, 0x04c7}}, - {{0x04c9, 0x04ca}, {0x04ca, 0x04c9}}, - {{0x04cb, 0x04cc}, {0x04cc, 0x04cb}}, - {{0x04cd, 0x04ce}, {0x04ce, 0x04cd}}, - {{0x04cf, 0x04cf}, {0x04c0, 0x04c0}}, - {{0x04d0, 0x04d1}, 
{0x04d1, 0x04d0}}, - {{0x04d2, 0x04d3}, {0x04d3, 0x04d2}}, - {{0x04d4, 0x04d5}, {0x04d5, 0x04d4}}, - {{0x04d6, 0x04d7}, {0x04d7, 0x04d6}}, - {{0x04d8, 0x04d9}, {0x04d9, 0x04d8}}, - {{0x04da, 0x04db}, {0x04db, 0x04da}}, - {{0x04dc, 0x04dd}, {0x04dd, 0x04dc}}, - {{0x04de, 0x04df}, {0x04df, 0x04de}}, - {{0x04e0, 0x04e1}, {0x04e1, 0x04e0}}, - {{0x04e2, 0x04e3}, {0x04e3, 0x04e2}}, - {{0x04e4, 0x04e5}, {0x04e5, 0x04e4}}, - {{0x04e6, 0x04e7}, {0x04e7, 0x04e6}}, - {{0x04e8, 0x04e9}, {0x04e9, 0x04e8}}, - {{0x04ea, 0x04eb}, {0x04eb, 0x04ea}}, - {{0x04ec, 0x04ed}, {0x04ed, 0x04ec}}, - {{0x04ee, 0x04ef}, {0x04ef, 0x04ee}}, - {{0x04f0, 0x04f1}, {0x04f1, 0x04f0}}, - {{0x04f2, 0x04f3}, {0x04f3, 0x04f2}}, - {{0x04f4, 0x04f5}, {0x04f5, 0x04f4}}, - {{0x04f6, 0x04f7}, {0x04f7, 0x04f6}}, - {{0x04f8, 0x04f9}, {0x04f9, 0x04f8}}, - {{0x04fa, 0x04fb}, {0x04fb, 0x04fa}}, - {{0x04fc, 0x04fd}, {0x04fd, 0x04fc}}, - {{0x04fe, 0x04ff}, {0x04ff, 0x04fe}}, - {{0x0500, 0x0501}, {0x0501, 0x0500}}, - {{0x0502, 0x0503}, {0x0503, 0x0502}}, - {{0x0504, 0x0505}, {0x0505, 0x0504}}, - {{0x0506, 0x0507}, {0x0507, 0x0506}}, - {{0x0508, 0x0509}, {0x0509, 0x0508}}, - {{0x050a, 0x050b}, {0x050b, 0x050a}}, - {{0x050c, 0x050d}, {0x050d, 0x050c}}, - {{0x050e, 0x050f}, {0x050f, 0x050e}}, - {{0x0510, 0x0511}, {0x0511, 0x0510}}, - {{0x0512, 0x0513}, {0x0513, 0x0512}}, - {{0x0514, 0x0515}, {0x0515, 0x0514}}, - {{0x0516, 0x0517}, {0x0517, 0x0516}}, - {{0x0518, 0x0519}, {0x0519, 0x0518}}, - {{0x051a, 0x051b}, {0x051b, 0x051a}}, - {{0x051c, 0x051d}, {0x051d, 0x051c}}, - {{0x051e, 0x051f}, {0x051f, 0x051e}}, - {{0x0520, 0x0521}, {0x0521, 0x0520}}, - {{0x0522, 0x0523}, {0x0523, 0x0522}}, - {{0x0524, 0x0525}, {0x0525, 0x0524}}, - {{0x0526, 0x0527}, {0x0527, 0x0526}}, - {{0x0528, 0x0529}, {0x0529, 0x0528}}, - {{0x052a, 0x052b}, {0x052b, 0x052a}}, - {{0x052c, 0x052d}, {0x052d, 0x052c}}, - {{0x052e, 0x052f}, {0x052f, 0x052e}}, - {{0x0531, 0x0556}, {0x0561, 0x0586}}, - {{0x0561, 0x0586}, {0x0531, 0x0556}}, - {{0x10a0, 0x10c5}, 
{0x2d00, 0x2d25}}, - {{0x10c7, 0x10c7}, {0x2d27, 0x2d27}}, - {{0x10cd, 0x10cd}, {0x2d2d, 0x2d2d}}, - {{0x13a0, 0x13ef}, {0xab70, 0xabbf}}, - {{0x13f0, 0x13f5}, {0x13f8, 0x13fd}}, - {{0x13f8, 0x13fd}, {0x13f0, 0x13f5}}, - {{0x1d79, 0x1d79}, {0xa77d, 0xa77d}}, - {{0x1d7d, 0x1d7d}, {0x2c63, 0x2c63}}, - {{0x1e00, 0x1e01}, {0x1e01, 0x1e00}}, - {{0x1e02, 0x1e03}, {0x1e03, 0x1e02}}, - {{0x1e04, 0x1e05}, {0x1e05, 0x1e04}}, - {{0x1e06, 0x1e07}, {0x1e07, 0x1e06}}, - {{0x1e08, 0x1e09}, {0x1e09, 0x1e08}}, - {{0x1e0a, 0x1e0b}, {0x1e0b, 0x1e0a}}, - {{0x1e0c, 0x1e0d}, {0x1e0d, 0x1e0c}}, - {{0x1e0e, 0x1e0f}, {0x1e0f, 0x1e0e}}, - {{0x1e10, 0x1e11}, {0x1e11, 0x1e10}}, - {{0x1e12, 0x1e13}, {0x1e13, 0x1e12}}, - {{0x1e14, 0x1e15}, {0x1e15, 0x1e14}}, - {{0x1e16, 0x1e17}, {0x1e17, 0x1e16}}, - {{0x1e18, 0x1e19}, {0x1e19, 0x1e18}}, - {{0x1e1a, 0x1e1b}, {0x1e1b, 0x1e1a}}, - {{0x1e1c, 0x1e1d}, {0x1e1d, 0x1e1c}}, - {{0x1e1e, 0x1e1f}, {0x1e1f, 0x1e1e}}, - {{0x1e20, 0x1e21}, {0x1e21, 0x1e20}}, - {{0x1e22, 0x1e23}, {0x1e23, 0x1e22}}, - {{0x1e24, 0x1e25}, {0x1e25, 0x1e24}}, - {{0x1e26, 0x1e27}, {0x1e27, 0x1e26}}, - {{0x1e28, 0x1e29}, {0x1e29, 0x1e28}}, - {{0x1e2a, 0x1e2b}, {0x1e2b, 0x1e2a}}, - {{0x1e2c, 0x1e2d}, {0x1e2d, 0x1e2c}}, - {{0x1e2e, 0x1e2f}, {0x1e2f, 0x1e2e}}, - {{0x1e30, 0x1e31}, {0x1e31, 0x1e30}}, - {{0x1e32, 0x1e33}, {0x1e33, 0x1e32}}, - {{0x1e34, 0x1e35}, {0x1e35, 0x1e34}}, - {{0x1e36, 0x1e37}, {0x1e37, 0x1e36}}, - {{0x1e38, 0x1e39}, {0x1e39, 0x1e38}}, - {{0x1e3a, 0x1e3b}, {0x1e3b, 0x1e3a}}, - {{0x1e3c, 0x1e3d}, {0x1e3d, 0x1e3c}}, - {{0x1e3e, 0x1e3f}, {0x1e3f, 0x1e3e}}, - {{0x1e40, 0x1e41}, {0x1e41, 0x1e40}}, - {{0x1e42, 0x1e43}, {0x1e43, 0x1e42}}, - {{0x1e44, 0x1e45}, {0x1e45, 0x1e44}}, - {{0x1e46, 0x1e47}, {0x1e47, 0x1e46}}, - {{0x1e48, 0x1e49}, {0x1e49, 0x1e48}}, - {{0x1e4a, 0x1e4b}, {0x1e4b, 0x1e4a}}, - {{0x1e4c, 0x1e4d}, {0x1e4d, 0x1e4c}}, - {{0x1e4e, 0x1e4f}, {0x1e4f, 0x1e4e}}, - {{0x1e50, 0x1e51}, {0x1e51, 0x1e50}}, - {{0x1e52, 0x1e53}, {0x1e53, 0x1e52}}, - {{0x1e54, 0x1e55}, 
{0x1e55, 0x1e54}}, - {{0x1e56, 0x1e57}, {0x1e57, 0x1e56}}, - {{0x1e58, 0x1e59}, {0x1e59, 0x1e58}}, - {{0x1e5a, 0x1e5b}, {0x1e5b, 0x1e5a}}, - {{0x1e5c, 0x1e5d}, {0x1e5d, 0x1e5c}}, - {{0x1e5e, 0x1e5f}, {0x1e5f, 0x1e5e}}, - {{0x1e60, 0x1e61}, {0x1e61, 0x1e60}}, - {{0x1e62, 0x1e63}, {0x1e63, 0x1e62}}, - {{0x1e64, 0x1e65}, {0x1e65, 0x1e64}}, - {{0x1e66, 0x1e67}, {0x1e67, 0x1e66}}, - {{0x1e68, 0x1e69}, {0x1e69, 0x1e68}}, - {{0x1e6a, 0x1e6b}, {0x1e6b, 0x1e6a}}, - {{0x1e6c, 0x1e6d}, {0x1e6d, 0x1e6c}}, - {{0x1e6e, 0x1e6f}, {0x1e6f, 0x1e6e}}, - {{0x1e70, 0x1e71}, {0x1e71, 0x1e70}}, - {{0x1e72, 0x1e73}, {0x1e73, 0x1e72}}, - {{0x1e74, 0x1e75}, {0x1e75, 0x1e74}}, - {{0x1e76, 0x1e77}, {0x1e77, 0x1e76}}, - {{0x1e78, 0x1e79}, {0x1e79, 0x1e78}}, - {{0x1e7a, 0x1e7b}, {0x1e7b, 0x1e7a}}, - {{0x1e7c, 0x1e7d}, {0x1e7d, 0x1e7c}}, - {{0x1e7e, 0x1e7f}, {0x1e7f, 0x1e7e}}, - {{0x1e80, 0x1e81}, {0x1e81, 0x1e80}}, - {{0x1e82, 0x1e83}, {0x1e83, 0x1e82}}, - {{0x1e84, 0x1e85}, {0x1e85, 0x1e84}}, - {{0x1e86, 0x1e87}, {0x1e87, 0x1e86}}, - {{0x1e88, 0x1e89}, {0x1e89, 0x1e88}}, - {{0x1e8a, 0x1e8b}, {0x1e8b, 0x1e8a}}, - {{0x1e8c, 0x1e8d}, {0x1e8d, 0x1e8c}}, - {{0x1e8e, 0x1e8f}, {0x1e8f, 0x1e8e}}, - {{0x1e90, 0x1e91}, {0x1e91, 0x1e90}}, - {{0x1e92, 0x1e93}, {0x1e93, 0x1e92}}, - {{0x1e94, 0x1e95}, {0x1e95, 0x1e94}}, - {{0x1e9b, 0x1e9b}, {0x1e60, 0x1e60}}, - {{0x1e9e, 0x1e9e}, {0x00df, 0x00df}}, - {{0x1ea0, 0x1ea1}, {0x1ea1, 0x1ea0}}, - {{0x1ea2, 0x1ea3}, {0x1ea3, 0x1ea2}}, - {{0x1ea4, 0x1ea5}, {0x1ea5, 0x1ea4}}, - {{0x1ea6, 0x1ea7}, {0x1ea7, 0x1ea6}}, - {{0x1ea8, 0x1ea9}, {0x1ea9, 0x1ea8}}, - {{0x1eaa, 0x1eab}, {0x1eab, 0x1eaa}}, - {{0x1eac, 0x1ead}, {0x1ead, 0x1eac}}, - {{0x1eae, 0x1eaf}, {0x1eaf, 0x1eae}}, - {{0x1eb0, 0x1eb1}, {0x1eb1, 0x1eb0}}, - {{0x1eb2, 0x1eb3}, {0x1eb3, 0x1eb2}}, - {{0x1eb4, 0x1eb5}, {0x1eb5, 0x1eb4}}, - {{0x1eb6, 0x1eb7}, {0x1eb7, 0x1eb6}}, - {{0x1eb8, 0x1eb9}, {0x1eb9, 0x1eb8}}, - {{0x1eba, 0x1ebb}, {0x1ebb, 0x1eba}}, - {{0x1ebc, 0x1ebd}, {0x1ebd, 0x1ebc}}, - {{0x1ebe, 0x1ebf}, 
{0x1ebf, 0x1ebe}}, - {{0x1ec0, 0x1ec1}, {0x1ec1, 0x1ec0}}, - {{0x1ec2, 0x1ec3}, {0x1ec3, 0x1ec2}}, - {{0x1ec4, 0x1ec5}, {0x1ec5, 0x1ec4}}, - {{0x1ec6, 0x1ec7}, {0x1ec7, 0x1ec6}}, - {{0x1ec8, 0x1ec9}, {0x1ec9, 0x1ec8}}, - {{0x1eca, 0x1ecb}, {0x1ecb, 0x1eca}}, - {{0x1ecc, 0x1ecd}, {0x1ecd, 0x1ecc}}, - {{0x1ece, 0x1ecf}, {0x1ecf, 0x1ece}}, - {{0x1ed0, 0x1ed1}, {0x1ed1, 0x1ed0}}, - {{0x1ed2, 0x1ed3}, {0x1ed3, 0x1ed2}}, - {{0x1ed4, 0x1ed5}, {0x1ed5, 0x1ed4}}, - {{0x1ed6, 0x1ed7}, {0x1ed7, 0x1ed6}}, - {{0x1ed8, 0x1ed9}, {0x1ed9, 0x1ed8}}, - {{0x1eda, 0x1edb}, {0x1edb, 0x1eda}}, - {{0x1edc, 0x1edd}, {0x1edd, 0x1edc}}, - {{0x1ede, 0x1edf}, {0x1edf, 0x1ede}}, - {{0x1ee0, 0x1ee1}, {0x1ee1, 0x1ee0}}, - {{0x1ee2, 0x1ee3}, {0x1ee3, 0x1ee2}}, - {{0x1ee4, 0x1ee5}, {0x1ee5, 0x1ee4}}, - {{0x1ee6, 0x1ee7}, {0x1ee7, 0x1ee6}}, - {{0x1ee8, 0x1ee9}, {0x1ee9, 0x1ee8}}, - {{0x1eea, 0x1eeb}, {0x1eeb, 0x1eea}}, - {{0x1eec, 0x1eed}, {0x1eed, 0x1eec}}, - {{0x1eee, 0x1eef}, {0x1eef, 0x1eee}}, - {{0x1ef0, 0x1ef1}, {0x1ef1, 0x1ef0}}, - {{0x1ef2, 0x1ef3}, {0x1ef3, 0x1ef2}}, - {{0x1ef4, 0x1ef5}, {0x1ef5, 0x1ef4}}, - {{0x1ef6, 0x1ef7}, {0x1ef7, 0x1ef6}}, - {{0x1ef8, 0x1ef9}, {0x1ef9, 0x1ef8}}, - {{0x1efa, 0x1efb}, {0x1efb, 0x1efa}}, - {{0x1efc, 0x1efd}, {0x1efd, 0x1efc}}, - {{0x1efe, 0x1eff}, {0x1eff, 0x1efe}}, - {{0x1f00, 0x1f07}, {0x1f08, 0x1f0f}}, - {{0x1f08, 0x1f0f}, {0x1f00, 0x1f07}}, - {{0x1f10, 0x1f15}, {0x1f18, 0x1f1d}}, - {{0x1f18, 0x1f1d}, {0x1f10, 0x1f15}}, - {{0x1f20, 0x1f27}, {0x1f28, 0x1f2f}}, - {{0x1f28, 0x1f2f}, {0x1f20, 0x1f27}}, - {{0x1f30, 0x1f37}, {0x1f38, 0x1f3f}}, - {{0x1f38, 0x1f3f}, {0x1f30, 0x1f37}}, - {{0x1f40, 0x1f45}, {0x1f48, 0x1f4d}}, - {{0x1f48, 0x1f4d}, {0x1f40, 0x1f45}}, - {{0x1f51, 0x1f51}, {0x1f59, 0x1f59}}, - {{0x1f53, 0x1f53}, {0x1f5b, 0x1f5b}}, - {{0x1f55, 0x1f55}, {0x1f5d, 0x1f5d}}, - {{0x1f57, 0x1f57}, {0x1f5f, 0x1f5f}}, - {{0x1f59, 0x1f59}, {0x1f51, 0x1f51}}, - {{0x1f5b, 0x1f5b}, {0x1f53, 0x1f53}}, - {{0x1f5d, 0x1f5d}, {0x1f55, 0x1f55}}, - {{0x1f5f, 0x1f5f}, 
{0x1f57, 0x1f57}}, - {{0x1f60, 0x1f67}, {0x1f68, 0x1f6f}}, - {{0x1f68, 0x1f6f}, {0x1f60, 0x1f67}}, - {{0x1f70, 0x1f71}, {0x1fba, 0x1fbb}}, - {{0x1f72, 0x1f75}, {0x1fc8, 0x1fcb}}, - {{0x1f76, 0x1f77}, {0x1fda, 0x1fdb}}, - {{0x1f78, 0x1f79}, {0x1ff8, 0x1ff9}}, - {{0x1f7a, 0x1f7b}, {0x1fea, 0x1feb}}, - {{0x1f7c, 0x1f7d}, {0x1ffa, 0x1ffb}}, - {{0x1f80, 0x1f87}, {0x1f88, 0x1f8f}}, - {{0x1f90, 0x1f97}, {0x1f98, 0x1f9f}}, - {{0x1fa0, 0x1fa7}, {0x1fa8, 0x1faf}}, - {{0x1fb0, 0x1fb1}, {0x1fb8, 0x1fb9}}, - {{0x1fb3, 0x1fb3}, {0x1fbc, 0x1fbc}}, - {{0x1fb8, 0x1fb9}, {0x1fb0, 0x1fb1}}, - {{0x1fba, 0x1fbb}, {0x1f70, 0x1f71}}, - {{0x1fbe, 0x1fbe}, {0x0399, 0x0399}}, - {{0x1fc3, 0x1fc3}, {0x1fcc, 0x1fcc}}, - {{0x1fc8, 0x1fcb}, {0x1f72, 0x1f75}}, - {{0x1fd0, 0x1fd1}, {0x1fd8, 0x1fd9}}, - {{0x1fd8, 0x1fd9}, {0x1fd0, 0x1fd1}}, - {{0x1fda, 0x1fdb}, {0x1f76, 0x1f77}}, - {{0x1fe0, 0x1fe1}, {0x1fe8, 0x1fe9}}, - {{0x1fe5, 0x1fe5}, {0x1fec, 0x1fec}}, - {{0x1fe8, 0x1fe9}, {0x1fe0, 0x1fe1}}, - {{0x1fea, 0x1feb}, {0x1f7a, 0x1f7b}}, - {{0x1fec, 0x1fec}, {0x1fe5, 0x1fe5}}, - {{0x1ff3, 0x1ff3}, {0x1ffc, 0x1ffc}}, - {{0x1ff8, 0x1ff9}, {0x1f78, 0x1f79}}, - {{0x1ffa, 0x1ffb}, {0x1f7c, 0x1f7d}}, - {{0x2126, 0x2126}, {0x03c9, 0x03c9}}, - {{0x212a, 0x212a}, {0x006b, 0x006b}}, - {{0x212b, 0x212b}, {0x00e5, 0x00e5}}, - {{0x2132, 0x2132}, {0x214e, 0x214e}}, - {{0x214e, 0x214e}, {0x2132, 0x2132}}, - {{0x2183, 0x2184}, {0x2184, 0x2183}}, - {{0x2c00, 0x2c2e}, {0x2c30, 0x2c5e}}, - {{0x2c30, 0x2c5e}, {0x2c00, 0x2c2e}}, - {{0x2c60, 0x2c61}, {0x2c61, 0x2c60}}, - {{0x2c62, 0x2c62}, {0x026b, 0x026b}}, - {{0x2c63, 0x2c63}, {0x1d7d, 0x1d7d}}, - {{0x2c64, 0x2c64}, {0x027d, 0x027d}}, - {{0x2c65, 0x2c65}, {0x023a, 0x023a}}, - {{0x2c66, 0x2c66}, {0x023e, 0x023e}}, - {{0x2c67, 0x2c68}, {0x2c68, 0x2c67}}, - {{0x2c69, 0x2c6a}, {0x2c6a, 0x2c69}}, - {{0x2c6b, 0x2c6c}, {0x2c6c, 0x2c6b}}, - {{0x2c6d, 0x2c6d}, {0x0251, 0x0251}}, - {{0x2c6e, 0x2c6e}, {0x0271, 0x0271}}, - {{0x2c6f, 0x2c6f}, {0x0250, 0x0250}}, - {{0x2c70, 0x2c70}, 
{0x0252, 0x0252}}, - {{0x2c72, 0x2c73}, {0x2c73, 0x2c72}}, - {{0x2c75, 0x2c76}, {0x2c76, 0x2c75}}, - {{0x2c7e, 0x2c7f}, {0x023f, 0x0240}}, - {{0x2c80, 0x2c81}, {0x2c81, 0x2c80}}, - {{0x2c82, 0x2c83}, {0x2c83, 0x2c82}}, - {{0x2c84, 0x2c85}, {0x2c85, 0x2c84}}, - {{0x2c86, 0x2c87}, {0x2c87, 0x2c86}}, - {{0x2c88, 0x2c89}, {0x2c89, 0x2c88}}, - {{0x2c8a, 0x2c8b}, {0x2c8b, 0x2c8a}}, - {{0x2c8c, 0x2c8d}, {0x2c8d, 0x2c8c}}, - {{0x2c8e, 0x2c8f}, {0x2c8f, 0x2c8e}}, - {{0x2c90, 0x2c91}, {0x2c91, 0x2c90}}, - {{0x2c92, 0x2c93}, {0x2c93, 0x2c92}}, - {{0x2c94, 0x2c95}, {0x2c95, 0x2c94}}, - {{0x2c96, 0x2c97}, {0x2c97, 0x2c96}}, - {{0x2c98, 0x2c99}, {0x2c99, 0x2c98}}, - {{0x2c9a, 0x2c9b}, {0x2c9b, 0x2c9a}}, - {{0x2c9c, 0x2c9d}, {0x2c9d, 0x2c9c}}, - {{0x2c9e, 0x2c9f}, {0x2c9f, 0x2c9e}}, - {{0x2ca0, 0x2ca1}, {0x2ca1, 0x2ca0}}, - {{0x2ca2, 0x2ca3}, {0x2ca3, 0x2ca2}}, - {{0x2ca4, 0x2ca5}, {0x2ca5, 0x2ca4}}, - {{0x2ca6, 0x2ca7}, {0x2ca7, 0x2ca6}}, - {{0x2ca8, 0x2ca9}, {0x2ca9, 0x2ca8}}, - {{0x2caa, 0x2cab}, {0x2cab, 0x2caa}}, - {{0x2cac, 0x2cad}, {0x2cad, 0x2cac}}, - {{0x2cae, 0x2caf}, {0x2caf, 0x2cae}}, - {{0x2cb0, 0x2cb1}, {0x2cb1, 0x2cb0}}, - {{0x2cb2, 0x2cb3}, {0x2cb3, 0x2cb2}}, - {{0x2cb4, 0x2cb5}, {0x2cb5, 0x2cb4}}, - {{0x2cb6, 0x2cb7}, {0x2cb7, 0x2cb6}}, - {{0x2cb8, 0x2cb9}, {0x2cb9, 0x2cb8}}, - {{0x2cba, 0x2cbb}, {0x2cbb, 0x2cba}}, - {{0x2cbc, 0x2cbd}, {0x2cbd, 0x2cbc}}, - {{0x2cbe, 0x2cbf}, {0x2cbf, 0x2cbe}}, - {{0x2cc0, 0x2cc1}, {0x2cc1, 0x2cc0}}, - {{0x2cc2, 0x2cc3}, {0x2cc3, 0x2cc2}}, - {{0x2cc4, 0x2cc5}, {0x2cc5, 0x2cc4}}, - {{0x2cc6, 0x2cc7}, {0x2cc7, 0x2cc6}}, - {{0x2cc8, 0x2cc9}, {0x2cc9, 0x2cc8}}, - {{0x2cca, 0x2ccb}, {0x2ccb, 0x2cca}}, - {{0x2ccc, 0x2ccd}, {0x2ccd, 0x2ccc}}, - {{0x2cce, 0x2ccf}, {0x2ccf, 0x2cce}}, - {{0x2cd0, 0x2cd1}, {0x2cd1, 0x2cd0}}, - {{0x2cd2, 0x2cd3}, {0x2cd3, 0x2cd2}}, - {{0x2cd4, 0x2cd5}, {0x2cd5, 0x2cd4}}, - {{0x2cd6, 0x2cd7}, {0x2cd7, 0x2cd6}}, - {{0x2cd8, 0x2cd9}, {0x2cd9, 0x2cd8}}, - {{0x2cda, 0x2cdb}, {0x2cdb, 0x2cda}}, - {{0x2cdc, 0x2cdd}, 
{0x2cdd, 0x2cdc}}, - {{0x2cde, 0x2cdf}, {0x2cdf, 0x2cde}}, - {{0x2ce0, 0x2ce1}, {0x2ce1, 0x2ce0}}, - {{0x2ce2, 0x2ce3}, {0x2ce3, 0x2ce2}}, - {{0x2ceb, 0x2cec}, {0x2cec, 0x2ceb}}, - {{0x2ced, 0x2cee}, {0x2cee, 0x2ced}}, - {{0x2cf2, 0x2cf3}, {0x2cf3, 0x2cf2}}, - {{0x2d00, 0x2d25}, {0x10a0, 0x10c5}}, - {{0x2d27, 0x2d27}, {0x10c7, 0x10c7}}, - {{0x2d2d, 0x2d2d}, {0x10cd, 0x10cd}}, - {{0xa640, 0xa641}, {0xa641, 0xa640}}, - {{0xa642, 0xa643}, {0xa643, 0xa642}}, - {{0xa644, 0xa645}, {0xa645, 0xa644}}, - {{0xa646, 0xa647}, {0xa647, 0xa646}}, - {{0xa648, 0xa649}, {0xa649, 0xa648}}, - {{0xa64a, 0xa64b}, {0xa64b, 0xa64a}}, - {{0xa64c, 0xa64d}, {0xa64d, 0xa64c}}, - {{0xa64e, 0xa64f}, {0xa64f, 0xa64e}}, - {{0xa650, 0xa651}, {0xa651, 0xa650}}, - {{0xa652, 0xa653}, {0xa653, 0xa652}}, - {{0xa654, 0xa655}, {0xa655, 0xa654}}, - {{0xa656, 0xa657}, {0xa657, 0xa656}}, - {{0xa658, 0xa659}, {0xa659, 0xa658}}, - {{0xa65a, 0xa65b}, {0xa65b, 0xa65a}}, - {{0xa65c, 0xa65d}, {0xa65d, 0xa65c}}, - {{0xa65e, 0xa65f}, {0xa65f, 0xa65e}}, - {{0xa660, 0xa661}, {0xa661, 0xa660}}, - {{0xa662, 0xa663}, {0xa663, 0xa662}}, - {{0xa664, 0xa665}, {0xa665, 0xa664}}, - {{0xa666, 0xa667}, {0xa667, 0xa666}}, - {{0xa668, 0xa669}, {0xa669, 0xa668}}, - {{0xa66a, 0xa66b}, {0xa66b, 0xa66a}}, - {{0xa66c, 0xa66d}, {0xa66d, 0xa66c}}, - {{0xa680, 0xa681}, {0xa681, 0xa680}}, - {{0xa682, 0xa683}, {0xa683, 0xa682}}, - {{0xa684, 0xa685}, {0xa685, 0xa684}}, - {{0xa686, 0xa687}, {0xa687, 0xa686}}, - {{0xa688, 0xa689}, {0xa689, 0xa688}}, - {{0xa68a, 0xa68b}, {0xa68b, 0xa68a}}, - {{0xa68c, 0xa68d}, {0xa68d, 0xa68c}}, - {{0xa68e, 0xa68f}, {0xa68f, 0xa68e}}, - {{0xa690, 0xa691}, {0xa691, 0xa690}}, - {{0xa692, 0xa693}, {0xa693, 0xa692}}, - {{0xa694, 0xa695}, {0xa695, 0xa694}}, - {{0xa696, 0xa697}, {0xa697, 0xa696}}, - {{0xa698, 0xa699}, {0xa699, 0xa698}}, - {{0xa69a, 0xa69b}, {0xa69b, 0xa69a}}, - {{0xa722, 0xa723}, {0xa723, 0xa722}}, - {{0xa724, 0xa725}, {0xa725, 0xa724}}, - {{0xa726, 0xa727}, {0xa727, 0xa726}}, - {{0xa728, 0xa729}, 
{0xa729, 0xa728}}, - {{0xa72a, 0xa72b}, {0xa72b, 0xa72a}}, - {{0xa72c, 0xa72d}, {0xa72d, 0xa72c}}, - {{0xa72e, 0xa72f}, {0xa72f, 0xa72e}}, - {{0xa732, 0xa733}, {0xa733, 0xa732}}, - {{0xa734, 0xa735}, {0xa735, 0xa734}}, - {{0xa736, 0xa737}, {0xa737, 0xa736}}, - {{0xa738, 0xa739}, {0xa739, 0xa738}}, - {{0xa73a, 0xa73b}, {0xa73b, 0xa73a}}, - {{0xa73c, 0xa73d}, {0xa73d, 0xa73c}}, - {{0xa73e, 0xa73f}, {0xa73f, 0xa73e}}, - {{0xa740, 0xa741}, {0xa741, 0xa740}}, - {{0xa742, 0xa743}, {0xa743, 0xa742}}, - {{0xa744, 0xa745}, {0xa745, 0xa744}}, - {{0xa746, 0xa747}, {0xa747, 0xa746}}, - {{0xa748, 0xa749}, {0xa749, 0xa748}}, - {{0xa74a, 0xa74b}, {0xa74b, 0xa74a}}, - {{0xa74c, 0xa74d}, {0xa74d, 0xa74c}}, - {{0xa74e, 0xa74f}, {0xa74f, 0xa74e}}, - {{0xa750, 0xa751}, {0xa751, 0xa750}}, - {{0xa752, 0xa753}, {0xa753, 0xa752}}, - {{0xa754, 0xa755}, {0xa755, 0xa754}}, - {{0xa756, 0xa757}, {0xa757, 0xa756}}, - {{0xa758, 0xa759}, {0xa759, 0xa758}}, - {{0xa75a, 0xa75b}, {0xa75b, 0xa75a}}, - {{0xa75c, 0xa75d}, {0xa75d, 0xa75c}}, - {{0xa75e, 0xa75f}, {0xa75f, 0xa75e}}, - {{0xa760, 0xa761}, {0xa761, 0xa760}}, - {{0xa762, 0xa763}, {0xa763, 0xa762}}, - {{0xa764, 0xa765}, {0xa765, 0xa764}}, - {{0xa766, 0xa767}, {0xa767, 0xa766}}, - {{0xa768, 0xa769}, {0xa769, 0xa768}}, - {{0xa76a, 0xa76b}, {0xa76b, 0xa76a}}, - {{0xa76c, 0xa76d}, {0xa76d, 0xa76c}}, - {{0xa76e, 0xa76f}, {0xa76f, 0xa76e}}, - {{0xa779, 0xa77a}, {0xa77a, 0xa779}}, - {{0xa77b, 0xa77c}, {0xa77c, 0xa77b}}, - {{0xa77d, 0xa77d}, {0x1d79, 0x1d79}}, - {{0xa77e, 0xa77f}, {0xa77f, 0xa77e}}, - {{0xa780, 0xa781}, {0xa781, 0xa780}}, - {{0xa782, 0xa783}, {0xa783, 0xa782}}, - {{0xa784, 0xa785}, {0xa785, 0xa784}}, - {{0xa786, 0xa787}, {0xa787, 0xa786}}, - {{0xa78b, 0xa78c}, {0xa78c, 0xa78b}}, - {{0xa78d, 0xa78d}, {0x0265, 0x0265}}, - {{0xa790, 0xa791}, {0xa791, 0xa790}}, - {{0xa792, 0xa793}, {0xa793, 0xa792}}, - {{0xa796, 0xa797}, {0xa797, 0xa796}}, - {{0xa798, 0xa799}, {0xa799, 0xa798}}, - {{0xa79a, 0xa79b}, {0xa79b, 0xa79a}}, - {{0xa79c, 0xa79d}, 
{0xa79d, 0xa79c}}, - {{0xa79e, 0xa79f}, {0xa79f, 0xa79e}}, - {{0xa7a0, 0xa7a1}, {0xa7a1, 0xa7a0}}, - {{0xa7a2, 0xa7a3}, {0xa7a3, 0xa7a2}}, - {{0xa7a4, 0xa7a5}, {0xa7a5, 0xa7a4}}, - {{0xa7a6, 0xa7a7}, {0xa7a7, 0xa7a6}}, - {{0xa7a8, 0xa7a9}, {0xa7a9, 0xa7a8}}, - {{0xa7aa, 0xa7aa}, {0x0266, 0x0266}}, - {{0xa7ab, 0xa7ab}, {0x025c, 0x025c}}, - {{0xa7ac, 0xa7ac}, {0x0261, 0x0261}}, - {{0xa7ad, 0xa7ad}, {0x026c, 0x026c}}, - {{0xa7b0, 0xa7b0}, {0x029e, 0x029e}}, - {{0xa7b1, 0xa7b1}, {0x0287, 0x0287}}, - {{0xa7b2, 0xa7b2}, {0x029d, 0x029d}}, - {{0xa7b3, 0xa7b3}, {0xab53, 0xab53}}, - {{0xa7b4, 0xa7b5}, {0xa7b5, 0xa7b4}}, - {{0xa7b6, 0xa7b7}, {0xa7b7, 0xa7b6}}, - {{0xab53, 0xab53}, {0xa7b3, 0xa7b3}}, - {{0xab70, 0xabbf}, {0x13a0, 0x13ef}}, - {{0xff21, 0xff3a}, {0xff41, 0xff5a}}, - {{0xff41, 0xff5a}, {0xff21, 0xff3a}}, - {{0, 0}, {0, 0}}}; - const fold_pair *ptr_ = mapping_; - - for (; ptr_->from.first != 0; ++ptr_) - { - if (range_.second < ptr_->from.first) break; - - if (range_.first >= ptr_->from.first && - range_.first <= ptr_->from.second) - { - if (ptr_->to.first <= ptr_->to.second) - { - const index_type first_ = ptr_->to.first + - (range_.first - ptr_->from.first); - - out_.insert(range(first_, - range_.second > ptr_->from.second ? - ptr_->to.second : - static_cast(ptr_->to.first + - (range_.second - ptr_->from.first)))); - } - else - { - const index_type first_ = ptr_->to.second + - (range_.first - ptr_->from.first); - - out_.insert(range(first_, - range_.second > ptr_->from.second ? 
- ptr_->to.first : - static_cast(ptr_->to.second + - (range_.second - ptr_->from.first)))); - } - } - else if (range_.second >= ptr_->from.first && - range_.second <= ptr_->from.second) - { - if (ptr_->to.first <= ptr_->to.second) - { - const index_type second_ = ptr_->to.first + - (range_.second - ptr_->from.first); - - out_.insert(range(ptr_->to.first, second_)); - } - else - { - const index_type second_ = ptr_->to.second + - (range_.second - ptr_->from.first); - - out_.insert(range(ptr_->to.second, second_)); - } - } - // Either range fully encompasses from range or not at all. - else if (ptr_->from.first >= range_.first && - ptr_->from.first <= range_.second) - { - if (ptr_->to.first <= ptr_->to.second) - { - out_.insert(range(ptr_->to.first, ptr_->to.second)); - } - else - { - out_.insert(range(ptr_->to.second, ptr_->to.first)); - } - } - } - } - - static void fold(const range &range_, const std::locale &locale_, - string_token &out_, const four &) - { - if (range_.first < 0x10000) - { - fold(range_, locale_, out_, two()); - } - - static const fold_pair mapping_[] = - {{{0x10400, 0x10427}, {0x10428, 0x1044f}}, - {{0x10428, 0x1044f}, {0x10400, 0x10427}}, - {{0x10c80, 0x10cb2}, {0x10cc0, 0x10cf2}}, - {{0x10cc0, 0x10cf2}, {0x10c80, 0x10cb2}}, - {{0x118a0, 0x118bf}, {0x118c0, 0x118df}}, - {{0x118c0, 0x118df}, {0x118a0, 0x118bf}}, - {{0, 0}, {0, 0}}}; - const fold_pair *ptr_ = mapping_; - - for (; ptr_->from.first != 0; ++ptr_) - { - if (range_.second < ptr_->from.first) break; - - if (range_.first >= ptr_->from.first && - range_.first <= ptr_->from.second) - { - out_.insert(range(ptr_->to.first + - (range_.first - ptr_->from.first), - range_.second > ptr_->from.second ? 
- ptr_->to.second : - ptr_->to.first + (range_.second - ptr_->from.first))); - } - else if (range_.second >= ptr_->from.first && - range_.second <= ptr_->from.second) - { - out_.insert(range(ptr_->to.first, - ptr_->to.first + (range_.second - ptr_->from.first))); - } - // Either range fully encompasses from range or not at all. - else if (ptr_->from.first >= range_.first && - ptr_->from.first <= range_.second) - { - out_.insert(range(ptr_->to.first, ptr_->to.second)); - } - } - } - - template - static input_char_type chr(state_type &state_) - { - input_char_type ch_ = 0; - - // eos_ has already been checked for. - switch (*state_._curr) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - ch_ = decode_octal(state_); - break; - case 'a': - ch_ = '\a'; - state_.increment(); - break; - case 'b': - ch_ = '\b'; - state_.increment(); - break; - case 'c': - ch_ = decode_control_char(state_); - break; - case 'e': - ch_ = 27; // '\e' not recognised by compiler - state_.increment(); - break; - case 'f': - ch_ = '\f'; - state_.increment(); - break; - case 'n': - ch_ = '\n'; - state_.increment(); - break; - case 'r': - ch_ = '\r'; - state_.increment(); - break; - case 't': - ch_ = '\t'; - state_.increment(); - break; - case 'v': - ch_ = '\v'; - state_.increment(); - break; - case 'x': - ch_ = decode_hex(state_); - break; - default: - ch_ = *state_._curr; - state_.increment(); - break; - } - - return ch_; - } - -private: - struct char_pair - { - input_char_type first; - input_char_type second; - }; - - struct fold_pair - { - char_pair from; - char_pair to; - }; - - template - static void posix(state_type &state_, string_token &token_) - { - bool negate_ = false; - - if (!state_.eos() && *state_._curr == '^') - { - negate_ = true; - state_.increment(); - } - - if (state_.eos()) - { - unterminated_posix(state_); - } - else - { - switch (*state_._curr) - { - case 'a': - // alnum - // alpha - alnum_alpha(state_, token_, negate_); - 
break; - case 'b': - // blank - blank(state_, token_, negate_); - break; - case 'c': - // cntrl - cntrl(state_, token_, negate_); - break; - case 'd': - // digit - digit(state_, token_, negate_); - break; - case 'g': - // graph - graph(state_, token_, negate_); - break; - case 'l': - // lower - lower(state_, token_, negate_); - break; - case 'p': - // print - // punct - print_punct(state_, token_, negate_); - break; - case 's': - // space - space(state_, token_, negate_); - break; - case 'u': - // upper - upper(state_, token_, negate_); - break; - case 'x': - // xdigit - xdigit(state_, token_, negate_); - break; - default: - unknown_posix(state_); - break; - } - } - } - - template - static void alnum_alpha(state_type &state_, string_token &token_, - const bool negate_) - { - enum {unknown, alnum, alpha}; - std::size_t type_ = unknown; - - state_.increment(); - - if (!state_.eos() && *state_._curr == 'l') - { - state_.increment(); - - if (!state_.eos()) - { - if (*state_._curr == 'n') - { - state_.increment(); - - if (!state_.eos() && *state_._curr == 'u') - { - state_.increment(); - - if (!state_.eos() && *state_._curr == 'm') - { - state_.increment(); - type_ = alnum; - } - } - } - else if (*state_._curr == 'p') - { - state_.increment(); - - if (!state_.eos() && *state_._curr == 'h') - { - state_.increment(); - - if (!state_.eos() && *state_._curr == 'a') - { - state_.increment(); - type_ = alpha; - } - } - } - } - } - - if (type_ == unknown) - { - unknown_posix(state_); - } - else - { - std::string str_; - - check_posix_termination(state_); - - if (type_ == alnum) - { - // alnum - str_ = sizeof(input_char_type) == 1 ? - make_alnum(state_._locale) : - std::string("[\\p{Ll}\\p{Lu}\\p{Nd}]"); - } - else - { - // alpha - str_ = sizeof(input_char_type) == 1 ? 
- make_alpha(state_._locale) : - std::string("[\\p{Ll}\\p{Lu}]"); - } - - insert_charset(str_.c_str(), state_, token_, negate_); - } - } - - static std::string make_alnum(std::locale &locale_) - { - std::string str_(1, '['); - - for (std::size_t i_ = 0; i_ < 256; ++i_) - { - if (std::use_facet >(locale_). - is(std::ctype_base::alnum, static_cast(i_))) - { - str_ += static_cast(i_); - } - } - - str_ += ']'; - return str_; - } - - static std::string make_alpha(std::locale &locale_) - { - std::string str_(1, '['); - - for (std::size_t i_ = 0; i_ < 256; ++i_) - { - if (std::use_facet >(locale_). - is(std::ctype_base::alpha, static_cast(i_))) - { - str_ += static_cast(i_); - } - } - - str_ += ']'; - return str_; - } - - template - static void blank(state_type &state_, string_token &token_, - const bool negate_) - { - const char *blank_ = "lank"; - - state_.increment(); - - // Casts to prevent warnings (VC++ 2012) - while (!state_.eos() && *blank_ && - static_cast(*state_._curr) == - static_cast(*blank_)) - { - state_.increment(); - ++blank_; - } - - if (*blank_) - { - unknown_posix(state_); - } - else - { - const char *str_ = sizeof(input_char_type) == 1 ? - "[ \t]" : "[\\p{Zs}\t]"; - - check_posix_termination(state_); - insert_charset(str_, state_, token_, negate_); - } - } - - template - static void cntrl(state_type &state_, string_token &token_, - const bool negate_) - { - const char *cntrl_ = "ntrl"; - - state_.increment(); - - // Casts to prevent warnings (VC++ 2012) - while (!state_.eos() && *cntrl_ && - static_cast(*state_._curr) == - static_cast(*cntrl_)) - { - state_.increment(); - ++cntrl_; - } - - if (*cntrl_) - { - unknown_posix(state_); - } - else - { - const char *str_ = sizeof(input_char_type) == 1 ? 
- "[\\x00-\x1f\x7f]" : "[\\p{Cc}]"; - - check_posix_termination(state_); - insert_charset(str_, state_, token_, negate_); - } - } - - template - static void digit(state_type &state_, string_token &token_, - const bool negate_) - { - const char *digit_ = "igit"; - - state_.increment(); - - // Casts to prevent warnings (VC++ 2012) - while (!state_.eos() && *digit_ && - static_cast(*state_._curr) == - static_cast(*digit_)) - { - state_.increment(); - ++digit_; - } - - if (*digit_) - { - unknown_posix(state_); - } - else - { - const char *str_ = sizeof(input_char_type) == 1 ? - "[0-9]" : "[\\p{Nd}]"; - - check_posix_termination(state_); - insert_charset(str_, state_, token_, negate_); - } - } - - template - static void graph(state_type &state_, string_token &token_, - const bool negate_) - { - const char *graph_ = "raph"; - - state_.increment(); - - // Casts to prevent warnings (VC++ 2012) - while (!state_.eos() && *graph_ && - static_cast(*state_._curr) == - static_cast(*graph_)) - { - state_.increment(); - ++graph_; - } - - if (*graph_) - { - unknown_posix(state_); - } - else - { - const char *str_ = sizeof(input_char_type) == 1 ? - "[\x21-\x7e]" : "[^\\p{Z}\\p{C}]"; - - check_posix_termination(state_); - insert_charset(str_, state_, token_, negate_); - } - } - - template - static void lower(state_type &state_, string_token &token_, - const bool negate_) - { - const char *lower_ = "ower"; - - state_.increment(); - - // Casts to prevent warnings (VC++ 2012) - while (!state_.eos() && *lower_ && - static_cast(*state_._curr) == - static_cast(*lower_)) - { - state_.increment(); - ++lower_; - } - - if (*lower_) - { - unknown_posix(state_); - } - else - { - std::string str_ = sizeof(input_char_type) == 1 ? 
- create_lower(state_._locale) : - std::string("[\\p{Ll}]"); - - check_posix_termination(state_); - insert_charset(str_.c_str(), state_, token_, negate_); - } - } - - static std::string create_lower(std::locale &locale_) - { - std::string str_(1, '['); - - for (std::size_t i_ = 0; i_ < 256; ++i_) - { - if (std::use_facet >(locale_). - is(std::ctype_base::lower, static_cast(i_))) - { - str_ += static_cast(i_); - } - } - - str_ += ']'; - return str_; - } - - template - static void print_punct(state_type &state_, string_token &token_, - const bool negate_) - { - enum {unknown, print, punct}; - std::size_t type_ = unknown; - - state_.increment(); - - if (!state_.eos()) - { - if (*state_._curr == 'r') - { - state_.increment(); - - if (!state_.eos() && *state_._curr == 'i') - { - state_.increment(); - - if (!state_.eos() && *state_._curr == 'n') - { - state_.increment(); - - if (!state_.eos() && *state_._curr == 't') - { - state_.increment(); - type_ = print; - } - } - } - } - else if (*state_._curr == 'u') - { - state_.increment(); - - if (!state_.eos() && *state_._curr == 'n') - { - state_.increment(); - - if (!state_.eos() && *state_._curr == 'c') - { - state_.increment(); - - if (!state_.eos() && *state_._curr == 't') - { - state_.increment(); - type_ = punct; - } - } - } - } - } - - if (type_ == unknown) - { - unknown_posix(state_); - } - else - { - const char *str_ = nullptr; - - check_posix_termination(state_); - - if (type_ == print) - { - // print - str_ = sizeof(input_char_type) == 1 ? - "[\x20-\x7e]" : "[\\p{C}]"; - } - else - { - // punct - str_ = sizeof(input_char_type) == 1 ? 
- "[!\"#$%&'()*+,\\-./:;<=>?@[\\\\\\]^_`{|}~]" : - "[\\p{P}\\p{S}]"; - } - - insert_charset(str_, state_, token_, negate_); - } - } - - template - static void space(state_type &state_, string_token &token_, - const bool negate_) - { - const char *space_ = "pace"; - - state_.increment(); - - // Casts to prevent warnings (VC++ 2012) - while (!state_.eos() && *space_ && - static_cast(*state_._curr) == - static_cast(*space_)) - { - state_.increment(); - ++space_; - } - - if (*space_) - { - unknown_posix(state_); - } - else - { - const char *str_ = sizeof(input_char_type) == 1 ? - "[ \t\r\n\v\f]" : "[\\p{Z}\t\r\n\v\f]"; - - check_posix_termination(state_); - insert_charset(str_, state_, token_, negate_); - } - } - - template - static void upper(state_type &state_, string_token &token_, - const bool negate_) - { - const char *upper_ = "pper"; - - state_.increment(); - - // Casts to prevent warnings (VC++ 2012) - while (!state_.eos() && *upper_ && - static_cast(*state_._curr) == - static_cast(*upper_)) - { - state_.increment(); - ++upper_; - } - - if (*upper_) - { - unknown_posix(state_); - } - else - { - std::string str_ = sizeof(input_char_type) == 1 ? - create_upper(state_._locale) : - std::string("[\\p{Lu}]"); - - check_posix_termination(state_); - insert_charset(str_.c_str(), state_, token_, negate_); - } - } - - static std::string create_upper(std::locale &locale_) - { - std::string str_(1, '['); - - for (std::size_t i_ = 0; i_ < 256; ++i_) - { - if (std::use_facet >(locale_). 
- is(std::ctype_base::upper, static_cast(i_))) - { - str_ += static_cast(i_); - } - } - - str_ += ']'; - return str_; - } - - template - static void xdigit(state_type &state_, string_token &token_, - const bool negate_) - { - const char *xdigit_ = "digit"; - - state_.increment(); - - // Casts to prevent warnings (VC++ 2012) - while (!state_.eos() && *xdigit_ && - static_cast(*state_._curr) == - static_cast(*xdigit_)) - { - state_.increment(); - ++xdigit_; - } - - if (*xdigit_) - { - unknown_posix(state_); - } - else - { - const char *str_ = "[0-9A-Fa-f]"; - - check_posix_termination(state_); - insert_charset(str_, state_, token_, negate_); - } - } - - template - static void check_posix_termination(state_type &state_) - { - if (state_.eos()) - { - unterminated_posix(state_); - } - - if (*state_._curr != ':') - { - std::ostringstream ss_; - - ss_ << "Missing ':' at index " << state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - state_.increment(); - - if (state_.eos()) - { - unterminated_posix(state_); - } - - if (*state_._curr != ']') - { - std::ostringstream ss_; - - ss_ << "Missing ']' at index " << state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - state_.increment(); - } - - template - static void unterminated_posix(state_type &state_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (unterminated POSIX charset)"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - template - static void unknown_posix(state_type &state_) - { - std::ostringstream ss_; - - ss_ << "Unknown POSIX charset at index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - template - static void insert_charset(const char *str_, state_type &state_, - string_token &token_, const bool negate_) - { - // Some systems have strlen in namespace std. 
- using namespace std; - - char_state temp_state_(str_ + 1, str_ + strlen(str_), - state_._id, state_._flags, state_._locale, 0); - string_token temp_token_; - - charset(temp_state_, temp_token_); - - if (negate_) temp_token_.negate(); - - token_.insert(temp_token_); - } - - template - static const char *charset_shortcut - (state_type &state_, std::size_t &str_len_) - { - const char *str_ = nullptr; - - switch (*state_._curr) - { - case 'd': - str_ = "[0-9]"; - break; - case 'D': - str_ = "[^0-9]"; - break; - case 'p': - str_ = unicode_escape(state_); - break; - case 's': - str_ = "[ \t\n\r\f\v]"; - break; - case 'S': - str_ = "[^ \t\n\r\f\v]"; - break; - case 'w': - str_ = "[_0-9A-Za-z]"; - break; - case 'W': - str_ = "[^_0-9A-Za-z]"; - break; - } - - if (str_) - { - // Some systems have strlen in namespace std. - using namespace std; - - str_len_ = strlen(str_); - } - else - { - str_len_ = 0; - } - - return str_; - } - - template - static const char *unicode_escape(state_type &state_) - { - const char *str_ = nullptr; - - state_.increment(); - - if (state_.eos()) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following \\p"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - if (*state_._curr != '{') - { - std::ostringstream ss_; - - ss_ << "Missing '{' following \\p at index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - state_.increment(); - - if (state_.eos()) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following \\p{"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - switch (*state_._curr) - { - case 'C': - state_.increment(); - - if (state_.eos()) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following \\p{C"; - state_.error(ss_); - throw runtime_error(ss_.str()); 
- } - - switch (*state_._curr) - { - case '}': - str_ = "[\\p{Cc}\\p{Cf}\\p{Co}\\p{Cs}]"; - break; - case 'c': - str_ = other_control(); - state_.increment(); - break; - case 'f': - str_ = other_format(); - state_.increment(); - break; -// case 'n': -// break; - case 'o': - str_ = other_private(); - state_.increment(); - break; - case 's': - str_ = other_surrogate(); - state_.increment(); - break; - default: - { - std::ostringstream ss_; - - ss_ << "Syntax error following \\p{C at index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - break; - case 'L': - state_.increment(); - - if (state_.eos()) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following \\p{L"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - switch (*state_._curr) - { - case '}': - str_ = "[\\p{Ll}\\p{Lm}\\p{Lo}\\p{Lt}\\p{Lu}]"; - break; - case 'C': - str_ = "[\\p{Ll}\\p{Lt}\\p{Lu}]"; - state_.increment(); - break; - case 'l': - str_ = letter_lowercase(); - state_.increment(); - break; - case 'm': - str_ = letter_modifier(); - state_.increment(); - break; - case 'o': - str_ = letter_other(); - state_.increment(); - break; - case 't': - str_ = letter_titlecase(); - state_.increment(); - break; - case 'u': - str_ = letter_uppercase(); - state_.increment(); - break; - default: - { - std::ostringstream ss_; - - ss_ << "Syntax error following \\p{L at index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - break; - case 'M': - state_.increment(); - - if (state_.eos()) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following \\p{M"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - switch (*state_._curr) - { - case '}': - str_ = "[\\p{Mc}\\p{Me}\\p{Mn}]"; - break; - case 'c': - str_ = mark_combining(); - state_.increment(); - break; - case 'e': - 
str_ = mark_enclosing(); - state_.increment(); - break; - case 'n': - str_ = mark_nonspacing(); - state_.increment(); - break; - default: - { - std::ostringstream ss_; - - ss_ << "Syntax error following \\p{M at index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - break; - case 'N': - state_.increment(); - - if (state_.eos()) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following \\p{N"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - switch (*state_._curr) - { - case '}': - str_ = "[\\p{Nd}\\p{Nl}\\p{No}]"; - break; - case 'd': - str_ = number_decimal(); - state_.increment(); - break; - case 'l': - str_ = number_letter(); - state_.increment(); - break; - case 'o': - str_ = number_other(); - state_.increment(); - break; - default: - { - std::ostringstream ss_; - - ss_ << "Syntax error following \\p{N at index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - break; - case 'P': - state_.increment(); - - if (state_.eos()) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following \\p{P"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - switch (*state_._curr) - { - case '}': - str_ = "[\\p{Pc}\\p{Pd}\\p{Pe}\\p{Pf}\\p{Pi}\\p{Po}" - "\\p{Ps}]"; - break; - case 'c': - str_ = punctuation_connector(); - state_.increment(); - break; - case 'd': - str_ = punctuation_dash(); - state_.increment(); - break; - case 'e': - str_ = punctuation_close(); - state_.increment(); - break; - case 'f': - str_ = punctuation_final(); - state_.increment(); - break; - case 'i': - str_ = punctuation_initial(); - state_.increment(); - break; - case 'o': - str_ = punctuation_other(); - state_.increment(); - break; - case 's': - str_ = punctuation_open(); - state_.increment(); - break; - default: - { - std::ostringstream ss_; - - ss_ << 
"Syntax error following \\p{P at index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - break; - case 'S': - state_.increment(); - - if (state_.eos()) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following \\p{S"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - switch (*state_._curr) - { - case '}': - str_ = "[\\p{Sc}\\p{Sk}\\p{Sm}\\p{So}]"; - break; - case 'c': - str_ = symbol_currency(); - state_.increment(); - break; - case 'k': - str_ = symbol_modifier(); - state_.increment(); - break; - case 'm': - str_ = symbol_math(); - state_.increment(); - break; - case 'o': - str_ = symbol_other(); - state_.increment(); - break; - default: - { - std::ostringstream ss_; - - ss_ << "Syntax error following \\p{S at index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - break; - case 'Z': - state_.increment(); - - if (state_.eos()) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following \\p{Z"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - switch (*state_._curr) - { - case '}': - str_ = "[\\p{Zl}\\p{Zp}\\p{Zs}]"; - break; - case 'l': - str_ = separator_line(); - state_.increment(); - break; - case 'p': - str_ = separator_paragraph(); - state_.increment(); - break; - case 's': - str_ = separator_space(); - state_.increment(); - break; - default: - { - std::ostringstream ss_; - - ss_ << "Syntax error following \\p{Z at index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - break; - default: - { - std::ostringstream ss_; - - ss_ << "Syntax error following \\p{ at index " << - state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - if (*state_._curr != '}') - { - std::ostringstream ss_; - - ss_ << "Missing '}' at index " << state_.index(); - 
state_.error(ss_); - throw runtime_error(ss_.str()); - } - - return str_; - } - - static const char *other_control() - { - return "[\\x0-\\x1f\\x7f-\\x9f]"; - } - - static const char *other_format() - { - return "[\\xad\\x600-\\x605\\x61c\\x6dd\\x70f\\x180e\\x200b-\\x200f" - "\\x202a-\\x202e\\x2060-\\x2064\\x2066-\\x206f\\xfeff" - "\\xfff9-\\xfffb\\x110bd\\x1bca0-\\x1bca3\\x1d173-\\x1d17a\\xe0001" - "\\xe0020-\\xe007f]"; - } - - static const char *other_private() - { - return "[\\xe000\\xf8ff\\xf0000\\xffffd\\x100000\\x10fffd]"; - } - - static const char *other_surrogate() - { - return "[\\xd800\\xdb7f\\xdb80\\xdbff\\xdc00\\xdfff]"; - } - - static const char *letter_lowercase() - { - return "[\\x61-\\x7a\\xb5\\xdf-\\xf6\\xf8-\\xff\\x101\\x103\\x105\\x107" - "\\x109\\x10b\\x10d\\x10f\\x111\\x113\\x115\\x117\\x119\\x11b\\x11d" - "\\x11f\\x121\\x123\\x125\\x127\\x129\\x12b\\x12d\\x12f\\x131\\x133" - "\\x135\\x137\\x138\\x13a\\x13c\\x13e\\x140\\x142\\x144\\x146" - "\\x148\\x149\\x14b\\x14d\\x14f\\x151\\x153\\x155\\x157\\x159\\x15b" - "\\x15d\\x15f\\x161\\x163\\x165\\x167\\x169\\x16b\\x16d\\x16f\\x171" - "\\x173\\x175\\x177\\x17a\\x17c\\x17e-\\x180\\x183\\x185\\x188" - "\\x18c\\x18d\\x192\\x195\\x199-\\x19b\\x19e\\x1a1\\x1a3\\x1a5" - "\\x1a8\\x1aa\\x1ab\\x1ad\\x1b0\\x1b4\\x1b6\\x1b9\\x1ba" - "\\x1bd-\\x1bf\\x1c6\\x1c9\\x1cc\\x1ce\\x1d0\\x1d2\\x1d4\\x1d6" - "\\x1d8\\x1da\\x1dc\\x1dd\\x1df\\x1e1\\x1e3\\x1e5\\x1e7\\x1e9\\x1eb" - "\\x1ed\\x1ef\\x1f0\\x1f3\\x1f5\\x1f9\\x1fb\\x1fd\\x1ff\\x201\\x203" - "\\x205\\x207\\x209\\x20b\\x20d\\x20f\\x211\\x213\\x215\\x217\\x219" - "\\x21b\\x21d\\x21f\\x221\\x223\\x225\\x227\\x229\\x22b\\x22d\\x22f" - "\\x231\\x233-\\x239\\x23c\\x23f\\x240\\x242\\x247\\x249\\x24b" - "\\x24d\\x24f-\\x293\\x295-\\x2af\\x371\\x373\\x377\\x37b-\\x37d" - "\\x390\\x3ac-\\x3ce\\x3d0\\x3d1\\x3d5-\\x3d7\\x3d9\\x3db\\x3dd" - "\\x3df\\x3e1\\x3e3\\x3e5\\x3e7\\x3e9\\x3eb\\x3ed\\x3ef-\\x3f3" - "\\x3f5\\x3f8\\x3fb\\x3fc\\x430-\\x45f\\x461\\x463\\x465\\x467" - 
"\\x469\\x46b\\x46d\\x46f\\x471\\x473\\x475\\x477\\x479\\x47b\\x47d" - "\\x47f\\x481\\x48b\\x48d\\x48f\\x491\\x493\\x495\\x497\\x499\\x49b" - "\\x49d\\x49f\\x4a1\\x4a3\\x4a5\\x4a7\\x4a9\\x4ab\\x4ad\\x4af\\x4b1" - "\\x4b3\\x4b5\\x4b7\\x4b9\\x4bb\\x4bd\\x4bf\\x4c2\\x4c4\\x4c6\\x4c8" - "\\x4ca\\x4cc\\x4ce\\x4cf\\x4d1\\x4d3\\x4d5\\x4d7\\x4d9\\x4db\\x4dd" - "\\x4df\\x4e1\\x4e3\\x4e5\\x4e7\\x4e9\\x4eb\\x4ed\\x4ef\\x4f1\\x4f3" - "\\x4f5\\x4f7\\x4f9\\x4fb\\x4fd\\x4ff\\x501\\x503\\x505\\x507\\x509" - "\\x50b\\x50d\\x50f\\x511\\x513\\x515\\x517\\x519\\x51b\\x51d\\x51f" - "\\x521\\x523\\x525\\x527\\x529\\x52b\\x52d\\x52f\\x561-\\x587" - "\\x13f8-\\x13fd\\x1d00-\\x1d2b\\x1d6b-\\x1d77\\x1d79-\\x1d9a" - "\\x1e01\\x1e03\\x1e05\\x1e07\\x1e09\\x1e0b\\x1e0d\\x1e0f\\x1e11" - "\\x1e13\\x1e15\\x1e17\\x1e19\\x1e1b\\x1e1d\\x1e1f\\x1e21\\x1e23" - "\\x1e25\\x1e27\\x1e29\\x1e2b\\x1e2d\\x1e2f\\x1e31\\x1e33\\x1e35" - "\\x1e37\\x1e39\\x1e3b\\x1e3d\\x1e3f\\x1e41\\x1e43\\x1e45\\x1e47" - "\\x1e49\\x1e4b\\x1e4d\\x1e4f\\x1e51\\x1e53\\x1e55\\x1e57\\x1e59" - "\\x1e5b\\x1e5d\\x1e5f\\x1e61\\x1e63\\x1e65\\x1e67\\x1e69\\x1e6b" - "\\x1e6d\\x1e6f\\x1e71\\x1e73\\x1e75\\x1e77\\x1e79\\x1e7b\\x1e7d" - "\\x1e7f\\x1e81\\x1e83\\x1e85\\x1e87\\x1e89\\x1e8b\\x1e8d\\x1e8f" - "\\x1e91\\x1e93\\x1e95-\\x1e9d\\x1e9f\\x1ea1\\x1ea3\\x1ea5\\x1ea7" - "\\x1ea9\\x1eab\\x1ead\\x1eaf\\x1eb1\\x1eb3\\x1eb5\\x1eb7\\x1eb9" - "\\x1ebb\\x1ebd\\x1ebf\\x1ec1\\x1ec3\\x1ec5\\x1ec7\\x1ec9\\x1ecb" - "\\x1ecd\\x1ecf\\x1ed1\\x1ed3\\x1ed5\\x1ed7\\x1ed9\\x1edb\\x1edd" - "\\x1edf\\x1ee1\\x1ee3\\x1ee5\\x1ee7\\x1ee9\\x1eeb\\x1eed\\x1eef" - "\\x1ef1\\x1ef3\\x1ef5\\x1ef7\\x1ef9\\x1efb\\x1efd\\x1eff-\\x1f07" - "\\x1f10-\\x1f15\\x1f20-\\x1f27\\x1f30-\\x1f37\\x1f40-\\x1f45" - "\\x1f50-\\x1f57\\x1f60-\\x1f67\\x1f70-\\x1f7d\\x1f80-\\x1f87" - "\\x1f90-\\x1f97\\x1fa0-\\x1fa7\\x1fb0-\\x1fb4\\x1fb6\\x1fb7\\x1fbe" - "\\x1fc2-\\x1fc4\\x1fc6\\x1fc7\\x1fd0-\\x1fd3\\x1fd6\\x1fd7" - "\\x1fe0-\\x1fe7\\x1ff2-\\x1ff4\\x1ff6\\x1ff7\\x210a\\x210e\\x210f" - 
"\\x2113\\x212f\\x2134\\x2139\\x213c\\x213d\\x2146-\\x2149\\x214e" - "\\x2184\\x2c30-\\x2c5e\\x2c61\\x2c65\\x2c66\\x2c68\\x2c6a\\x2c6c" - "\\x2c71\\x2c73\\x2c74\\x2c76-\\x2c7b\\x2c81\\x2c83\\x2c85\\x2c87" - "\\x2c89\\x2c8b\\x2c8d\\x2c8f\\x2c91\\x2c93\\x2c95\\x2c97\\x2c99" - "\\x2c9b\\x2c9d\\x2c9f\\x2ca1\\x2ca3\\x2ca5\\x2ca7\\x2ca9\\x2cab" - "\\x2cad\\x2caf\\x2cb1\\x2cb3\\x2cb5\\x2cb7\\x2cb9\\x2cbb\\x2cbd" - "\\x2cbf\\x2cc1\\x2cc3\\x2cc5\\x2cc7\\x2cc9\\x2ccb\\x2ccd\\x2ccf" - "\\x2cd1\\x2cd3\\x2cd5\\x2cd7\\x2cd9\\x2cdb\\x2cdd\\x2cdf\\x2ce1" - "\\x2ce3\\x2ce4\\x2cec\\x2cee\\x2cf3\\x2d00-\\x2d25\\x2d27\\x2d2d" - "\\xa641\\xa643\\xa645\\xa647\\xa649\\xa64b\\xa64d\\xa64f\\xa651" - "\\xa653\\xa655\\xa657\\xa659\\xa65b\\xa65d\\xa65f\\xa661\\xa663" - "\\xa665\\xa667\\xa669\\xa66b\\xa66d\\xa681\\xa683\\xa685\\xa687" - "\\xa689\\xa68b\\xa68d\\xa68f\\xa691\\xa693\\xa695\\xa697\\xa699" - "\\xa69b\\xa723\\xa725\\xa727\\xa729\\xa72b\\xa72d\\xa72f-\\xa731" - "\\xa733\\xa735\\xa737\\xa739\\xa73b\\xa73d\\xa73f\\xa741\\xa743" - "\\xa745\\xa747\\xa749\\xa74b\\xa74d\\xa74f\\xa751\\xa753\\xa755" - "\\xa757\\xa759\\xa75b\\xa75d\\xa75f\\xa761\\xa763\\xa765\\xa767" - "\\xa769\\xa76b\\xa76d\\xa76f\\xa771-\\xa778\\xa77a\\xa77c\\xa77f" - "\\xa781\\xa783\\xa785\\xa787\\xa78c\\xa78e\\xa791\\xa793-\\xa795" - "\\xa797\\xa799\\xa79b\\xa79d\\xa79f\\xa7a1\\xa7a3\\xa7a5\\xa7a7" - "\\xa7a9\\xa7b5\\xa7b7\\xa7fa\\xab30-\\xab5a\\xab60-\\xab65" - "\\xab70-\\xabbf\\xfb00-\\xfb06\\xfb13-\\xfb17\\xff41-\\xff5a" - "\\x10428-\\x1044f\\x10cc0-\\x10cf2\\x118c0-\\x118df" - "\\x1d41a-\\x1d433\\x1d44e-\\x1d454\\x1d456-\\x1d467" - "\\x1d482-\\x1d49b\\x1d4b6-\\x1d4b9\\x1d4bb\\x1d4bd-\\x1d4c3" - "\\x1d4c5-\\x1d4cf\\x1d4ea-\\x1d503\\x1d51e-\\x1d537" - "\\x1d552-\\x1d56b\\x1d586-\\x1d59f\\x1d5ba-\\x1d5d3" - "\\x1d5ee-\\x1d607\\x1d622-\\x1d63b\\x1d656-\\x1d66f" - "\\x1d68a-\\x1d6a5\\x1d6c2-\\x1d6da\\x1d6dc-\\x1d6e1" - "\\x1d6fc-\\x1d714\\x1d716-\\x1d71b\\x1d736-\\x1d74e" - 
"\\x1d750-\\x1d755\\x1d770-\\x1d788\\x1d78a-\\x1d78f" - "\\x1d7aa-\\x1d7c2\\x1d7c4-\\x1d7c9\\x1d7cb]"; - } - - static const char *letter_modifier() - { - return "[\\x2b0-\\x2c1\\x2c6-\\x2d1\\x2e0-\\x2e4\\x2ec\\x2ee\\x374" - "\\x37a\\x559\\x640\\x6e5\\x6e6\\x7f4\\x7f5\\x7fa\\x81a\\x824\\x828" - "\\x971\\xe46\\xec6\\x10fc\\x17d7\\x1843\\x1aa7\\x1c78-\\x1c7d" - "\\x1d2c-\\x1d6a\\x1d78\\x1d9b-\\x1dbf\\x2071\\x207f\\x2090-\\x209c" - "\\x2c7c\\x2c7d\\x2d6f\\x2e2f\\x3005\\x3031-\\x3035\\x303b" - "\\x309d\\x309e\\x30fc-\\x30fe\\xa015\\xa4f8-\\xa4fd\\xa60c\\xa67f" - "\\xa69c\\xa69d\\xa717-\\xa71f\\xa770\\xa788\\xa7f8\\xa7f9\\xa9cf" - "\\xa9e6\\xaa70\\xaadd\\xaaf3\\xaaf4\\xab5c-\\xab5f\\xff70" - "\\xff9e\\xff9f\\x16b40-\\x16b43\\x16f93-\\x16f9f]"; - } - - static const char *letter_other() - { - return "[\\xaa\\xba\\x1bb\\x1c0-\\x1c3\\x294\\x5d0-\\x5ea\\x5f0-\\x5f2" - "\\x620-\\x63f\\x641-\\x64a\\x66e\\x66f\\x671-\\x6d3\\x6d5" - "\\x6ee\\x6ef\\x6fa-\\x6fc\\x6ff\\x710\\x712-\\x72f\\x74d-\\x7a5" - "\\x7b1\\x7ca-\\x7ea\\x800-\\x815\\x840-\\x858\\x8a0-\\x8b4" - "\\x904-\\x939\\x93d\\x950\\x958-\\x961\\x972-\\x980\\x985-\\x98c" - "\\x98f\\x990\\x993-\\x9a8\\x9aa-\\x9b0\\x9b2\\x9b6-\\x9b9\\x9bd" - "\\x9ce\\x9dc\\x9dd\\x9df-\\x9e1\\x9f0\\x9f1\\xa05-\\xa0a" - "\\xa0f\\xa10\\xa13-\\xa28\\xa2a-\\xa30\\xa32\\xa33\\xa35\\xa36" - "\\xa38\\xa39\\xa59-\\xa5c\\xa5e\\xa72-\\xa74\\xa85-\\xa8d" - "\\xa8f-\\xa91\\xa93-\\xaa8\\xaaa-\\xab0\\xab2\\xab3\\xab5-\\xab9" - "\\xabd\\xad0\\xae0\\xae1\\xaf9\\xb05-\\xb0c\\xb0f\\xb10" - "\\xb13-\\xb28\\xb2a-\\xb30\\xb32\\xb33\\xb35-\\xb39\\xb3d" - "\\xb5c\\xb5d\\xb5f-\\xb61\\xb71\\xb83\\xb85-\\xb8a\\xb8e-\\xb90" - "\\xb92-\\xb95\\xb99\\xb9a\\xb9c\\xb9e\\xb9f\\xba3\\xba4" - "\\xba8-\\xbaa\\xbae-\\xbb9\\xbd0\\xc05-\\xc0c\\xc0e-\\xc10" - "\\xc12-\\xc28\\xc2a-\\xc39\\xc3d\\xc58-\\xc5a\\xc60\\xc61" - "\\xc85-\\xc8c\\xc8e-\\xc90\\xc92-\\xca8\\xcaa-\\xcb3\\xcb5-\\xcb9" - "\\xcbd\\xcde\\xce0\\xce1\\xcf1\\xcf2\\xd05-\\xd0c\\xd0e-\\xd10" - 
"\\xd12-\\xd3a\\xd3d\\xd4e\\xd5f-\\xd61\\xd7a-\\xd7f\\xd85-\\xd96" - "\\xd9a-\\xdb1\\xdb3-\\xdbb\\xdbd\\xdc0-\\xdc6\\xe01-\\xe30" - "\\xe32\\xe33\\xe40-\\xe45\\xe81\\xe82\\xe84\\xe87\\xe88\\xe8a" - "\\xe8d\\xe94-\\xe97\\xe99-\\xe9f\\xea1-\\xea3\\xea5\\xea7" - "\\xeaa\\xeab\\xead-\\xeb0\\xeb2\\xeb3\\xebd\\xec0-\\xec4" - "\\xedc-\\xedf\\xf00\\xf40-\\xf47\\xf49-\\xf6c\\xf88-\\xf8c" - "\\x1000-\\x102a\\x103f\\x1050-\\x1055\\x105a-\\x105d\\x1061" - "\\x1065\\x1066\\x106e-\\x1070\\x1075-\\x1081\\x108e\\x10d0-\\x10fa" - "\\x10fd-\\x1248\\x124a-\\x124d\\x1250-\\x1256\\x1258" - "\\x125a-\\x125d\\x1260-\\x1288\\x128a-\\x128d\\x1290-\\x12b0" - "\\x12b2-\\x12b5\\x12b8-\\x12be\\x12c0\\x12c2-\\x12c5" - "\\x12c8-\\x12d6\\x12d8-\\x1310\\x1312-\\x1315\\x1318-\\x135a" - "\\x1380-\\x138f\\x1401-\\x166c\\x166f-\\x167f\\x1681-\\x169a" - "\\x16a0-\\x16ea\\x16f1-\\x16f8\\x1700-\\x170c\\x170e-\\x1711" - "\\x1720-\\x1731\\x1740-\\x1751\\x1760-\\x176c\\x176e-\\x1770" - "\\x1780-\\x17b3\\x17dc\\x1820-\\x1842\\x1844-\\x1877" - "\\x1880-\\x18a8\\x18aa\\x18b0-\\x18f5\\x1900-\\x191e" - "\\x1950-\\x196d\\x1970-\\x1974\\x1980-\\x19ab\\x19b0-\\x19c9" - "\\x1a00-\\x1a16\\x1a20-\\x1a54\\x1b05-\\x1b33\\x1b45-\\x1b4b" - "\\x1b83-\\x1ba0\\x1bae\\x1baf\\x1bba-\\x1be5\\x1c00-\\x1c23" - "\\x1c4d-\\x1c4f\\x1c5a-\\x1c77\\x1ce9-\\x1cec\\x1cee-\\x1cf1" - "\\x1cf5\\x1cf6\\x2135-\\x2138\\x2d30-\\x2d67\\x2d80-\\x2d96" - "\\x2da0-\\x2da6\\x2da8-\\x2dae\\x2db0-\\x2db6\\x2db8-\\x2dbe" - "\\x2dc0-\\x2dc6\\x2dc8-\\x2dce\\x2dd0-\\x2dd6\\x2dd8-\\x2dde" - "\\x3006\\x303c\\x3041-\\x3096\\x309f\\x30a1-\\x30fa\\x30ff" - "\\x3105-\\x312d\\x3131-\\x318e\\x31a0-\\x31ba\\x31f0-\\x31ff" - "\\x3400\\x4db5\\x4e00\\x9fd5\\xa000-\\xa014\\xa016-\\xa48c" - "\\xa4d0-\\xa4f7\\xa500-\\xa60b\\xa610-\\xa61f\\xa62a\\xa62b\\xa66e" - "\\xa6a0-\\xa6e5\\xa78f\\xa7f7\\xa7fb-\\xa801\\xa803-\\xa805" - "\\xa807-\\xa80a\\xa80c-\\xa822\\xa840-\\xa873\\xa882-\\xa8b3" - "\\xa8f2-\\xa8f7\\xa8fb\\xa8fd\\xa90a-\\xa925\\xa930-\\xa946" - 
"\\xa960-\\xa97c\\xa984-\\xa9b2\\xa9e0-\\xa9e4\\xa9e7-\\xa9ef" - "\\xa9fa-\\xa9fe\\xaa00-\\xaa28\\xaa40-\\xaa42\\xaa44-\\xaa4b" - "\\xaa60-\\xaa6f\\xaa71-\\xaa76\\xaa7a\\xaa7e-\\xaaaf\\xaab1" - "\\xaab5\\xaab6\\xaab9-\\xaabd\\xaac0\\xaac2\\xaadb\\xaadc" - "\\xaae0-\\xaaea\\xaaf2\\xab01-\\xab06\\xab09-\\xab0e" - "\\xab11-\\xab16\\xab20-\\xab26\\xab28-\\xab2e\\xabc0-\\xabe2" - "\\xac00\\xd7a3\\xd7b0-\\xd7c6\\xd7cb-\\xd7fb\\xf900-\\xfa6d" - "\\xfa70-\\xfad9\\xfb1d\\xfb1f-\\xfb28\\xfb2a-\\xfb36" - "\\xfb38-\\xfb3c\\xfb3e\\xfb40\\xfb41\\xfb43\\xfb44\\xfb46-\\xfbb1" - "\\xfbd3-\\xfd3d\\xfd50-\\xfd8f\\xfd92-\\xfdc7\\xfdf0-\\xfdfb" - "\\xfe70-\\xfe74\\xfe76-\\xfefc\\xff66-\\xff6f\\xff71-\\xff9d" - "\\xffa0-\\xffbe\\xffc2-\\xffc7\\xffca-\\xffcf\\xffd2-\\xffd7" - "\\xffda-\\xffdc\\x10000-\\x1000b\\x1000d-\\x10026\\x10028-\\x1003a" - "\\x1003c\\x1003d\\x1003f-\\x1004d\\x10050-\\x1005d" - "\\x10080-\\x100fa\\x10280-\\x1029c\\x102a0-\\x102d0" - "\\x10300-\\x1031f\\x10330-\\x10340\\x10342-\\x10349" - "\\x10350-\\x10375\\x10380-\\x1039d\\x103a0-\\x103c3" - "\\x103c8-\\x103cf\\x10450-\\x1049d\\x10500-\\x10527" - "\\x10530-\\x10563\\x10600-\\x10736\\x10740-\\x10755" - "\\x10760-\\x10767\\x10800-\\x10805\\x10808\\x1080a-\\x10835" - "\\x10837\\x10838\\x1083c\\x1083f-\\x10855\\x10860-\\x10876" - "\\x10880-\\x1089e\\x108e0-\\x108f2\\x108f4\\x108f5" - "\\x10900-\\x10915\\x10920-\\x10939\\x10980-\\x109b7" - "\\x109be\\x109bf\\x10a00\\x10a10-\\x10a13\\x10a15-\\x10a17" - "\\x10a19-\\x10a33\\x10a60-\\x10a7c\\x10a80-\\x10a9c" - "\\x10ac0-\\x10ac7\\x10ac9-\\x10ae4\\x10b00-\\x10b35" - "\\x10b40-\\x10b55\\x10b60-\\x10b72\\x10b80-\\x10b91" - "\\x10c00-\\x10c48\\x11003-\\x11037\\x11083-\\x110af" - "\\x110d0-\\x110e8\\x11103-\\x11126\\x11150-\\x11172\\x11176" - "\\x11183-\\x111b2\\x111c1-\\x111c4\\x111da\\x111dc" - "\\x11200-\\x11211\\x11213-\\x1122b\\x11280-\\x11286\\x11288" - "\\x1128a-\\x1128d\\x1128f-\\x1129d\\x1129f-\\x112a8" - "\\x112b0-\\x112de\\x11305-\\x1130c\\x1130f\\x11310" - 
"\\x11313-\\x11328\\x1132a-\\x11330\\x11332\\x11333" - "\\x11335-\\x11339\\x1133d\\x11350\\x1135d-\\x11361" - "\\x11480-\\x114af\\x114c4\\x114c5\\x114c7\\x11580-\\x115ae" - "\\x115d8-\\x115db\\x11600-\\x1162f\\x11644\\x11680-\\x116aa" - "\\x11700-\\x11719\\x118ff\\x11ac0-\\x11af8\\x12000-\\x12399" - "\\x12480-\\x12543\\x13000-\\x1342e\\x14400-\\x14646" - "\\x16800-\\x16a38\\x16a40-\\x16a5e\\x16ad0-\\x16aed" - "\\x16b00-\\x16b2f\\x16b63-\\x16b77\\x16b7d-\\x16b8f" - "\\x16f00-\\x16f44\\x16f50\\x1b000\\x1b001\\x1bc00-\\x1bc6a" - "\\x1bc70-\\x1bc7c\\x1bc80-\\x1bc88\\x1bc90-\\x1bc99" - "\\x1e800-\\x1e8c4\\x1ee00-\\x1ee03\\x1ee05-\\x1ee1f" - "\\x1ee21\\x1ee22\\x1ee24\\x1ee27\\x1ee29-\\x1ee32\\x1ee34-\\x1ee37" - "\\x1ee39\\x1ee3b\\x1ee42\\x1ee47\\x1ee49\\x1ee4b\\x1ee4d-\\x1ee4f" - "\\x1ee51\\x1ee52\\x1ee54\\x1ee57\\x1ee59\\x1ee5b\\x1ee5d\\x1ee5f" - "\\x1ee61\\x1ee62\\x1ee64\\x1ee67-\\x1ee6a\\x1ee6c-\\x1ee72" - "\\x1ee74-\\x1ee77\\x1ee79-\\x1ee7c\\x1ee7e\\x1ee80-\\x1ee89" - "\\x1ee8b-\\x1ee9b\\x1eea1-\\x1eea3\\x1eea5-\\x1eea9" - "\\x1eeab-\\x1eebb\\x20000\\x2a6d6\\x2a700\\x2b734\\x2b740\\x2b81d" - "\\x2b820\\x2cea1\\x2f800-\\x2fa1d]"; - } - - static const char *letter_titlecase() - { - return "[\\x1c5\\x1c8\\x1cb\\x1f2\\x1f88-\\x1f8f\\x1f98-\\x1f9f" - "\\x1fa8-\\x1faf\\x1fbc\\x1fcc\\x1ffc]"; - } - - static const char *letter_uppercase() - { - return "[\\x41-\\x5a\\xc0-\\xd6\\xd8-\\xde\\x100\\x102\\x104\\x106" - "\\x108\\x10a\\x10c\\x10e\\x110\\x112\\x114\\x116\\x118\\x11a\\x11c" - "\\x11e\\x120\\x122\\x124\\x126\\x128\\x12a\\x12c\\x12e\\x130\\x132" - "\\x134\\x136\\x139\\x13b\\x13d\\x13f\\x141\\x143\\x145\\x147\\x14a" - "\\x14c\\x14e\\x150\\x152\\x154\\x156\\x158\\x15a\\x15c\\x15e\\x160" - "\\x162\\x164\\x166\\x168\\x16a\\x16c\\x16e\\x170\\x172\\x174\\x176" - "\\x178\\x179\\x17b\\x17d\\x181\\x182\\x184\\x186\\x187" - "\\x189-\\x18b\\x18e-\\x191\\x193\\x194\\x196-\\x198\\x19c\\x19d" - "\\x19f\\x1a0\\x1a2\\x1a4\\x1a6\\x1a7\\x1a9\\x1ac\\x1ae\\x1af" - 
"\\x1b1-\\x1b3\\x1b5\\x1b7\\x1b8\\x1bc\\x1c4\\x1c7\\x1ca\\x1cd" - "\\x1cf\\x1d1\\x1d3\\x1d5\\x1d7\\x1d9\\x1db\\x1de\\x1e0\\x1e2\\x1e4" - "\\x1e6\\x1e8\\x1ea\\x1ec\\x1ee\\x1f1\\x1f4\\x1f6-\\x1f8\\x1fa" - "\\x1fc\\x1fe\\x200\\x202\\x204\\x206\\x208\\x20a\\x20c\\x20e\\x210" - "\\x212\\x214\\x216\\x218\\x21a\\x21c\\x21e\\x220\\x222\\x224\\x226" - "\\x228\\x22a\\x22c\\x22e\\x230\\x232\\x23a\\x23b\\x23d\\x23e\\x241" - "\\x243-\\x246\\x248\\x24a\\x24c\\x24e\\x370\\x372\\x376\\x37f" - "\\x386\\x388-\\x38a\\x38c\\x38e\\x38f\\x391-\\x3a1\\x3a3-\\x3ab" - "\\x3cf\\x3d2-\\x3d4\\x3d8\\x3da\\x3dc\\x3de\\x3e0\\x3e2\\x3e4" - "\\x3e6\\x3e8\\x3ea\\x3ec\\x3ee\\x3f4\\x3f7\\x3f9\\x3fa" - "\\x3fd-\\x42f\\x460\\x462\\x464\\x466\\x468\\x46a\\x46c\\x46e" - "\\x470\\x472\\x474\\x476\\x478\\x47a\\x47c\\x47e\\x480\\x48a\\x48c" - "\\x48e\\x490\\x492\\x494\\x496\\x498\\x49a\\x49c\\x49e\\x4a0\\x4a2" - "\\x4a4\\x4a6\\x4a8\\x4aa\\x4ac\\x4ae\\x4b0\\x4b2\\x4b4\\x4b6\\x4b8" - "\\x4ba\\x4bc\\x4be\\x4c0\\x4c1\\x4c3\\x4c5\\x4c7\\x4c9\\x4cb\\x4cd" - "\\x4d0\\x4d2\\x4d4\\x4d6\\x4d8\\x4da\\x4dc\\x4de\\x4e0\\x4e2\\x4e4" - "\\x4e6\\x4e8\\x4ea\\x4ec\\x4ee\\x4f0\\x4f2\\x4f4\\x4f6\\x4f8\\x4fa" - "\\x4fc\\x4fe\\x500\\x502\\x504\\x506\\x508\\x50a\\x50c\\x50e\\x510" - "\\x512\\x514\\x516\\x518\\x51a\\x51c\\x51e\\x520\\x522\\x524\\x526" - "\\x528\\x52a\\x52c\\x52e\\x531-\\x556\\x10a0-\\x10c5\\x10c7\\x10cd" - "\\x13a0-\\x13f5\\x1e00\\x1e02\\x1e04\\x1e06\\x1e08\\x1e0a\\x1e0c" - "\\x1e0e\\x1e10\\x1e12\\x1e14\\x1e16\\x1e18\\x1e1a\\x1e1c\\x1e1e" - "\\x1e20\\x1e22\\x1e24\\x1e26\\x1e28\\x1e2a\\x1e2c\\x1e2e\\x1e30" - "\\x1e32\\x1e34\\x1e36\\x1e38\\x1e3a\\x1e3c\\x1e3e\\x1e40\\x1e42" - "\\x1e44\\x1e46\\x1e48\\x1e4a\\x1e4c\\x1e4e\\x1e50\\x1e52\\x1e54" - "\\x1e56\\x1e58\\x1e5a\\x1e5c\\x1e5e\\x1e60\\x1e62\\x1e64\\x1e66" - "\\x1e68\\x1e6a\\x1e6c\\x1e6e\\x1e70\\x1e72\\x1e74\\x1e76\\x1e78" - "\\x1e7a\\x1e7c\\x1e7e\\x1e80\\x1e82\\x1e84\\x1e86\\x1e88\\x1e8a" - "\\x1e8c\\x1e8e\\x1e90\\x1e92\\x1e94\\x1e9e\\x1ea0\\x1ea2\\x1ea4" - 
"\\x1ea6\\x1ea8\\x1eaa\\x1eac\\x1eae\\x1eb0\\x1eb2\\x1eb4\\x1eb6" - "\\x1eb8\\x1eba\\x1ebc\\x1ebe\\x1ec0\\x1ec2\\x1ec4\\x1ec6\\x1ec8" - "\\x1eca\\x1ecc\\x1ece\\x1ed0\\x1ed2\\x1ed4\\x1ed6\\x1ed8\\x1eda" - "\\x1edc\\x1ede\\x1ee0\\x1ee2\\x1ee4\\x1ee6\\x1ee8\\x1eea\\x1eec" - "\\x1eee\\x1ef0\\x1ef2\\x1ef4\\x1ef6\\x1ef8\\x1efa\\x1efc\\x1efe" - "\\x1f08-\\x1f0f\\x1f18-\\x1f1d\\x1f28-\\x1f2f\\x1f38-\\x1f3f" - "\\x1f48-\\x1f4d\\x1f59\\x1f5b\\x1f5d\\x1f5f\\x1f68-\\x1f6f" - "\\x1fb8-\\x1fbb\\x1fc8-\\x1fcb\\x1fd8-\\x1fdb\\x1fe8-\\x1fec" - "\\x1ff8-\\x1ffb\\x2102\\x2107\\x210b-\\x210d\\x2110-\\x2112\\x2115" - "\\x2119-\\x211d\\x2124\\x2126\\x2128\\x212a-\\x212d\\x2130-\\x2133" - "\\x213e\\x213f\\x2145\\x2183\\x2c00-\\x2c2e\\x2c60\\x2c62-\\x2c64" - "\\x2c67\\x2c69\\x2c6b\\x2c6d-\\x2c70\\x2c72\\x2c75\\x2c7e-\\x2c80" - "\\x2c82\\x2c84\\x2c86\\x2c88\\x2c8a\\x2c8c\\x2c8e\\x2c90\\x2c92" - "\\x2c94\\x2c96\\x2c98\\x2c9a\\x2c9c\\x2c9e\\x2ca0\\x2ca2\\x2ca4" - "\\x2ca6\\x2ca8\\x2caa\\x2cac\\x2cae\\x2cb0\\x2cb2\\x2cb4\\x2cb6" - "\\x2cb8\\x2cba\\x2cbc\\x2cbe\\x2cc0\\x2cc2\\x2cc4\\x2cc6\\x2cc8" - "\\x2cca\\x2ccc\\x2cce\\x2cd0\\x2cd2\\x2cd4\\x2cd6\\x2cd8\\x2cda" - "\\x2cdc\\x2cde\\x2ce0\\x2ce2\\x2ceb\\x2ced\\x2cf2\\xa640\\xa642" - "\\xa644\\xa646\\xa648\\xa64a\\xa64c\\xa64e\\xa650\\xa652\\xa654" - "\\xa656\\xa658\\xa65a\\xa65c\\xa65e\\xa660\\xa662\\xa664\\xa666" - "\\xa668\\xa66a\\xa66c\\xa680\\xa682\\xa684\\xa686\\xa688\\xa68a" - "\\xa68c\\xa68e\\xa690\\xa692\\xa694\\xa696\\xa698\\xa69a\\xa722" - "\\xa724\\xa726\\xa728\\xa72a\\xa72c\\xa72e\\xa732\\xa734\\xa736" - "\\xa738\\xa73a\\xa73c\\xa73e\\xa740\\xa742\\xa744\\xa746\\xa748" - "\\xa74a\\xa74c\\xa74e\\xa750\\xa752\\xa754\\xa756\\xa758\\xa75a" - "\\xa75c\\xa75e\\xa760\\xa762\\xa764\\xa766\\xa768\\xa76a\\xa76c" - "\\xa76e\\xa779\\xa77b\\xa77d\\xa77e\\xa780\\xa782\\xa784\\xa786" - "\\xa78b\\xa78d\\xa790\\xa792\\xa796\\xa798\\xa79a\\xa79c\\xa79e" - "\\xa7a0\\xa7a2\\xa7a4\\xa7a6\\xa7a8\\xa7aa-\\xa7ad\\xa7b0-\\xa7b4" - 
"\\xa7b6\\xff21-\\xff3a\\x10400-\\x10427\\x10c80-\\x10cb2" - "\\x118a0-\\x118bf\\x1d400-\\x1d419\\x1d434-\\x1d44d" - "\\x1d468-\\x1d481\\x1d49c\\x1d49e\\x1d49f\\x1d4a2\\x1d4a5\\x1d4a6" - "\\x1d4a9-\\x1d4ac\\x1d4ae-\\x1d4b5\\x1d4d0-\\x1d4e9" - "\\x1d504\\x1d505\\x1d507-\\x1d50a\\x1d50d-\\x1d514" - "\\x1d516-\\x1d51c\\x1d538\\x1d539\\x1d53b-\\x1d53e" - "\\x1d540-\\x1d544\\x1d546\\x1d54a-\\x1d550\\x1d56c-\\x1d585" - "\\x1d5a0-\\x1d5b9\\x1d5d4-\\x1d5ed\\x1d608-\\x1d621" - "\\x1d63c-\\x1d655\\x1d670-\\x1d689\\x1d6a8-\\x1d6c0" - "\\x1d6e2-\\x1d6fa\\x1d71c-\\x1d734\\x1d756-\\x1d76e" - "\\x1d790-\\x1d7a8\\x1d7ca]"; - } - - static const char *mark_combining() - { - return "[\\x903\\x93b\\x93e-\\x940\\x949-\\x94c\\x94e\\x94f\\x982\\x983" - "\\x9be-\\x9c0\\x9c7\\x9c8\\x9cb\\x9cc\\x9d7\\xa03\\xa3e-\\xa40" - "\\xa83\\xabe-\\xac0\\xac9\\xacb\\xacc\\xb02\\xb03\\xb3e\\xb40" - "\\xb47\\xb48\\xb4b\\xb4c\\xb57\\xbbe\\xbbf\\xbc1\\xbc2" - "\\xbc6-\\xbc8\\xbca-\\xbcc\\xbd7\\xc01-\\xc03\\xc41-\\xc44" - "\\xc82\\xc83\\xcbe\\xcc0-\\xcc4\\xcc7\\xcc8\\xcca\\xccb" - "\\xcd5\\xcd6\\xd02\\xd03\\xd3e-\\xd40\\xd46-\\xd48\\xd4a-\\xd4c" - "\\xd57\\xd82\\xd83\\xdcf-\\xdd1\\xdd8-\\xddf\\xdf2\\xdf3" - "\\xf3e\\xf3f\\xf7f\\x102b\\x102c\\x1031\\x1038\\x103b\\x103c" - "\\x1056\\x1057\\x1062-\\x1064\\x1067-\\x106d\\x1083\\x1084" - "\\x1087-\\x108c\\x108f\\x109a-\\x109c\\x17b6\\x17be-\\x17c5" - "\\x17c7\\x17c8\\x1923-\\x1926\\x1929-\\x192b\\x1930\\x1931" - "\\x1933-\\x1938\\x1a19\\x1a1a\\x1a55\\x1a57\\x1a61\\x1a63\\x1a64" - "\\x1a6d-\\x1a72\\x1b04\\x1b35\\x1b3b\\x1b3d-\\x1b41\\x1b43\\x1b44" - "\\x1b82\\x1ba1\\x1ba6\\x1ba7\\x1baa\\x1be7\\x1bea-\\x1bec\\x1bee" - "\\x1bf2\\x1bf3\\x1c24-\\x1c2b\\x1c34\\x1c35\\x1ce1\\x1cf2\\x1cf3" - "\\x302e\\x302f\\xa823\\xa824\\xa827\\xa880\\xa881\\xa8b4-\\xa8c3" - "\\xa952\\xa953\\xa983\\xa9b4\\xa9b5\\xa9ba\\xa9bb\\xa9bd-\\xa9c0" - "\\xaa2f\\xaa30\\xaa33\\xaa34\\xaa4d\\xaa7b\\xaa7d\\xaaeb" - "\\xaaee\\xaaef\\xaaf5\\xabe3\\xabe4\\xabe6\\xabe7\\xabe9\\xabea" - 
"\\xabec\\x11000\\x11002\\x11082\\x110b0-\\x110b2\\x110b7\\x110b8" - "\\x1112c\\x11182\\x111b3-\\x111b5\\x111bf\\x111c0\\x1122c-\\x1122e" - "\\x11232\\x11233\\x11235\\x112e0-\\x112e2\\x11302\\x11303" - "\\x1133e\\x1133f\\x11341-\\x11344\\x11347\\x11348\\x1134b-\\x1134d" - "\\x11357\\x11362\\x11363\\x114b0-\\x114b2\\x114b9\\x114bb-\\x114be" - "\\x114c1\\x115af-\\x115b1\\x115b8-\\x115bb\\x115be" - "\\x11630-\\x11632\\x1163b\\x1163c\\x1163e\\x116ac\\x116ae\\x116af" - "\\x116b6\\x11720\\x11721\\x11726\\x16f51-\\x16f7e\\x1d165\\x1d166" - "\\x1d16d-\\x1d172]"; - } - - static const char *mark_enclosing() - { - return "[\\x488\\x489\\x1abe\\x20dd-\\x20e0\\x20e2-\\x20e4" - "\\xa670-\\xa672]"; - } - - static const char *mark_nonspacing() - { - return "[\\x300-\\x36f\\x483-\\x487\\x591-\\x5bd\\x5bf\\x5c1\\x5c2" - "\\x5c4\\x5c5\\x5c7\\x610-\\x61a\\x64b-\\x65f\\x670\\x6d6-\\x6dc" - "\\x6df-\\x6e4\\x6e7\\x6e8\\x6ea-\\x6ed\\x711\\x730-\\x74a" - "\\x7a6-\\x7b0\\x7eb-\\x7f3\\x816-\\x819\\x81b-\\x823\\x825-\\x827" - "\\x829-\\x82d\\x859-\\x85b\\x8e3-\\x902\\x93a\\x93c\\x941-\\x948" - "\\x94d\\x951-\\x957\\x962\\x963\\x981\\x9bc\\x9c1-\\x9c4\\x9cd" - "\\x9e2\\x9e3\\xa01\\xa02\\xa3c\\xa41\\xa42\\xa47\\xa48" - "\\xa4b-\\xa4d\\xa51\\xa70\\xa71\\xa75\\xa81\\xa82\\xabc" - "\\xac1-\\xac5\\xac7\\xac8\\xacd\\xae2\\xae3\\xb01\\xb3c\\xb3f" - "\\xb41-\\xb44\\xb4d\\xb56\\xb62\\xb63\\xb82\\xbc0\\xbcd\\xc00" - "\\xc3e-\\xc40\\xc46-\\xc48\\xc4a-\\xc4d\\xc55\\xc56\\xc62\\xc63" - "\\xc81\\xcbc\\xcbf\\xcc6\\xccc\\xccd\\xce2\\xce3\\xd01" - "\\xd41-\\xd44\\xd4d\\xd62\\xd63\\xdca\\xdd2-\\xdd4\\xdd6\\xe31" - "\\xe34-\\xe3a\\xe47-\\xe4e\\xeb1\\xeb4-\\xeb9\\xebb\\xebc" - "\\xec8-\\xecd\\xf18\\xf19\\xf35\\xf37\\xf39\\xf71-\\xf7e" - "\\xf80-\\xf84\\xf86\\xf87\\xf8d-\\xf97\\xf99-\\xfbc\\xfc6" - "\\x102d-\\x1030\\x1032-\\x1037\\x1039\\x103a\\x103d\\x103e" - "\\x1058\\x1059\\x105e-\\x1060\\x1071-\\x1074\\x1082\\x1085\\x1086" - "\\x108d\\x109d\\x135d-\\x135f\\x1712-\\x1714\\x1732-\\x1734" - 
"\\x1752\\x1753\\x1772\\x1773\\x17b4\\x17b5\\x17b7-\\x17bd\\x17c6" - "\\x17c9-\\x17d3\\x17dd\\x180b-\\x180d\\x18a9\\x1920-\\x1922" - "\\x1927\\x1928\\x1932\\x1939-\\x193b\\x1a17\\x1a18\\x1a1b\\x1a56" - "\\x1a58-\\x1a5e\\x1a60\\x1a62\\x1a65-\\x1a6c\\x1a73-\\x1a7c\\x1a7f" - "\\x1ab0-\\x1abd\\x1b00-\\x1b03\\x1b34\\x1b36-\\x1b3a\\x1b3c\\x1b42" - "\\x1b6b-\\x1b73\\x1b80\\x1b81\\x1ba2-\\x1ba5\\x1ba8\\x1ba9" - "\\x1bab-\\x1bad\\x1be6\\x1be8\\x1be9\\x1bed\\x1bef-\\x1bf1" - "\\x1c2c-\\x1c33\\x1c36\\x1c37\\x1cd0-\\x1cd2\\x1cd4-\\x1ce0" - "\\x1ce2-\\x1ce8\\x1ced\\x1cf4\\x1cf8\\x1cf9\\x1dc0-\\x1df5" - "\\x1dfc-\\x1dff\\x20d0-\\x20dc\\x20e1\\x20e5-\\x20f0" - "\\x2cef-\\x2cf1\\x2d7f\\x2de0-\\x2dff\\x302a-\\x302d\\x3099\\x309a" - "\\xa66f\\xa674-\\xa67d\\xa69e\\xa69f\\xa6f0\\xa6f1\\xa802\\xa806" - "\\xa80b\\xa825\\xa826\\xa8c4\\xa8e0-\\xa8f1\\xa926-\\xa92d" - "\\xa947-\\xa951\\xa980-\\xa982\\xa9b3\\xa9b6-\\xa9b9\\xa9bc\\xa9e5" - "\\xaa29-\\xaa2e\\xaa31\\xaa32\\xaa35\\xaa36\\xaa43\\xaa4c\\xaa7c" - "\\xaab0\\xaab2-\\xaab4\\xaab7\\xaab8\\xaabe\\xaabf\\xaac1" - "\\xaaec\\xaaed\\xaaf6\\xabe5\\xabe8\\xabed\\xfb1e\\xfe00-\\xfe0f" - "\\xfe20-\\xfe2f\\x101fd\\x102e0\\x10376-\\x1037a\\x10a01-\\x10a03" - "\\x10a05\\x10a06\\x10a0c-\\x10a0f\\x10a38-\\x10a3a\\x10a3f" - "\\x10ae5\\x10ae6\\x11001\\x11038-\\x11046\\x1107f-\\x11081" - "\\x110b3-\\x110b6\\x110b9\\x110ba\\x11100-\\x11102" - "\\x11127-\\x1112b\\x1112d-\\x11134\\x11173\\x11180\\x11181" - "\\x111b6-\\x111be\\x111ca-\\x111cc\\x1122f-\\x11231\\x11234" - "\\x11236\\x11237\\x112df\\x112e3-\\x112ea\\x11300\\x11301\\x1133c" - "\\x11340\\x11366-\\x1136c\\x11370-\\x11374\\x114b3-\\x114b8" - "\\x114ba\\x114bf\\x114c0\\x114c2\\x114c3\\x115b2-\\x115b5" - "\\x115bc\\x115bd\\x115bf\\x115c0\\x115dc\\x115dd\\x11633-\\x1163a" - "\\x1163d\\x1163f\\x11640\\x116ab\\x116ad\\x116b0-\\x116b5\\x116b7" - "\\x1171d-\\x1171f\\x11722-\\x11725\\x11727-\\x1172b" - "\\x16af0-\\x16af4\\x16b30-\\x16b36\\x16f8f-\\x16f92" - 
"\\x1bc9d\\x1bc9e\\x1d167-\\x1d169\\x1d17b-\\x1d182" - "\\x1d185-\\x1d18b\\x1d1aa-\\x1d1ad\\x1d242-\\x1d244" - "\\x1da00-\\x1da36\\x1da3b-\\x1da6c\\x1da75\\x1da84" - "\\x1da9b-\\x1da9f\\x1daa1-\\x1daaf\\x1e8d0-\\x1e8d6" - "\\xe0100-\\xe01ef]"; - } - - static const char *number_decimal() - { - return "[\\x30-\\x39\\x660-\\x669\\x6f0-\\x6f9\\x7c0-\\x7c9" - "\\x966-\\x96f\\x9e6-\\x9ef\\xa66-\\xa6f\\xae6-\\xaef\\xb66-\\xb6f" - "\\xbe6-\\xbef\\xc66-\\xc6f\\xce6-\\xcef\\xd66-\\xd6f\\xde6-\\xdef" - "\\xe50-\\xe59\\xed0-\\xed9\\xf20-\\xf29\\x1040-\\x1049" - "\\x1090-\\x1099\\x17e0-\\x17e9\\x1810-\\x1819\\x1946-\\x194f" - "\\x19d0-\\x19d9\\x1a80-\\x1a89\\x1a90-\\x1a99\\x1b50-\\x1b59" - "\\x1bb0-\\x1bb9\\x1c40-\\x1c49\\x1c50-\\x1c59\\xa620-\\xa629" - "\\xa8d0-\\xa8d9\\xa900-\\xa909\\xa9d0-\\xa9d9\\xa9f0-\\xa9f9" - "\\xaa50-\\xaa59\\xabf0-\\xabf9\\xff10-\\xff19\\x104a0-\\x104a9" - "\\x11066-\\x1106f\\x110f0-\\x110f9\\x11136-\\x1113f" - "\\x111d0-\\x111d9\\x112f0-\\x112f9\\x114d0-\\x114d9" - "\\x11650-\\x11659\\x116c0-\\x116c9\\x11730-\\x11739" - "\\x118e0-\\x118e9\\x16a60-\\x16a69\\x16b50-\\x16b59" - "\\x1d7ce-\\x1d7ff]"; - } - - static const char *number_letter() - { - return "[\\x16ee-\\x16f0\\x2160-\\x2182\\x2185-\\x2188\\x3007" - "\\x3021-\\x3029\\x3038-\\x303a\\xa6e6-\\xa6ef\\x10140-\\x10174" - "\\x10341\\x1034a\\x103d1-\\x103d5\\x12400-\\x1246e]"; - } - - static const char *number_other() - { - return "[\\xb2\\xb3\\xb9\\xbc-\\xbe\\x9f4-\\x9f9\\xb72-\\xb77" - "\\xbf0-\\xbf2\\xc78-\\xc7e\\xd70-\\xd75\\xf2a-\\xf33" - "\\x1369-\\x137c\\x17f0-\\x17f9\\x19da\\x2070\\x2074-\\x2079" - "\\x2080-\\x2089\\x2150-\\x215f\\x2189\\x2460-\\x249b" - "\\x24ea-\\x24ff\\x2776-\\x2793\\x2cfd\\x3192-\\x3195" - "\\x3220-\\x3229\\x3248-\\x324f\\x3251-\\x325f\\x3280-\\x3289" - "\\x32b1-\\x32bf\\xa830-\\xa835\\x10107-\\x10133\\x10175-\\x10178" - "\\x1018a\\x1018b\\x102e1-\\x102fb\\x10320-\\x10323" - "\\x10858-\\x1085f\\x10879-\\x1087f\\x108a7-\\x108af" - 
"\\x108fb-\\x108ff\\x10916-\\x1091b\\x109bc\\x109bd" - "\\x109c0-\\x109cf\\x109d2-\\x109ff\\x10a40-\\x10a47" - "\\x10a7d\\x10a7e\\x10a9d-\\x10a9f\\x10aeb-\\x10aef" - "\\x10b58-\\x10b5f\\x10b78-\\x10b7f\\x10ba9-\\x10baf" - "\\x10cfa-\\x10cff\\x10e60-\\x10e7e\\x11052-\\x11065" - "\\x111e1-\\x111f4\\x1173a\\x1173b\\x118ea-\\x118f2" - "\\x16b5b-\\x16b61\\x1d360-\\x1d371\\x1e8c7-\\x1e8cf" - "\\x1f100-\\x1f10c]"; - } - - static const char *punctuation_connector() - { - return "[\\x5f\\x203f\\x2040\\x2054\\xfe33\\xfe34\\xfe4d-\\xfe4f" - "\\xff3f]"; - } - - static const char *punctuation_dash() - { - return "[\\x2d\\x58a\\x5be\\x1400\\x1806\\x2010-\\x2015\\x2e17\\x2e1a" - "\\x2e3a\\x2e3b\\x2e40\\x301c\\x3030\\x30a0\\xfe31\\xfe32\\xfe58" - "\\xfe63\\xff0d]"; - } - - static const char *punctuation_close() - { - return "[\\x29\\x5d\\x7d\\xf3b\\xf3d\\x169c\\x2046\\x207e\\x208e\\x2309" - "\\x230b\\x232a\\x2769\\x276b\\x276d\\x276f\\x2771\\x2773\\x2775" - "\\x27c6\\x27e7\\x27e9\\x27eb\\x27ed\\x27ef\\x2984\\x2986\\x2988" - "\\x298a\\x298c\\x298e\\x2990\\x2992\\x2994\\x2996\\x2998\\x29d9" - "\\x29db\\x29fd\\x2e23\\x2e25\\x2e27\\x2e29\\x3009\\x300b\\x300d" - "\\x300f\\x3011\\x3015\\x3017\\x3019\\x301b\\x301e\\x301f\\xfd3e" - "\\xfe18\\xfe36\\xfe38\\xfe3a\\xfe3c\\xfe3e\\xfe40\\xfe42\\xfe44" - "\\xfe48\\xfe5a\\xfe5c\\xfe5e\\xff09\\xff3d\\xff5d\\xff60\\xff63]"; - } - - static const char *punctuation_final() - { - return "[\\xbb\\x2019\\x201d\\x203a\\x2e03\\x2e05\\x2e0a\\x2e0d\\x2e1d" - "\\x2e21]"; - } - - static const char *punctuation_initial() - { - return "[\\xab\\x2018\\x201b\\x201c\\x201f\\x2039\\x2e02\\x2e04\\x2e09" - "\\x2e0c\\x2e1c\\x2e20]"; - } - - static const char *punctuation_other() - { - return "[\\x21-\\x23\\x25-\\x27\\x2a\\x2c\\x2e\\x2f\\x3a\\x3b\\x3f\\x40" - "\\x5c\\xa1\\xa7\\xb6\\xb7\\xbf\\x37e\\x387\\x55a-\\x55f\\x589" - "\\x5c0\\x5c3\\x5c6\\x5f3\\x5f4\\x609\\x60a\\x60c\\x60d\\x61b" - "\\x61e\\x61f\\x66a-\\x66d\\x6d4\\x700-\\x70d\\x7f7-\\x7f9" - 
"\\x830-\\x83e\\x85e\\x964\\x965\\x970\\xaf0\\xdf4\\xe4f" - "\\xe5a\\xe5b\\xf04-\\xf12\\xf14\\xf85\\xfd0-\\xfd4\\xfd9\\xfda" - "\\x104a-\\x104f\\x10fb\\x1360-\\x1368\\x166d\\x166e\\x16eb-\\x16ed" - "\\x1735\\x1736\\x17d4-\\x17d6\\x17d8-\\x17da\\x1800-\\x1805" - "\\x1807-\\x180a\\x1944\\x1945\\x1a1e\\x1a1f\\x1aa0-\\x1aa6" - "\\x1aa8-\\x1aad\\x1b5a-\\x1b60\\x1bfc-\\x1bff\\x1c3b-\\x1c3f" - "\\x1c7e\\x1c7f\\x1cc0-\\x1cc7\\x1cd3\\x2016\\x2017\\x2020-\\x2027" - "\\x2030-\\x2038\\x203b-\\x203e\\x2041-\\x2043\\x2047-\\x2051" - "\\x2053\\x2055-\\x205e\\x2cf9-\\x2cfc\\x2cfe\\x2cff\\x2d70" - "\\x2e00\\x2e01\\x2e06-\\x2e08\\x2e0b\\x2e0e-\\x2e16\\x2e18\\x2e19" - "\\x2e1b\\x2e1e\\x2e1f\\x2e2a-\\x2e2e\\x2e30-\\x2e39\\x2e3c-\\x2e3f" - "\\x2e41\\x3001-\\x3003\\x303d\\x30fb\\xa4fe\\xa4ff\\xa60d-\\xa60f" - "\\xa673\\xa67e\\xa6f2-\\xa6f7\\xa874-\\xa877\\xa8ce\\xa8cf" - "\\xa8f8-\\xa8fa\\xa8fc\\xa92e\\xa92f\\xa95f\\xa9c1-\\xa9cd" - "\\xa9de\\xa9df\\xaa5c-\\xaa5f\\xaade\\xaadf\\xaaf0\\xaaf1\\xabeb" - "\\xfe10-\\xfe16\\xfe19\\xfe30\\xfe45\\xfe46\\xfe49-\\xfe4c" - "\\xfe50-\\xfe52\\xfe54-\\xfe57\\xfe5f-\\xfe61\\xfe68\\xfe6a\\xfe6b" - "\\xff01-\\xff03\\xff05-\\xff07\\xff0a\\xff0c\\xff0e\\xff0f" - "\\xff1a\\xff1b\\xff1f\\xff20\\xff3c\\xff61\\xff64\\xff65" - "\\x10100-\\x10102\\x1039f\\x103d0\\x1056f\\x10857\\x1091f\\x1093f" - "\\x10a50-\\x10a58\\x10a7f\\x10af0-\\x10af6\\x10b39-\\x10b3f" - "\\x10b99-\\x10b9c\\x11047-\\x1104d\\x110bb\\x110bc" - "\\x110be-\\x110c1\\x11140-\\x11143\\x11174\\x11175" - "\\x111c5-\\x111c9\\x111cd\\x111db\\x111dd-\\x111df" - "\\x11238-\\x1123d\\x112a9\\x114c6\\x115c1-\\x115d7" - "\\x11641-\\x11643\\x1173c-\\x1173e\\x12470-\\x12474" - "\\x16a6e\\x16a6f\\x16af5\\x16b37-\\x16b3b\\x16b44\\x1bc9f" - "\\x1da87-\\x1da8b]"; - } - - static const char *punctuation_open() - { - return "[\\x28\\x5b\\x7b\\xf3a\\xf3c\\x169b\\x201a\\x201e\\x2045\\x207d" - "\\x208d\\x2308\\x230a\\x2329\\x2768\\x276a\\x276c\\x276e\\x2770" - 
"\\x2772\\x2774\\x27c5\\x27e6\\x27e8\\x27ea\\x27ec\\x27ee\\x2983" - "\\x2985\\x2987\\x2989\\x298b\\x298d\\x298f\\x2991\\x2993\\x2995" - "\\x2997\\x29d8\\x29da\\x29fc\\x2e22\\x2e24\\x2e26\\x2e28\\x2e42" - "\\x3008\\x300a\\x300c\\x300e\\x3010\\x3014\\x3016\\x3018\\x301a" - "\\x301d\\xfd3f\\xfe17\\xfe35\\xfe37\\xfe39\\xfe3b\\xfe3d\\xfe3f" - "\\xfe41\\xfe43\\xfe47\\xfe59\\xfe5b\\xfe5d\\xff08\\xff3b\\xff5b" - "\\xff5f\\xff62]"; - } - - static const char *symbol_currency() - { - return "[\\x24\\xa2-\\xa5\\x58f\\x60b\\x9f2\\x9f3\\x9fb\\xaf1\\xbf9" - "\\xe3f\\x17db\\x20a0-\\x20be\\xa838\\xfdfc\\xfe69\\xff04" - "\\xffe0\\xffe1\\xffe5\\xffe6]"; - } - - static const char *symbol_modifier() - { - return "[\\x5e\\x60\\xa8\\xaf\\xb4\\xb8\\x2c2-\\x2c5\\x2d2-\\x2df" - "\\x2e5-\\x2eb\\x2ed\\x2ef-\\x2ff\\x375\\x384\\x385\\x1fbd" - "\\x1fbf-\\x1fc1\\x1fcd-\\x1fcf\\x1fdd-\\x1fdf\\x1fed-\\x1fef" - "\\x1ffd\\x1ffe\\x309b\\x309c\\xa700-\\xa716\\xa720\\xa721" - "\\xa789\\xa78a\\xab5b\\xfbb2-\\xfbc1\\xff3e\\xff40\\xffe3" - "\\x1f3fb-\\x1f3ff]"; - } - - static const char *symbol_math() - { - return "[\\x2b\\x3c-\\x3e\\x7c\\x7e\\xac\\xb1\\xd7\\xf7\\x3f6" - "\\x606-\\x608\\x2044\\x2052\\x207a-\\x207c\\x208a-\\x208c\\x2118" - "\\x2140-\\x2144\\x214b\\x2190-\\x2194\\x219a\\x219b\\x21a0\\x21a3" - "\\x21a6\\x21ae\\x21ce\\x21cf\\x21d2\\x21d4\\x21f4-\\x22ff" - "\\x2320\\x2321\\x237c\\x239b-\\x23b3\\x23dc-\\x23e1\\x25b7\\x25c1" - "\\x25f8-\\x25ff\\x266f\\x27c0-\\x27c4\\x27c7-\\x27e5" - "\\x27f0-\\x27ff\\x2900-\\x2982\\x2999-\\x29d7\\x29dc-\\x29fb" - "\\x29fe-\\x2aff\\x2b30-\\x2b44\\x2b47-\\x2b4c\\xfb29\\xfe62" - "\\xfe64-\\xfe66\\xff0b\\xff1c-\\xff1e\\xff5c\\xff5e\\xffe2" - "\\xffe9-\\xffec\\x1d6c1\\x1d6db\\x1d6fb\\x1d715\\x1d735\\x1d74f" - "\\x1d76f\\x1d789\\x1d7a9\\x1d7c3\\x1eef0\\x1eef1]"; - } - - static const char *symbol_other() - { - return "[\\xa6\\xa9\\xae\\xb0\\x482\\x58d\\x58e\\x60e\\x60f\\x6de\\x6e9" - "\\x6fd\\x6fe\\x7f6\\x9fa\\xb70\\xbf3-\\xbf8\\xbfa\\xc7f\\xd79" - 
"\\xf01-\\xf03\\xf13\\xf15-\\xf17\\xf1a-\\xf1f\\xf34\\xf36\\xf38" - "\\xfbe-\\xfc5\\xfc7-\\xfcc\\xfce\\xfcf\\xfd5-\\xfd8\\x109e\\x109f" - "\\x1390-\\x1399\\x1940\\x19de-\\x19ff\\x1b61-\\x1b6a" - "\\x1b74-\\x1b7c\\x2100\\x2101\\x2103-\\x2106\\x2108\\x2109\\x2114" - "\\x2116\\x2117\\x211e-\\x2123\\x2125\\x2127\\x2129\\x212e" - "\\x213a\\x213b\\x214a\\x214c\\x214d\\x214f\\x218a\\x218b" - "\\x2195-\\x2199\\x219c-\\x219f\\x21a1\\x21a2\\x21a4\\x21a5" - "\\x21a7-\\x21ad\\x21af-\\x21cd\\x21d0\\x21d1\\x21d3\\x21d5-\\x21f3" - "\\x2300-\\x2307\\x230c-\\x231f\\x2322-\\x2328\\x232b-\\x237b" - "\\x237d-\\x239a\\x23b4-\\x23db\\x23e2-\\x23fa\\x2400-\\x2426" - "\\x2440-\\x244a\\x249c-\\x24e9\\x2500-\\x25b6\\x25b8-\\x25c0" - "\\x25c2-\\x25f7\\x2600-\\x266e\\x2670-\\x2767\\x2794-\\x27bf" - "\\x2800-\\x28ff\\x2b00-\\x2b2f\\x2b45\\x2b46\\x2b4d-\\x2b73" - "\\x2b76-\\x2b95\\x2b98-\\x2bb9\\x2bbd-\\x2bc8\\x2bca-\\x2bd1" - "\\x2bec-\\x2bef\\x2ce5-\\x2cea\\x2e80-\\x2e99\\x2e9b-\\x2ef3" - "\\x2f00-\\x2fd5\\x2ff0-\\x2ffb\\x3004\\x3012\\x3013\\x3020" - "\\x3036\\x3037\\x303e\\x303f\\x3190\\x3191\\x3196-\\x319f" - "\\x31c0-\\x31e3\\x3200-\\x321e\\x322a-\\x3247\\x3250" - "\\x3260-\\x327f\\x328a-\\x32b0\\x32c0-\\x32fe\\x3300-\\x33ff" - "\\x4dc0-\\x4dff\\xa490-\\xa4c6\\xa828-\\xa82b\\xa836\\xa837\\xa839" - "\\xaa77-\\xaa79\\xfdfd\\xffe4\\xffe8\\xffed\\xffee\\xfffc\\xfffd" - "\\x10137-\\x1013f\\x10179-\\x10189\\x1018c\\x10190-\\x1019b" - "\\x101a0\\x101d0-\\x101fc\\x10877\\x10878\\x10ac8\\x1173f" - "\\x16b3c-\\x16b3f\\x16b45\\x1bc9c\\x1d000-\\x1d0f5" - "\\x1d100-\\x1d126\\x1d129-\\x1d164\\x1d16a-\\x1d16c" - "\\x1d183\\x1d184\\x1d18c-\\x1d1a9\\x1d1ae-\\x1d1e8" - "\\x1d200-\\x1d241\\x1d245\\x1d300-\\x1d356\\x1d800-\\x1d9ff" - "\\x1da37-\\x1da3a\\x1da6d-\\x1da74\\x1da76-\\x1da83" - "\\x1da85\\x1da86\\x1f000-\\x1f02b\\x1f030-\\x1f093" - "\\x1f0a0-\\x1f0ae\\x1f0b1-\\x1f0bf\\x1f0c1-\\x1f0cf" - "\\x1f0d1-\\x1f0f5\\x1f110-\\x1f12e\\x1f130-\\x1f16b" - "\\x1f170-\\x1f19a\\x1f1e6-\\x1f202\\x1f210-\\x1f23a" - 
"\\x1f240-\\x1f248\\x1f250\\x1f251\\x1f300-\\x1f3fa" - "\\x1f400-\\x1f579\\x1f57b-\\x1f5a3\\x1f5a5-\\x1f6d0" - "\\x1f6e0-\\x1f6ec\\x1f6f0-\\x1f6f3\\x1f700-\\x1f773" - "\\x1f780-\\x1f7d4\\x1f800-\\x1f80b\\x1f810-\\x1f847" - "\\x1f850-\\x1f859\\x1f860-\\x1f887\\x1f890-\\x1f8ad" - "\\x1f910-\\x1f918\\x1f980-\\x1f984\\x1f9c0]"; - } - - static const char *separator_line() - { - return "[\\x2028]"; - } - - static const char *separator_paragraph() - { - return "[\\x2029]"; - } - - static const char *separator_space() - { - return "[\\x20\\xa0\\x1680\\x2000-\\x200a\\x202f\\x205f\\x3000]"; - } - - template - static input_char_type decode_octal(state_type &state_) - { - std::size_t oct_ = 0; - auto ch_ = *state_._curr; - unsigned short count_ = 3; - bool eos_ = false; - - for (;;) - { - oct_ *= 8; - oct_ += ch_ - '0'; - --count_; - state_.increment(); - eos_ = state_.eos(); - - if (!count_ || eos_) break; - - ch_ = *state_._curr; - - // Don't consume invalid chars! - if (ch_ < '0' || ch_ > '7') - { - break; - } - } - - if (oct_ > static_cast(char_traits::max_val())) - { - std::ostringstream ss_; - - ss_ << "Escape \\" << std::oct << oct_ << - " is too big for the state machine char type " - "preceding index " << std::dec << state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - return static_cast(oct_); - } - - template - static input_char_type decode_control_char(state_type &state_) - { - // Skip over 'c' - state_.increment(); - - typename state_type::char_type ch_ = 0; - bool eos_ = state_.next(ch_); - - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following \\c"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - else - { - if (ch_ >= 'a' && ch_ <= 'z') - { - ch_ -= 'a' - 1; - } - else if (ch_ >= 'A' && ch_ <= 'Z') - { - ch_ -= 'A' - 1; - } - else if (ch_ == '@') - { - // Apparently... 
- ch_ = 0; - } - else - { - std::ostringstream ss_; - - ss_ << "Invalid control char at index " << - state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - - return ch_; - } - - template - static input_char_type decode_hex(state_type &state_) - { - // Skip over 'x' - state_.increment(); - - typename state_type::char_type ch_ = 0; - bool eos_ = state_.next(ch_); - - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following \\x"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - if (!((ch_ >= '0' && ch_ <= '9') || (ch_ >= 'a' && ch_ <= 'f') || - (ch_ >= 'A' && ch_ <= 'F'))) - { - std::ostringstream ss_; - - ss_ << "Illegal char following \\x at index " << - state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - std::size_t hex_ = 0; - - do - { - hex_ *= 16; - - if (ch_ >= '0' && ch_ <= '9') - { - hex_ += ch_ - '0'; - } - else if (ch_ >= 'a' && ch_ <= 'f') - { - hex_ += 10 + (ch_ - 'a'); - } - else - { - hex_ += 10 + (ch_ - 'A'); - } - - eos_ = state_.eos(); - - if (!eos_) - { - ch_ = *state_._curr; - - // Don't consume invalid chars! 
- if (((ch_ >= '0' && ch_ <= '9') || - (ch_ >= 'a' && ch_ <= 'f') || (ch_ >= 'A' && ch_ <= 'F'))) - { - state_.increment(); - } - else - { - eos_ = true; - } - } - } while (!eos_); - - if (hex_ > static_cast(char_traits::max_val())) - { - std::ostringstream ss_; - - ss_ << "Escape \\x" << std::hex << hex_ << - " is too big for the state machine char type " << - "preceding index " << - std::dec << state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - return static_cast(hex_); - } - - template - static void charset_range(const bool chset_, state_type &state_, - bool &eos_, typename state_type::char_type &ch_, - const input_char_type prev_, string_token &chars_) - { - if (chset_) - { - std::ostringstream ss_; - - ss_ << "Charset cannot form start of range preceding " - "index " << state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - eos_ = state_.next(ch_); - - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " following '-'"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - input_char_type curr_ = 0; - - if (ch_ == '\\') - { - std::size_t str_len_ = 0; - - if (escape_sequence(state_, curr_, str_len_)) - { - std::ostringstream ss_; - - ss_ << "Charset cannot form end of range preceding index " - << state_.index(); - state_.error(ss_); - throw runtime_error(ss_.str()); - } - } - else if (ch_ == '[' && !state_.eos() && *state_._curr == ':') - { - std::ostringstream ss_; - - ss_ << "POSIX char class cannot form end of range at " - "index " << state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - else - { - curr_ = ch_; - } - - eos_ = state_.next(ch_); - - // Covers preceding if and else - if (eos_) - { - std::ostringstream ss_; - - // Pointless returning index if at end of string - state_.unexpected_end(ss_); - ss_ << " (missing ']')"; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - 
- // Use index_type as char is generally signed - // and we want to ignore signedness. - auto start_ = static_cast(prev_); - auto end_ = static_cast(curr_); - - // Semanic check - if (end_ < start_) - { - std::ostringstream ss_; - - ss_ << "Max less than Min in charset range preceding index " << - state_.index() - 1; - state_.error(ss_); - throw runtime_error(ss_.str()); - } - - // Even though ranges are used now, we still need to consider - // each character if icase is set. - if (state_._flags & icase) - { - range range_(start_, end_); - string_token folded_; - - chars_.insert(range_); - fold(range_, state_._locale, folded_, - size()); - - if (!folded_.empty()) - { - chars_.insert(folded_); - } - } - else - { - chars_.insert(range(prev_, curr_)); - } - } -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_state.hpp b/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_state.hpp deleted file mode 100644 index 1d41ea5c..00000000 --- a/YACReaderLibrary/lexertl/parser/tokeniser/re_tokeniser_state.hpp +++ /dev/null @@ -1,136 +0,0 @@ -// tokeniser_state.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_RE_TOKENISER_STATE_HPP -#define LEXERTL_RE_TOKENISER_STATE_HPP - -#include "../../char_traits.hpp" -#include "../../enums.hpp" -#include -#include "../../narrow.hpp" -#include - -namespace lexertl -{ -namespace detail -{ -template -struct basic_re_tokeniser_state -{ - using char_type = ch_type; - using index_type = typename basic_char_traits::index_type; - - const char_type * const _start; - const char_type * const _end; - const char_type *_curr; - id_type _id; - std::size_t _flags; - std::stack _flags_stack; - std::locale _locale; - const char_type *_macro_name; - long _paren_count; - bool _in_string; - id_type _nl_id; - - basic_re_tokeniser_state(const char_type *start_, - const char_type * const end_, id_type id_, const std::size_t flags_, - const std::locale locale_, const char_type *macro_name_) : - _start(start_), - _end(end_), - _curr(start_), - _id(id_), - _flags(flags_), - _flags_stack(), - _locale(locale_), - _macro_name(macro_name_), - _paren_count(0), - _in_string(false), - _nl_id(static_cast(~0)) - { - } - - basic_re_tokeniser_state(const basic_re_tokeniser_state &rhs_) - { - assign(rhs_); - } - - // prevent VC++ 7.1 warning: - const basic_re_tokeniser_state &operator = - (const basic_re_tokeniser_state &rhs_) - { - return assign(rhs_); - } - - basic_re_tokeniser_state &assign(const basic_re_tokeniser_state &rhs_) - { - _start = rhs_._start; - _end = rhs_._end; - _curr = rhs_._curr; - _id = rhs_._id; - _flags = rhs_._flags; - _flags_stack = rhs_._flags_stack; - _locale = rhs_._locale; - _macro_name = rhs_._macro_name; - _paren_count = rhs_._paren_count; - _in_string = rhs_._in_string; - _nl_id = rhs_._nl_id; - return *this; - } - - inline bool next(char_type &ch_) - { - if (_curr >= _end) - { - ch_ = 0; - return true; - } - else - { - ch_ = *_curr; - increment(); - return false; - } - } - - inline void increment() - { - ++_curr; - } - - inline 
std::size_t index() - { - return _curr - _start; - } - - inline bool eos() - { - return _curr >= _end; - } - - inline void unexpected_end(std::ostringstream &ss_) - { - ss_ << "Unexpected end of regex"; - } - - inline void error(std::ostringstream &ss_) - { - ss_ << " in "; - - if (_macro_name) - { - ss_ << "MACRO '"; - narrow(_macro_name, ss_); - ss_ << "'."; - } - else - { - ss_ << "rule id " << _id << '.'; - } - } -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/end_node.hpp b/YACReaderLibrary/lexertl/parser/tree/end_node.hpp deleted file mode 100644 index c485fca5..00000000 --- a/YACReaderLibrary/lexertl/parser/tree/end_node.hpp +++ /dev/null @@ -1,111 +0,0 @@ -// end_node.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_END_NODE_HPP -#define LEXERTL_END_NODE_HPP - -#include "node.hpp" - -namespace lexertl -{ -namespace detail -{ -template -class basic_end_node : public basic_node -{ -public: - using node = basic_node; - using bool_stack = typename node::bool_stack; - using const_node_stack = typename node::const_node_stack; - using node_ptr_vector = typename node::node_ptr_vector; - using node_stack = typename node::node_stack; - using node_type = typename node::node_type; - using node_vector = typename node::node_vector; - - basic_end_node(const id_type id_, const id_type user_id_, - const id_type next_dfa_, const id_type push_dfa_, - const bool pop_dfa_) : - node(false), - _id(id_), - _user_id(user_id_), - _next_dfa(next_dfa_), - _push_dfa(push_dfa_), - _pop_dfa(pop_dfa_), - _followpos() - { - node::_firstpos.push_back(this); - node::_lastpos.push_back(this); - } - - virtual ~basic_end_node() override - { - } - - virtual node_type what_type() const override - { - return node::END; - } - - virtual bool traverse(const_node_stack 
&/*node_stack_*/, - bool_stack &/*perform_op_stack_*/) const override - { - return false; - } - - virtual const node_vector &followpos() const override - { - // _followpos is always empty..! - return _followpos; - } - - virtual bool end_state() const override - { - return true; - } - - virtual id_type id() const override - { - return _id; - } - - virtual id_type user_id() const override - { - return _user_id; - } - - virtual id_type next_dfa() const override - { - return _next_dfa; - } - - virtual id_type push_dfa() const override - { - return _push_dfa; - } - - virtual bool pop_dfa() const override - { - return _pop_dfa; - } - -private: - id_type _id; - id_type _user_id; - id_type _next_dfa; - id_type _push_dfa; - bool _pop_dfa; - node_vector _followpos; - - virtual void copy_node(node_ptr_vector &/*node_ptr_vector_*/, - node_stack &/*new_node_stack_*/, bool_stack &/*perform_op_stack_*/, - bool &/*down_*/) const override - { - // Nothing to do, as end_nodes are not copied. - } -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/iteration_node.hpp b/YACReaderLibrary/lexertl/parser/tree/iteration_node.hpp deleted file mode 100644 index 41baba7e..00000000 --- a/YACReaderLibrary/lexertl/parser/tree/iteration_node.hpp +++ /dev/null @@ -1,96 +0,0 @@ -// iteration_node.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_ITERATION_NODE_HPP -#define LEXERTL_ITERATION_NODE_HPP - -#include "node.hpp" - -namespace lexertl -{ -namespace detail -{ -template -class basic_iteration_node : public basic_node -{ -public: - using node = basic_node; - using bool_stack = typename node::bool_stack; - using const_node_stack = typename node::const_node_stack; - using node_ptr_vector = typename node::node_ptr_vector; - using node_stack = typename node::node_stack; - using node_type = typename node::node_type; - using node_vector = typename node::node_vector; - - basic_iteration_node(observer_ptr next_, const bool greedy_) : - node(true), - _next(next_), - _greedy(greedy_) - { - _next->append_firstpos(node::_firstpos); - _next->append_lastpos(node::_lastpos); - - for (observer_ptr node_ : node::_lastpos) - { - node_->append_followpos(node::_firstpos); - } - - for (observer_ptr node_ : node::_firstpos) - { - node_->greedy(greedy_); - } - } - - virtual ~basic_iteration_node() override - { - } - - virtual node_type what_type() const override - { - return node::ITERATION; - } - - virtual bool traverse(const_node_stack &node_stack_, - bool_stack &perform_op_stack_) const override - { - perform_op_stack_.push(true); - node_stack_.push(_next); - return true; - } - -private: - observer_ptr _next; - bool _greedy; - - virtual void copy_node(node_ptr_vector &node_ptr_vector_, - node_stack &new_node_stack_, bool_stack &perform_op_stack_, - bool &down_) const override - { - if (perform_op_stack_.top()) - { - observer_ptr ptr_ = new_node_stack_.top(); - - node_ptr_vector_.emplace_back - (std::make_unique(ptr_, _greedy)); - new_node_stack_.top() = node_ptr_vector_.back().get(); - } - else - { - down_ = true; - } - - perform_op_stack_.pop(); - } - - // No copy construction. - basic_iteration_node(const basic_iteration_node &) = delete; - // No assignment. 
- const basic_iteration_node &operator = - (const basic_iteration_node &) = delete; -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/leaf_node.hpp b/YACReaderLibrary/lexertl/parser/tree/leaf_node.hpp deleted file mode 100644 index ef1b485e..00000000 --- a/YACReaderLibrary/lexertl/parser/tree/leaf_node.hpp +++ /dev/null @@ -1,110 +0,0 @@ -// leaf_node.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_LEAF_NODE_HPP -#define LEXERTL_LEAF_NODE_HPP - -#include "../../enums.hpp" // null_token -#include "node.hpp" - -namespace lexertl -{ -namespace detail -{ -template -class basic_leaf_node : public basic_node -{ -public: - using node = basic_node; - using bool_stack = typename node::bool_stack; - using const_node_stack = typename node::const_node_stack; - using node_ptr_vector = typename node::node_ptr_vector; - using node_stack = typename node::node_stack; - using node_type = typename node::node_type; - using node_vector = typename node::node_vector; - - basic_leaf_node(const id_type token_, const bool greedy_) : - node(token_ == node::null_token()), - _token(token_), - _set_greedy(!greedy_), - _greedy(greedy_), - _followpos() - { - if (!node::_nullable) - { - node::_firstpos.push_back(this); - node::_lastpos.push_back(this); - } - } - - virtual ~basic_leaf_node() override - { - } - - virtual void append_followpos(const node_vector &followpos_) override - { - _followpos.insert(_followpos.end(), - followpos_.begin(), followpos_.end()); - } - - virtual node_type what_type() const override - { - return node::LEAF; - } - - virtual bool traverse(const_node_stack &/*node_stack_*/, - bool_stack &/*perform_op_stack_*/) const override - { - return false; - } - - virtual id_type token() const override - { - return _token; - } - - virtual void greedy(const bool 
greedy_) override - { - if (!_set_greedy) - { - _greedy = greedy_; - _set_greedy = true; - } - } - - virtual bool greedy() const override - { - return _greedy; - } - - virtual const node_vector &followpos() const override - { - return _followpos; - } - - virtual node_vector &followpos() override - { - return _followpos; - } - -private: - id_type _token; - bool _set_greedy; - bool _greedy; - node_vector _followpos; - - virtual void copy_node(node_ptr_vector &node_ptr_vector_, - node_stack &new_node_stack_, bool_stack &/*perform_op_stack_*/, - bool &/*down_*/) const override - { - node_ptr_vector_.emplace_back(std::make_unique - (_token, _greedy)); - new_node_stack_.push(node_ptr_vector_.back().get()); - } -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/node.hpp b/YACReaderLibrary/lexertl/parser/tree/node.hpp deleted file mode 100644 index cee7729a..00000000 --- a/YACReaderLibrary/lexertl/parser/tree/node.hpp +++ /dev/null @@ -1,242 +0,0 @@ -// node.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_NODE_HPP -#define LEXERTL_NODE_HPP - -#include -#include -#include "../../observer_ptr.hpp" -#include "../../runtime_error.hpp" -#include -#include - -namespace lexertl -{ -namespace detail -{ -template -class basic_node -{ -public: - enum node_type {LEAF, SEQUENCE, SELECTION, ITERATION, END}; - - using bool_stack = std::stack; - using node_stack = std::stack>; - using const_node_stack = std::stack>; - using node_vector = std::vector>; - using node_ptr_vector = std::vector>; - - basic_node() : - _nullable(false), - _firstpos(), - _lastpos() - { - } - - basic_node(const bool nullable_) : - _nullable(nullable_), - _firstpos(), - _lastpos() - { - } - - virtual ~basic_node() - { - } - - static id_type null_token() - { - return static_cast(~0); - } - - bool nullable() const - { - return _nullable; - } - - void append_firstpos(node_vector &firstpos_) const - { - firstpos_.insert(firstpos_.end(), - _firstpos.begin(), _firstpos.end()); - } - - void append_lastpos(node_vector &lastpos_) const - { - lastpos_.insert(lastpos_.end(), - _lastpos.begin(), _lastpos.end()); - } - - virtual void append_followpos(const node_vector &/*followpos_*/) - { - throw runtime_error("Internal error node::append_followpos()."); - } - - observer_ptr copy(node_ptr_vector &node_ptr_vector_) const - { - observer_ptr new_root_ = nullptr; - const_node_stack node_stack_; - bool_stack perform_op_stack_; - bool down_ = true; - node_stack new_node_stack_; - - node_stack_.push(this); - - while (!node_stack_.empty()) - { - while (down_) - { - down_ = node_stack_.top()->traverse(node_stack_, - perform_op_stack_); - } - - while (!down_ && !node_stack_.empty()) - { - observer_ptr top_ = node_stack_.top(); - - top_->copy_node(node_ptr_vector_, new_node_stack_, - perform_op_stack_, down_); - - if (!down_) node_stack_.pop(); - } - } - - assert(new_node_stack_.size() == 1); - new_root_ = 
new_node_stack_.top(); - new_node_stack_.pop(); - return new_root_; - } - - virtual node_type what_type() const = 0; - - virtual bool traverse(const_node_stack &node_stack_, - bool_stack &perform_op_stack_) const = 0; - - node_vector &firstpos() - { - return _firstpos; - } - - const node_vector &firstpos() const - { - return _firstpos; - } - - // _lastpos modified externally, so not const & - node_vector &lastpos() - { - return _lastpos; - } - - virtual bool end_state() const - { - return false; - } - - virtual id_type id() const - { - throw runtime_error("Internal error node::id()."); -#ifdef __SUNPRO_CC - // Stop bogus Solaris compiler warning - return id_type(); -#endif - } - - virtual id_type user_id() const - { - throw runtime_error("Internal error node::user_id()."); -#ifdef __SUNPRO_CC - // Stop bogus Solaris compiler warning - return id_type(); -#endif - } - - virtual id_type next_dfa() const - { - throw runtime_error("Internal error node::next_dfa()."); -#ifdef __SUNPRO_CC - // Stop bogus Solaris compiler warning - return id_type(); -#endif - } - - virtual id_type push_dfa() const - { - throw runtime_error("Internal error node::push_dfa()."); -#ifdef __SUNPRO_CC - // Stop bogus Solaris compiler warning - return id_type(); -#endif - } - - virtual bool pop_dfa() const - { - throw runtime_error("Internal error node::pop_dfa()."); -#ifdef __SUNPRO_CC - // Stop bogus Solaris compiler warning - return false; -#endif - } - - virtual id_type token() const - { - throw runtime_error("Internal error node::token()."); -#ifdef __SUNPRO_CC - // Stop bogus Solaris compiler warning - return id_type(); -#endif - } - - virtual void greedy(const bool /*greedy_*/) - { - throw runtime_error("Internal error node::greedy(bool)."); - } - - virtual bool greedy() const - { - throw runtime_error("Internal error node::greedy()."); -#ifdef __SUNPRO_CC - // Stop bogus Solaris compiler warning - return false; -#endif - } - - virtual const node_vector &followpos() const - { - throw 
runtime_error("Internal error node::followpos()."); -#ifdef __SUNPRO_CC - // Stop bogus Solaris compiler warning - return firstpos; -#endif - } - - virtual node_vector &followpos() - { - throw runtime_error("Internal error node::followpos()."); -#ifdef __SUNPRO_CC - // Stop bogus Solaris compiler warning - return firstpos; -#endif - } - -protected: - const bool _nullable; - node_vector _firstpos; - node_vector _lastpos; - - virtual void copy_node(node_ptr_vector &node_ptr_vector_, - node_stack &new_node_stack_, bool_stack &perform_op_stack_, - bool &down_) const = 0; - -private: - // No copy construction. - basic_node(const basic_node &) = delete; - // No assignment. - const basic_node &operator =(const basic_node &) = delete; -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/selection_node.hpp b/YACReaderLibrary/lexertl/parser/tree/selection_node.hpp deleted file mode 100644 index 603bbc68..00000000 --- a/YACReaderLibrary/lexertl/parser/tree/selection_node.hpp +++ /dev/null @@ -1,104 +0,0 @@ -// selection_node.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_SELECTION_NODE_HPP -#define LEXERTL_SELECTION_NODE_HPP - -#include "node.hpp" - -namespace lexertl -{ -namespace detail -{ -template -class basic_selection_node : public basic_node -{ -public: - using node = basic_node; - using bool_stack = typename node::bool_stack; - using const_node_stack = typename node::const_node_stack; - using node_ptr_vector = typename node::node_ptr_vector; - using node_stack = typename node::node_stack; - using node_type = typename node::node_type; - - basic_selection_node(observer_ptr left_, observer_ptr right_) : - node(left_->nullable() || right_->nullable()), - _left(left_), - _right(right_) - { - _left->append_firstpos(node::_firstpos); - _right->append_firstpos(node::_firstpos); - _left->append_lastpos(node::_lastpos); - _right->append_lastpos(node::_lastpos); - } - - virtual ~basic_selection_node() override - { - } - - virtual node_type what_type() const override - { - return node::SELECTION; - } - - virtual bool traverse(const_node_stack &node_stack_, - bool_stack &perform_op_stack_) const override - { - perform_op_stack_.push(true); - - switch (_right->what_type()) - { - case node::SEQUENCE: - case node::SELECTION: - case node::ITERATION: - perform_op_stack_.push(false); - break; - default: - break; - } - - node_stack_.push(_right); - node_stack_.push(_left); - return true; - } - -private: - observer_ptr _left; - observer_ptr _right; - - virtual void copy_node(node_ptr_vector &node_ptr_vector_, - node_stack &new_node_stack_, bool_stack &perform_op_stack_, - bool &down_) const override - { - if (perform_op_stack_.top()) - { - observer_ptr rhs_ = new_node_stack_.top(); - - new_node_stack_.pop(); - - observer_ptr lhs_ = new_node_stack_.top(); - - node_ptr_vector_.emplace_back - (std::make_unique(lhs_, rhs_)); - new_node_stack_.top() = node_ptr_vector_.back().get(); - } - else - { - down_ = true; - } - - 
perform_op_stack_.pop(); - } - - // No copy construction. - basic_selection_node(const basic_selection_node &) = delete; - // No assignment. - const basic_selection_node &operator = - (const basic_selection_node &) = delete; -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/parser/tree/sequence_node.hpp b/YACReaderLibrary/lexertl/parser/tree/sequence_node.hpp deleted file mode 100644 index 22276735..00000000 --- a/YACReaderLibrary/lexertl/parser/tree/sequence_node.hpp +++ /dev/null @@ -1,121 +0,0 @@ -// sequence_node.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_SEQUENCE_NODE_HPP -#define LEXERTL_SEQUENCE_NODE_HPP - -#include "node.hpp" - -namespace lexertl -{ -namespace detail -{ -template -class basic_sequence_node : public basic_node -{ -public: - using node = basic_node; - using bool_stack = typename node::bool_stack; - using const_node_stack = typename node::const_node_stack; - using node_ptr_vector = typename node::node_ptr_vector; - using node_stack = typename node::node_stack; - using node_type = typename node::node_type; - using node_vector = typename node::node_vector; - - basic_sequence_node(observer_ptr left_, observer_ptr right_) : - node(left_->nullable() && right_->nullable()), - _left(left_), - _right(right_) - { - _left->append_firstpos(node::_firstpos); - - if (_left->nullable()) - { - _right->append_firstpos(node::_firstpos); - } - - if (_right->nullable()) - { - _left->append_lastpos(node::_lastpos); - } - - _right->append_lastpos(node::_lastpos); - - node_vector &lastpos_ = _left->lastpos(); - const node_vector &firstpos_ = _right->firstpos(); - - for (observer_ptr node_ : lastpos_) - { - node_->append_followpos(firstpos_); - } - } - - virtual ~basic_sequence_node() override - { - } - - virtual node_type what_type() const override - { - 
return node::SEQUENCE; - } - - virtual bool traverse(const_node_stack &node_stack_, - bool_stack &perform_op_stack_) const override - { - perform_op_stack_.push(true); - - switch (_right->what_type()) - { - case node::SEQUENCE: - case node::SELECTION: - case node::ITERATION: - perform_op_stack_.push(false); - break; - default: - break; - } - - node_stack_.push(_right); - node_stack_.push(_left); - return true; - } - -private: - observer_ptr _left; - observer_ptr _right; - - virtual void copy_node(node_ptr_vector &node_ptr_vector_, - node_stack &new_node_stack_, bool_stack &perform_op_stack_, - bool &down_) const override - { - if (perform_op_stack_.top()) - { - observer_ptr rhs_ = new_node_stack_.top(); - - new_node_stack_.pop(); - - observer_ptr lhs_ = new_node_stack_.top(); - - node_ptr_vector_.emplace_back - (std::make_unique(lhs_, rhs_)); - new_node_stack_.top() = node_ptr_vector_.back().get(); - } - else - { - down_ = true; - } - - perform_op_stack_.pop(); - } - - // No copy construction. - basic_sequence_node(const basic_sequence_node &) = delete; - // No assignment. - const basic_sequence_node &operator =(const basic_sequence_node &) = delete; -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/partition/charset.hpp b/YACReaderLibrary/lexertl/partition/charset.hpp deleted file mode 100644 index 2bfbd335..00000000 --- a/YACReaderLibrary/lexertl/partition/charset.hpp +++ /dev/null @@ -1,72 +0,0 @@ -// charset.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#ifndef LEXERTL_CHARSET_HPP -#define LEXERTL_CHARSET_HPP - -#include -#include -#include -#include "../string_token.hpp" - -namespace lexertl -{ -namespace detail -{ -template -struct basic_charset -{ - using token = basic_string_token; - using index_set = std::set; - - token _token; - index_set _index_set; - - basic_charset() : - _token(), - _index_set() - { - } - - basic_charset(const token &token_, const id_type index_) : - _token(token_), - _index_set() - { - _index_set.insert(index_); - } - - bool empty() const - { - return _token.empty() && _index_set.empty(); - } - - void intersect(basic_charset &rhs_, basic_charset &overlap_) - { - _token.intersect(rhs_._token, overlap_._token); - - if (!overlap_._token.empty()) - { - std::merge(_index_set.begin(), _index_set.end(), - rhs_._index_set.begin(), rhs_._index_set.end(), - std::inserter(overlap_._index_set, - overlap_._index_set.end())); - - if (_token.empty()) - { - _index_set.clear(); - } - - if (rhs_._token.empty()) - { - rhs_._index_set.clear(); - } - } - } -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/partition/equivset.hpp b/YACReaderLibrary/lexertl/partition/equivset.hpp deleted file mode 100644 index af709b03..00000000 --- a/YACReaderLibrary/lexertl/partition/equivset.hpp +++ /dev/null @@ -1,135 +0,0 @@ -// equivset.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_EQUIVSET_HPP -#define LEXERTL_EQUIVSET_HPP - -#include -#include "../parser/tree/node.hpp" -#include - -namespace lexertl -{ -namespace detail -{ -template -struct basic_equivset -{ - using index_set = std::set; - using index_vector = std::vector; - using node = basic_node; - using node_vector = std::vector>; - - index_vector _index_vector; - id_type _id; - bool _greedy; - node_vector _followpos; - - basic_equivset() : - _index_vector(), - _id(0), - _greedy(true), - _followpos() - { - } - - basic_equivset(const index_set &index_set_, const id_type id_, - const bool greedy_, const node_vector &followpos_) : - _index_vector(index_set_.begin(), index_set_.end()), - _id(id_), - _greedy(greedy_), - _followpos(followpos_) - { - } - - bool empty() const - { - return _index_vector.empty() && _followpos.empty(); - } - - void intersect(basic_equivset &rhs_, basic_equivset &overlap_) - { - intersect_indexes(rhs_._index_vector, overlap_._index_vector); - - if (!overlap_._index_vector.empty()) - { - // Note that the LHS takes priority in order to - // respect rule ordering priority in the lex spec. 
- overlap_._id = _id; - overlap_._greedy = _greedy; - overlap_._followpos = _followpos; - - auto overlap_begin_ = overlap_._followpos.cbegin(); - auto overlap_end_ = overlap_._followpos.cend(); - - for (observer_ptr node_ : rhs_._followpos) - { - if (std::find(overlap_begin_, overlap_end_, node_) == - overlap_end_) - { - overlap_._followpos.push_back(node_); - overlap_begin_ = overlap_._followpos.begin(); - overlap_end_ = overlap_._followpos.end(); - } - } - - if (_index_vector.empty()) - { - _followpos.clear(); - } - - if (rhs_._index_vector.empty()) - { - rhs_._followpos.clear(); - } - } - } - -private: - void intersect_indexes(index_vector &rhs_, index_vector &overlap_) - { - std::set_intersection(_index_vector.begin(), _index_vector.end(), - rhs_.begin(), rhs_.end(), std::back_inserter(overlap_)); - - if (!overlap_.empty()) - { - remove(overlap_, _index_vector); - remove(overlap_, rhs_); - } - } - - void remove(const index_vector &source_, index_vector &dest_) - { - auto inter_ = source_.begin(); - auto inter_end_ = source_.end(); - auto reader_ = std::find(dest_.begin(), dest_.end(), *inter_); - auto writer_ = reader_; - auto dest_end_ = dest_.end(); - - while (writer_ != dest_end_ && inter_ != inter_end_) - { - if (*reader_ == *inter_) - { - ++inter_; - ++reader_; - } - else - { - *writer_++ = *reader_++; - } - } - - while (reader_ != dest_end_) - { - *writer_++ = *reader_++; - } - - dest_.resize(dest_.size() - source_.size()); - } -}; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/rules.hpp b/YACReaderLibrary/lexertl/rules.hpp deleted file mode 100644 index c5b29e0b..00000000 --- a/YACReaderLibrary/lexertl/rules.hpp +++ /dev/null @@ -1,1018 +0,0 @@ -// rules.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_RULES_HPP -#define LEXERTL_RULES_HPP - -#include "enums.hpp" -#include -#include -#include "narrow.hpp" -#include "observer_ptr.hpp" -#include "parser/tokeniser/re_tokeniser.hpp" -#include "runtime_error.hpp" -#include -#include -#include -#include - -namespace lexertl -{ -template -class basic_rules -{ -public: - using bool_vector = std::vector; - using bool_vector_vector = std::vector; - using char_type = ch_type; - using rules_char_type = r_ch_type; - using id_type = id_ty; - using id_vector = std::vector; - using id_vector_vector = std::vector; - using re_state = detail::basic_re_tokeniser_state; - using string = std::basic_string; - using string_token = basic_string_token; - using string_vector = std::vector; - using string_set = std::set; - using string_pair = std::pair; - using string_id_type_map = std::map; - using string_id_type_pair = std::pair; - using token = detail::basic_re_token; - using token_vector = std::vector; - using token_vector_vector = std::vector; - using token_vector_vector_vector = std::vector; - using macro_map = std::map; - using macro_pair = std::pair; - using tokeniser = - detail::basic_re_tokeniser; - - // If you get a compile error here you have - // failed to define an unsigned id type. 
- static_assert(std::is_unsigned::value, "Your id type is signed"); - -#ifdef _WIN32 - basic_rules(const std::size_t flags_ = dot_not_cr_lf) : -#else - basic_rules(const std::size_t flags_ = dot_not_newline) : -#endif - _statemap(), - _macro_map(), - _regexes(), - _features(), - _ids(), - _user_ids(), - _next_dfas(), - _pushes(), - _pops(), - _flags(flags_), - _locale(), - _lexer_state_names() - { - push_state(initial()); - } - - void clear() - { - _statemap.clear(); - _macro_map.clear(); - _regexes.clear(); - _features.clear(); - _ids.clear(); - _user_ids.clear(); - _next_dfas.clear(); - _pushes.clear(); - _pops.clear(); -#ifdef _WIN32 - _flags = dot_not_cr_lf; -#else - _flags = dot_not_newline; -#endif - _locale = std::locale(); - _lexer_state_names.clear(); - push_state(initial()); - } - - void clear(const id_type dfa_) - { - if (_regexes.size() > dfa_) - { - _regexes[dfa_].clear(); - _features[dfa_] = 0; - _ids[dfa_].clear(); - _user_ids[dfa_].clear(); - _next_dfas[dfa_].clear(); - _pushes[dfa_].clear(); - _pops[dfa_].clear(); - } - } - - void flags(const std::size_t flags_) - { - _flags = flags_; - } - - std::size_t flags() const - { - return _flags; - } - - static id_type skip() - { - return static_cast(~1); - } - - id_type eoi() const - { - return 0; - } - - static id_type npos() - { - return static_cast(~0); - } - - std::locale imbue(const std::locale &locale_) - { - std::locale loc_ = _locale; - - _locale = locale_; - return loc_; - } - - const std::locale &locale() const - { - return _locale; - } - - const rules_char_type *state(const id_type index_) const - { - if (index_ == 0) - { - return initial(); - } - else - { - const id_type i_ = index_ - 1; - - if (_lexer_state_names.size() > i_) - { - return _lexer_state_names[i_].c_str(); - } - else - { - return 0; - } - } - } - - id_type state(const rules_char_type *name_) const - { - typename string_id_type_map::const_iterator iter_ = - _statemap.find(name_); - - if (iter_ == _statemap.end()) - { - return 
npos(); - } - else - { - return iter_->second; - } - } - - id_type push_state(const rules_char_type *name_) - { - validate(name_); - - if (_statemap.insert(string_id_type_pair(name_, - static_cast(_statemap.size()))).second) - { - _regexes.push_back(token_vector_vector()); - _features.push_back(0); - _ids.push_back(id_vector()); - _user_ids.push_back(id_vector()); - _next_dfas.push_back(id_vector()); - _pushes.push_back(id_vector()); - _pops.push_back(bool_vector()); - - if (string(name_) != initial()) - { - _lexer_state_names.push_back(name_); - } - } - else - { - return _statemap.find(name_)->second; - } - - if (_next_dfas.size() > npos()) - { - // Overflow - throw runtime_error("The data type you have chosen cannot hold " - "this many lexer start states."); - } - - // Initial is not stored, so no need to - 1. - return static_cast(_lexer_state_names.size()); - } - - void insert_macro(const rules_char_type *name_, - const rules_char_type *regex_) - { - insert_macro(name_, string(regex_)); - } - - void insert_macro(const rules_char_type *name_, - const rules_char_type *regex_start_, - const rules_char_type *regex_end_) - { - insert_macro(name_, string(regex_start_, regex_end_)); - } - - void insert_macro(const rules_char_type *name_, const string ®ex_) - { - validate(name_); - - typename macro_map::const_iterator iter_ = _macro_map.find(name_); - - if (iter_ == _macro_map.end()) - { - auto pair_ = _macro_map.insert(macro_pair(name_, token_vector())); - - tokenise(regex_, pair_.first->second, npos(), name_); - } - else - { - std::ostringstream ss_; - - ss_ << "Attempt to redefine MACRO '"; - narrow(name_, ss_); - ss_ << "'."; - throw runtime_error(ss_.str()); - } - } - - // Add rule to INITIAL - void push(const rules_char_type *regex_, const id_type id_, - const id_type user_id_ = npos()) - { - push(string(regex_), id_, user_id_); - } - - void push(const rules_char_type *regex_start_, - const rules_char_type *regex_end_, - const id_type id_, const id_type user_id_ = 
npos()) - { - push(string(regex_start_, regex_end_), id_, user_id_); - } - - void push(const string ®ex_, const id_type id_, - const id_type user_id_ = npos()) - { - check_for_invalid_id(id_); - _regexes.front().push_back(token_vector()); - tokenise(regex_, _regexes.front().back(), id_, 0); - - if (regex_[0] == '^') - { - _features.front() |= bol_bit; - } - - if (regex_.size() > 0 && regex_[regex_.size() - 1] == '$') - { - _features.front() |= eol_bit; - } - - if (id_ == skip()) - { - _features.front() |= skip_bit; - } - else if (id_ == eoi()) - { - _features.front() |= again_bit; - } - - _ids.front().push_back(id_); - _user_ids.front().push_back(user_id_); - _next_dfas.front().push_back(0); - _pushes.front().push_back(npos()); - _pops.front().push_back(false); - } - - // Add rule with no id - void push(const rules_char_type *curr_dfa_, - const rules_char_type *regex_, const rules_char_type *new_dfa_) - { - push(curr_dfa_, string(regex_), new_dfa_); - } - - void push(const rules_char_type *curr_dfa_, - const rules_char_type *regex_start_, const rules_char_type *regex_end_, - const rules_char_type *new_dfa_) - { - push(curr_dfa_, string(regex_start_, regex_end_), new_dfa_); - } - - void push(const rules_char_type *curr_dfa_, const string ®ex_, - const rules_char_type *new_dfa_) - { - push(curr_dfa_, regex_, eoi(), new_dfa_, false); - } - - // Add rule with id - void push(const rules_char_type *curr_dfa_, - const rules_char_type *regex_, const id_type id_, - const rules_char_type *new_dfa_, const id_type user_id_ = npos()) - { - push(curr_dfa_, string(regex_), id_, new_dfa_, user_id_); - } - - void push(const rules_char_type *curr_dfa_, - const rules_char_type *regex_start_, - const rules_char_type *regex_end_, const id_type id_, - const rules_char_type *new_dfa_, const id_type user_id_ = npos()) - { - push(curr_dfa_, string(regex_start_, regex_end_), - id_, new_dfa_, user_id_); - } - - void push(const rules_char_type *curr_dfa_, const string ®ex_, - const id_type 
id_, const rules_char_type *new_dfa_, - const id_type user_id_ = npos()) - { - push(curr_dfa_, regex_, id_, new_dfa_, true, user_id_); - } - - void reverse() - { - for (auto &state_ : _regexes) - { - for (auto ®ex_ : state_) - { - reverse(regex_); - } - } - - for (auto &pair_ : _macro_map) - { - reverse(pair_.second); - } - } - - const string_id_type_map &statemap() const - { - return _statemap; - } - - const token_vector_vector_vector ®exes() const - { - return _regexes; - } - - const id_vector &features() const - { - return _features; - } - - const id_vector_vector &ids() const - { - return _ids; - } - - const id_vector_vector &user_ids() const - { - return _user_ids; - } - - const id_vector_vector &next_dfas() const - { - return _next_dfas; - } - - const id_vector_vector &pushes() const - { - return _pushes; - } - - const bool_vector_vector &pops() const - { - return _pops; - } - - bool empty() const - { - bool empty_ = true; - - for (const auto ®ex_ : _regexes) - { - if (!regex_.empty()) - { - empty_ = false; - break; - } - } - - return empty_; - } - - static const rules_char_type *initial() - { - static const rules_char_type initial_ [] = - { 'I', 'N', 'I', 'T', 'I', 'A', 'L', 0 }; - - return initial_; - } - - static const rules_char_type *dot() - { - static const rules_char_type dot_ [] = { '.', 0 }; - - return dot_; - } - - static const rules_char_type *all_states() - { - static const rules_char_type star_ [] = { '*', 0 }; - - return star_; - } - -private: - string_id_type_map _statemap; - macro_map _macro_map; - token_vector_vector_vector _regexes; - id_vector _features; - id_vector_vector _ids; - id_vector_vector _user_ids; - id_vector_vector _next_dfas; - id_vector_vector _pushes; - bool_vector_vector _pops; - std::size_t _flags; - std::locale _locale; - string_vector _lexer_state_names; - - void tokenise(const string ®ex_, token_vector &tokens_, - const id_type id_, const rules_char_type *name_) - { - re_state state_(regex_.c_str(), regex_.c_str() + 
regex_.size(), id_, - _flags, _locale, name_); - string macro_; - rules_char_type diff_ = 0; - - tokens_.push_back(token()); - - do - { - observer_ptr lhs_ = &tokens_.back(); - token rhs_; - - tokeniser::next(*lhs_, state_, rhs_); - - if (rhs_._type != detail::DIFF && - lhs_->precedence(rhs_._type) == ' ') - { - std::ostringstream ss_; - - ss_ << "A syntax error occurred: '" << - lhs_->precedence_string() << - "' against '" << rhs_.precedence_string() << - "' preceding index " << state_.index() << - " in "; - - if (name_ != 0) - { - ss_ << "macro "; - narrow(name_, ss_); - } - else - { - ss_ << "rule id " << state_._id; - } - - ss_ << '.'; - throw runtime_error(ss_.str()); - } - - if (rhs_._type == detail::MACRO) - { - typename macro_map::const_iterator iter_ = - _macro_map.find(rhs_._extra); - - macro_ = rhs_._extra; - - if (iter_ == _macro_map.end()) - { - const rules_char_type *rhs_name_ = rhs_._extra.c_str(); - std::ostringstream ss_; - - ss_ << "Unknown MACRO name '"; - narrow(rhs_name_, ss_); - ss_ << "'."; - throw runtime_error(ss_.str()); - } - else - { - const bool multiple_ = iter_->second.size() > 3; - - if (diff_) - { - if (multiple_) - { - std::ostringstream ss_; - - ss_ << "Single CHARSET must follow {-} or {+} at " - "index " << state_.index() - 1 << " in "; - - if (name_ != 0) - { - ss_ << "macro "; - narrow(name_, ss_); - } - else - { - ss_ << "rule id " << state_._id; - } - - ss_ << '.'; - throw runtime_error(ss_.str()); - } - else - { - rhs_ = iter_->second[1]; - } - } - - // Any macro with more than one charset (or quantifiers) - // requires bracketing. - if (multiple_) - { - token open_; - - open_._type = detail::OPENPAREN; - open_._str.insert('('); - tokens_.push_back(open_); - } - - // Don't need to store token if it is diff. 
- if (!diff_) - { - // Don't insert BEGIN or END tokens - tokens_.insert(tokens_.end(), iter_->second.begin() + 1, - iter_->second.end() - 1); - lhs_ = &tokens_.back(); - } - - if (multiple_) - { - token close_; - - close_._type = detail::CLOSEPAREN; - close_._str.insert(')'); - tokens_.push_back(close_); - } - } - } - else if (rhs_._type == detail::DIFF) - { - if (!macro_.empty()) - { - typename macro_map::const_iterator iter_ = - _macro_map.find(macro_); - - if (iter_->second.size() > 3) - { - std::ostringstream ss_; - - ss_ << "Single CHARSET must precede {-} or {+} at " - "index " << state_.index() - 1 << " in "; - - if (name_ != 0) - { - ss_ << "macro "; - narrow(name_, ss_); - } - else - { - ss_ << "rule id " << state_._id; - } - - ss_ << '.'; - throw runtime_error(ss_.str()); - } - } - - diff_ = rhs_._extra[0]; - macro_.clear(); - continue; - } - else if (!diff_) - { - tokens_.push_back(rhs_); - lhs_ = &tokens_.back(); - macro_.clear(); - } - - // diff_ may have been set by previous conditional. 
- if (diff_) - { - if (rhs_._type != detail::CHARSET) - { - std::ostringstream ss_; - - ss_ << "CHARSET must follow {-} or {+} at index " << - state_.index() - 1 << " in "; - - if (name_ != 0) - { - ss_ << "macro "; - narrow(name_, ss_); - } - else - { - ss_ << "rule id " << state_._id; - } - - ss_ << '.'; - throw runtime_error(ss_.str()); - } - - switch (diff_) - { - case '-': - lhs_->_str.remove(rhs_._str); - - if (lhs_->_str.empty()) - { - std::ostringstream ss_; - - ss_ << "Empty charset created by {-} at index " << - state_.index() - 1 << " in "; - - if (name_ != 0) - { - ss_ << "macro "; - narrow(name_, ss_); - } - else - { - ss_ << "rule id " << state_._id; - } - - ss_ << '.'; - throw runtime_error(ss_.str()); - } - - break; - case '+': - lhs_->_str.insert(rhs_._str); - break; - } - - diff_ = 0; - } - } while (tokens_.back()._type != detail::END); - - if (tokens_.size() == 2) - { - std::ostringstream ss_; - - ss_ << "Empty regex in "; - - if (name_ != 0) - { - ss_ << "macro "; - narrow(name_, ss_); - } - else - { - ss_ << "rule id " << state_._id; - } - - ss_ << " is not allowed."; - throw runtime_error(ss_.str()); - } - } - - void reverse(token_vector &vector_) - { - token_vector new_vector_(vector_.size(), token()); - auto iter_ = vector_.rbegin(); - auto end_ = vector_.rend(); - auto dest_ = new_vector_.begin(); - std::stack stack_; - - for (; iter_ != end_; ++iter_, ++dest_) - { - switch (iter_->_type) - { - case detail::BEGIN: - iter_->swap(*dest_); - dest_->_type = detail::END; - break; - case detail::BOL: - iter_->swap(*dest_); - dest_->_type = detail::EOL; - break; - case detail::EOL: - iter_->swap(*dest_); - dest_->_type = detail::BOL; - break; - case detail::OPENPAREN: - iter_->swap(*dest_); - dest_->_type = detail::CLOSEPAREN; - - if (stack_.top() != end_) - { - ++dest_; - dest_->swap(*stack_.top()); - } - - stack_.pop(); - break; - case detail::CLOSEPAREN: - iter_->swap(*dest_); - dest_->_type = detail::OPENPAREN; - stack_.push(end_); - break; - 
case detail::OPT: - case detail::AOPT: - case detail::ZEROORMORE: - case detail::AZEROORMORE: - case detail::ONEORMORE: - case detail::AONEORMORE: - case detail::REPEATN: - case detail::AREPEATN: - { - auto temp_ = iter_ + 1; - - if (temp_->_type == detail::CLOSEPAREN) - { - stack_.push(iter_); - ++iter_; - iter_->swap(*dest_); - dest_->_type = detail::OPENPAREN; - } - else - { - dest_->swap(*temp_); - ++dest_; - dest_->swap(*iter_); - ++iter_; - } - - break; - } - case detail::END: - iter_->swap(*dest_); - dest_->_type = detail::BEGIN; - break; - default: - // detail::OR - // detail::CHARSET - iter_->swap(*dest_); - break; - } - } - - new_vector_.swap(vector_); - } - - void push(const rules_char_type *curr_dfa_, const string ®ex_, - const id_type id_, const rules_char_type *new_dfa_, - const bool check_, const id_type user_id_ = npos()) - { - const bool star_ = *curr_dfa_ == '*' && *(curr_dfa_ + 1) == 0; - const bool dot_ = *new_dfa_ == '.' && *(new_dfa_ + 1) == 0; - const bool push_ = *new_dfa_ == '>'; - const rules_char_type *push_dfa_ = nullptr; - const bool pop_ = *new_dfa_ == '<'; - - if (push_ || pop_) - { - ++new_dfa_; - } - - if (check_) - { - check_for_invalid_id(id_); - } - - if (!dot_ && !pop_) - { - const rules_char_type *temp_ = new_dfa_; - - while (*temp_ && *temp_ != ':') - { - ++temp_; - } - - if (*temp_) push_dfa_ = temp_ + 1; - - validate(new_dfa_, *temp_ ? 
temp_ : 0); - - if (push_dfa_) - { - validate(push_dfa_); - } - } - - // npos means pop here - id_type new_dfa_id_ = npos(); - id_type push_dfa_id_ = npos(); - typename string_id_type_map::const_iterator iter_; - auto end_ = _statemap.cend(); - id_vector next_dfas_; - - if (!dot_ && !pop_) - { - if (push_dfa_) - { - iter_ = _statemap.find(string(new_dfa_, push_dfa_ - 1)); - } - else - { - iter_ = _statemap.find(new_dfa_); - } - - if (iter_ == end_) - { - std::ostringstream ss_; - - ss_ << "Unknown state name '"; - narrow(new_dfa_, ss_); - ss_ << "'."; - throw runtime_error(ss_.str()); - } - - new_dfa_id_ = iter_->second; - - if (push_dfa_) - { - iter_ = _statemap.find(push_dfa_); - - if (iter_ == end_) - { - std::ostringstream ss_; - - ss_ << "Unknown state name '"; - narrow(push_dfa_, ss_); - ss_ << "'."; - throw runtime_error(ss_.str()); - } - - push_dfa_id_ = iter_->second; - } - } - - if (star_) - { - const std::size_t size_ = _statemap.size(); - - for (id_type i_ = 0; i_ < size_; ++i_) - { - next_dfas_.push_back(i_); - } - } - else - { - const rules_char_type *start_ = curr_dfa_; - string next_dfa_; - - while (*curr_dfa_) - { - while (*curr_dfa_ && *curr_dfa_ != ',') - { - ++curr_dfa_; - } - - next_dfa_.assign(start_, curr_dfa_); - - if (*curr_dfa_) - { - ++curr_dfa_; - start_ = curr_dfa_; - } - - validate(next_dfa_.c_str()); - iter_ = _statemap.find(next_dfa_.c_str()); - - if (iter_ == end_) - { - std::ostringstream ss_; - - ss_ << "Unknown state name '"; - curr_dfa_ = next_dfa_.c_str(); - narrow(curr_dfa_, ss_); - ss_ << "'."; - throw runtime_error(ss_.str()); - } - - next_dfas_.push_back(iter_->second); - } - } - - for (std::size_t i_ = 0, size_ = next_dfas_.size(); - i_ < size_; ++i_) - { - const id_type curr_ = next_dfas_[i_]; - - _regexes[curr_].push_back(token_vector()); - tokenise(regex_, _regexes[curr_].back(), id_, 0); - - if (regex_[0] == '^') - { - _features[curr_] |= bol_bit; - } - - if (regex_[regex_.size() - 1] == '$') - { - _features[curr_] |= 
eol_bit; - } - - if (id_ == skip()) - { - _features[curr_] |= skip_bit; - } - else if (id_ == eoi()) - { - _features[curr_] |= again_bit; - } - - if (push_ || pop_) - { - _features[curr_] |= recursive_bit; - } - - _ids[curr_].push_back(id_); - _user_ids[curr_].push_back(user_id_); - _next_dfas[curr_].push_back(dot_ ? curr_ : new_dfa_id_); - _pushes[curr_].push_back(push_ ? (push_dfa_ ? - push_dfa_id_ : curr_) : npos()); - _pops[curr_].push_back(pop_); - } - } - - void validate(const rules_char_type *name_, - const rules_char_type *end_ = nullptr) const - { - const rules_char_type *start_ = name_; - - if (*name_ != '_' && !(*name_ >= 'A' && *name_ <= 'Z') && - !(*name_ >= 'a' && *name_ <= 'z')) - { - std::ostringstream ss_; - - ss_ << "Invalid name '"; - narrow(name_, ss_); - ss_ << "'."; - throw runtime_error(ss_.str()); - } - else if (*name_) - { - ++name_; - } - - while (*name_ && name_ != end_) - { - if (*name_ != '_' && *name_ != '-' && - !(*name_ >= 'A' && *name_ <= 'Z') && - !(*name_ >= 'a' && *name_ <= 'z') && - !(*name_ >= '0' && *name_ <= '9')) - { - std::ostringstream ss_; - - ss_ << "Invalid name '"; - name_ = start_; - narrow(name_, ss_); - ss_ << "'."; - throw runtime_error(ss_.str()); - } - - ++name_; - } - } - - void check_for_invalid_id(const id_type id_) const - { - if (id_ == eoi()) - { - throw runtime_error("Cannot resuse the id for eoi."); - } - - if (id_ == npos()) - { - throw runtime_error("The id npos is reserved for the " - "UNKNOWN token."); - } - } -}; - -using rules = basic_rules; -using wrules = basic_rules; -using u32rules = basic_rules; -} - -#endif diff --git a/YACReaderLibrary/lexertl/runtime_error.hpp b/YACReaderLibrary/lexertl/runtime_error.hpp deleted file mode 100644 index 7c240118..00000000 --- a/YACReaderLibrary/lexertl/runtime_error.hpp +++ /dev/null @@ -1,23 +0,0 @@ -// runtime_error.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_RUNTIME_ERROR_HPP -#define LEXERTL_RUNTIME_ERROR_HPP - -#include - -namespace lexertl -{ -class runtime_error : public std::runtime_error -{ -public: - runtime_error(const std::string &what_arg_) : - std::runtime_error(what_arg_) - { - } -}; -} - -#endif diff --git a/YACReaderLibrary/lexertl/serialise.hpp b/YACReaderLibrary/lexertl/serialise.hpp deleted file mode 100644 index 931f519b..00000000 --- a/YACReaderLibrary/lexertl/serialise.hpp +++ /dev/null @@ -1,28 +0,0 @@ -// serialise.hpp -// Copyright (c) 2007-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_SERIALISE_HPP -#define LEXERTL_SERIALISE_HPP - -#include "state_machine.hpp" -#include - -namespace lexertl -{ -// IMPORTANT! This won't work if you don't enable RTTI! -template -void serialise(basic_state_machine &sm_, Archive &ar_) -{ - detail::basic_internals &internals_ = sm_.data(); - - ar_ & internals_._eoi; - ar_ & *internals_._lookup; - ar_ & internals_._dfa_alphabet; - ar_ & internals_._features; - ar_ & *internals_._dfa; -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/sm_to_csm.hpp b/YACReaderLibrary/lexertl/sm_to_csm.hpp deleted file mode 100644 index 5ffe69fe..00000000 --- a/YACReaderLibrary/lexertl/sm_to_csm.hpp +++ /dev/null @@ -1,53 +0,0 @@ -// sm_to_csm.hpp -// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_SM_TO_CSM_HPP -#define LEXERTL_SM_TO_CSM_HPP - -#include "enums.hpp" -#include "observer_ptr.hpp" -#include - -namespace lexertl -{ -template -void sm_to_csm(const sm &sm_, char_state_machine &csm_) -{ - using id_type = typename sm::traits::id_type; - using internals = typename sm::internals; - using string_token = typename char_state_machine::state::string_token; - using index_type = typename string_token::index_type; - using string_token_vector = - typename char_state_machine::string_token_vector; - const internals &internals_ = sm_.data(); - const std::size_t dfas_ = internals_._dfa.size(); - - for (id_type i_ = 0; i_ < dfas_; ++i_) - { - if (internals_._dfa_alphabet[i_] == 0) continue; - - const std::size_t alphabet_ = internals_._dfa_alphabet[i_] - - transitions_index; - string_token_vector token_vector_(alphabet_, string_token()); - observer_ptr ptr_ = &internals_._lookup[i_].front(); - - for (std::size_t c_ = 0; c_ < 256; ++c_, ++ptr_) - { - if (*ptr_ >= transitions_index) - { - string_token &token_ = token_vector_ - [*ptr_ - transitions_index]; - - token_.insert(typename string_token::range - (index_type(c_), index_type(c_))); - } - } - - csm_.append(token_vector_, internals_, i_); - } -} -} - -#endif diff --git a/YACReaderLibrary/lexertl/sm_traits.hpp b/YACReaderLibrary/lexertl/sm_traits.hpp deleted file mode 100644 index 161b29c9..00000000 --- a/YACReaderLibrary/lexertl/sm_traits.hpp +++ /dev/null @@ -1,44 +0,0 @@ -// sm_traits.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#ifndef LEXERTL_SM_TRAITS_HPP -#define LEXERTL_SM_TRAITS_HPP - -namespace lexertl -{ -template -struct basic_sm_traits -{ - enum {char_24_bit = sizeof(ch_type) > 2, compressed = comp, lookup = look, - is_dfa = dfa_nfa}; - using input_char_type = ch_type; - using char_type = ch_type; - using id_type = sm_type; - - static id_type npos() - { - return static_cast(~0); - } -}; - -template -struct basic_sm_traits -{ - enum {char_24_bit = sizeof(ch_type) > 2, compressed = true, lookup = look, - is_dfa = dfa_nfa}; - using input_char_type = ch_type; - using char_type = unsigned char; - using id_type = sm_type; - - static id_type npos() - { - return static_cast(~0); - } -}; -} - -#endif diff --git a/YACReaderLibrary/lexertl/state_machine.hpp b/YACReaderLibrary/lexertl/state_machine.hpp deleted file mode 100644 index 76e7bc31..00000000 --- a/YACReaderLibrary/lexertl/state_machine.hpp +++ /dev/null @@ -1,521 +0,0 @@ -// state_machine.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_STATE_MACHINE_HPP -#define LEXERTL_STATE_MACHINE_HPP - -// memcmp() -#include -#include "internals.hpp" -#include -#include "observer_ptr.hpp" -#include -#include "sm_traits.hpp" -#include "string_token.hpp" - -namespace lexertl -{ -template -class basic_state_machine -{ -public: - using id_type = id_ty; - using traits = - basic_sm_traits 1), true, true>; - using internals = detail::basic_internals; - - // If you get a compile error here you have - // failed to define an unsigned id type. 
- static_assert(std::is_unsigned::value, "Your id type is signed"); - - basic_state_machine() : - _internals() - { - } - - void clear() - { - _internals.clear(); - } - - internals &data() - { - return _internals; - } - - const internals &data() const - { - return _internals; - } - - bool empty() const - { - return _internals.empty(); - } - - id_type eoi() const - { - return _internals._eoi; - } - - void minimise() - { - const id_type dfas_ = static_cast(_internals._dfa.size()); - - for (id_type i_ = 0; i_ < dfas_; ++i_) - { - const id_type dfa_alphabet_ = _internals._dfa_alphabet[i_]; - id_type_vector &dfa_ = _internals._dfa[i_]; - - if (dfa_alphabet_ != 0) - { - std::size_t size_ = 0; - - do - { - size_ = dfa_.size(); - minimise_dfa(dfa_alphabet_, dfa_, size_); - } while (dfa_.size() != size_); - } - } - } - - static id_type npos() - { - return static_cast(~0); - } - - static id_type skip() - { - return static_cast(~1); - } - - void swap(basic_state_machine &rhs_) - { - _internals.swap(rhs_._internals); - } - -private: - using id_type_vector = typename internals::id_type_vector; - using index_set = std::set; - internals _internals; - - void minimise_dfa(const id_type dfa_alphabet_, - id_type_vector &dfa_, std::size_t size_) - { - observer_ptr first_ = &dfa_.front(); - observer_ptr end_ = first_ + size_; - id_type index_ = 1; - id_type new_index_ = 1; - id_type_vector lookup_(size_ / dfa_alphabet_, npos()); - observer_ptr lookup_ptr_ = &lookup_.front(); - index_set index_set_; - const id_type bol_index_ = dfa_.front(); - - *lookup_ptr_ = 0; - // Only one 'jam' state, so skip it. - first_ += dfa_alphabet_; - - for (; first_ < end_; first_ += dfa_alphabet_, ++index_) - { - observer_ptr second_ = first_ + dfa_alphabet_; - - for (id_type curr_index_ = index_ + 1; second_ < end_; - ++curr_index_, second_ += dfa_alphabet_) - { - if (index_set_.find(curr_index_) != index_set_.end()) - { - continue; - } - - // Some systems have memcmp in namespace std. 
- using namespace std; - - if (memcmp(first_, second_, sizeof(id_type) * - dfa_alphabet_) == 0) - { - index_set_.insert(curr_index_); - lookup_ptr_[curr_index_] = new_index_; - } - } - - if (lookup_ptr_[index_] == npos()) - { - lookup_ptr_[index_] = new_index_; - ++new_index_; - } - } - - if (!index_set_.empty()) - { - observer_ptr front_ = &dfa_.front(); - id_type_vector new_dfa_(front_, front_ + dfa_alphabet_); - auto set_end_ = index_set_.cend(); - observer_ptr ptr_ = front_ + dfa_alphabet_; - observer_ptr new_ptr_ = nullptr; - - new_dfa_.resize(size_ - index_set_.size() * dfa_alphabet_, 0); - new_ptr_ = &new_dfa_.front() + dfa_alphabet_; - size_ /= dfa_alphabet_; - - if (bol_index_) - { - new_dfa_.front() = lookup_ptr_[bol_index_]; - } - - for (index_ = 1; index_ < size_; ++index_) - { - if (index_set_.find(index_) != set_end_) - { - ptr_ += dfa_alphabet_; - continue; - } - - new_ptr_[end_state_index] = ptr_[end_state_index]; - new_ptr_[id_index] = ptr_[id_index]; - new_ptr_[user_id_index] = ptr_[user_id_index]; - new_ptr_[push_dfa_index] = ptr_[push_dfa_index]; - new_ptr_[next_dfa_index] = ptr_[next_dfa_index]; - new_ptr_[eol_index] = lookup_ptr_[ptr_[eol_index]]; - new_ptr_ += transitions_index; - ptr_ += transitions_index; - - for (id_type i_ = transitions_index; i_ < dfa_alphabet_; ++i_) - { - *new_ptr_++ = lookup_ptr_[*ptr_++]; - } - } - - dfa_.swap(new_dfa_); - } - } -}; - -using state_machine = basic_state_machine; -using wstate_machine = basic_state_machine; -using u32state_machine = basic_state_machine; - -template -struct basic_char_state_machine -{ - using id_type = id_ty; - using traits = basic_sm_traits; - using internals = detail::basic_internals; - using id_type_vector = typename internals::id_type_vector; - - struct state - { - using string_token = basic_string_token; - using id_type_string_token_map = std::map; - using id_type_string_token_pair = std::pair; - enum push_pop_dfa {neither, push_dfa, pop_dfa}; - - bool _end_state; - push_pop_dfa 
_push_pop_dfa; - id_type _id; - id_type _user_id; - id_type _push_dfa; - id_type _next_dfa; - id_type _eol_index; - id_type_string_token_map _transitions; - - state() : - _end_state(false), - _push_pop_dfa(neither), - _id(0), - _user_id(traits::npos()), - _push_dfa(traits::npos()), - _next_dfa(0), - _eol_index(traits::npos()), - _transitions() - { - } - - bool operator ==(const state rhs_) const - { - return _end_state == rhs_._end_state && - _push_pop_dfa == rhs_._push_pop_dfa && - _id == rhs_._id && - _user_id == rhs_._user_id && - _push_dfa == rhs_._push_dfa && - _next_dfa == rhs_._next_dfa && - _eol_index == rhs_._eol_index && - _transitions == rhs_._transitions; - } - }; - - using string_token = typename state::string_token; - using state_vector = std::vector; - using string_token_vector = std::vector; - using id_type_string_token_pair = - typename state::id_type_string_token_pair; - - struct dfa - { - id_type _bol_index; - state_vector _states; - - dfa(const std::size_t size_) : - _bol_index(traits::npos()), - _states(state_vector(size_)) - { - } - - std::size_t size() const - { - return _states.size(); - } - - void swap(dfa &rhs_) - { - std::swap(_bol_index, rhs_._bol_index); - _states.swap(rhs_._states); - } - }; - - static_assert(std::is_move_assignable::value && - std::is_move_constructible::value, "dfa is not movable."); - using dfa_vector = std::vector; - - static_assert(std::is_unsigned::value, "Your id type is signed"); - dfa_vector _sm_vector; - - basic_char_state_machine() : - _sm_vector() - { - } - - void append(const string_token_vector &token_vector_, - const internals &internals_, const id_type dfa_index_) - { - const std::size_t dfa_alphabet_ = internals_._dfa_alphabet[dfa_index_]; - const std::size_t alphabet_ = dfa_alphabet_ - transitions_index; - const id_type_vector &source_dfa_ = internals_._dfa[dfa_index_]; - observer_ptr ptr_ = &source_dfa_.front(); - const std::size_t size_ = (source_dfa_.size() - dfa_alphabet_) / - dfa_alphabet_; - 
typename state::id_type_string_token_map::iterator trans_iter_; - - _sm_vector.push_back(dfa(size_)); - - dfa &dest_dfa_ = _sm_vector.back(); - - if (*ptr_) - { - dest_dfa_._bol_index = *ptr_ - 1; - } - - ptr_ += dfa_alphabet_; - - for (id_type i_ = 0; i_ < size_; ++i_) - { - state &state_ = dest_dfa_._states[i_]; - - state_._end_state = ptr_[end_state_index] != 0; - - if (ptr_[push_dfa_index] != npos()) - { - state_._push_pop_dfa = state::push_dfa; - } - else if (ptr_[end_state_index] & pop_dfa_bit) - { - state_._push_pop_dfa = state::pop_dfa; - } - - state_._id = ptr_[id_index]; - state_._user_id = ptr_[user_id_index]; - state_._push_dfa = ptr_[push_dfa_index]; - state_._next_dfa = ptr_[next_dfa_index]; - - if (ptr_[eol_index]) - { - state_._eol_index = ptr_[eol_index] - 1; - } - - ptr_ += transitions_index; - - for (id_type col_index_ = 0; col_index_ < alphabet_; - ++col_index_, ++ptr_) - { - const id_type next_ = *ptr_; - - if (next_ > 0) - { - trans_iter_ = state_._transitions.find(next_ - 1); - - if (trans_iter_ == state_._transitions.end()) - { - trans_iter_ = state_._transitions.insert - (id_type_string_token_pair(static_cast - (next_ - 1), token_vector_[col_index_])).first; - } - else - { - trans_iter_->second.insert(token_vector_[col_index_]); - } - } - } - } - } - - void clear() - { - _sm_vector.clear(); - } - - bool empty() const - { - return _sm_vector.empty(); - } - - void minimise() - { - const id_type dfas_ = static_cast(_sm_vector.size()); - - for (id_type i_ = 0; i_ < dfas_; ++i_) - { - observer_ptr dfa_ = &_sm_vector[i_]; - - if (dfa_->size() > 0) - { - std::size_t size_ = 0; - - do - { - size_ = dfa_->size(); - minimise_dfa(*dfa_, size_); - } while (dfa_->size() != size_); - } - } - } - - static id_type npos() - { - return traits::npos(); - } - - id_type size() const - { - return static_cast(_sm_vector.size()); - } - - static id_type skip() - { - return ~static_cast(1); - } - - void swap(basic_char_state_machine &csm_) - { - 
_sm_vector.swap(csm_._sm_vector); - } - -private: - using index_set = std::set; - - void minimise_dfa(dfa &dfa_, std::size_t size_) - { - observer_ptr first_ = &dfa_._states.front(); - observer_ptr end_ = first_ + size_; - id_type index_ = 0; - id_type new_index_ = 0; - id_type_vector lookup_(size_, npos()); - observer_ptr lookup_ptr_ = &lookup_.front(); - index_set index_set_; - - for (; first_ != end_; ++first_, ++index_) - { - observer_ptr second_ = first_ + 1; - - for (id_type curr_index_ = index_ + 1; second_ != end_; - ++curr_index_, ++second_) - { - if (index_set_.find(curr_index_) != index_set_.end()) - { - continue; - } - - if (*first_ == *second_) - { - index_set_.insert(curr_index_); - lookup_ptr_[curr_index_] = new_index_; - } - } - - if (lookup_ptr_[index_] == npos()) - { - lookup_ptr_[index_] = new_index_; - ++new_index_; - } - } - - if (!index_set_.empty()) - { - observer_ptr front_ = &dfa_._states.front(); - dfa new_dfa_(new_index_); - auto set_end_ = index_set_.cend(); - observer_ptr ptr_ = front_; - observer_ptr new_ptr_ = &new_dfa_._states.front(); - - if (dfa_._bol_index != npos()) - { - new_dfa_._bol_index = lookup_ptr_[dfa_._bol_index]; - } - - for (index_ = 0; index_ < size_; ++index_) - { - if (index_set_.find(index_) != set_end_) - { - ++ptr_; - continue; - } - - new_ptr_->_end_state = ptr_->_end_state; - new_ptr_->_id = ptr_->_end_state; - new_ptr_->_user_id = ptr_->_user_id; - new_ptr_->_next_dfa = ptr_->_next_dfa; - - if (ptr_->_eol_index != npos()) - { - new_ptr_->_eol_index = lookup_ptr_[ptr_->_eol_index]; - } - - auto iter_ = ptr_->_transitions.cbegin(); - auto end_ = ptr_->_transitions.cend(); - typename state::id_type_string_token_map::iterator find_; - - for (; iter_ != end_; ++iter_) - { - find_ = new_ptr_->_transitions.find - (lookup_ptr_[iter_->first]); - - if (find_ == new_ptr_->_transitions.end()) - { - new_ptr_->_transitions.insert - (id_type_string_token_pair - (lookup_ptr_[iter_->first], iter_->second)); - } - else - { - 
find_->second.insert(iter_->second); - } - } - - ++ptr_; - ++new_ptr_; - } - - dfa_.swap(new_dfa_); - } - } -}; - -using char_state_machine = basic_char_state_machine; -using wchar_state_machine = basic_char_state_machine; -using u32char_state_machine = basic_char_state_machine; -} - -#endif diff --git a/YACReaderLibrary/lexertl/stream_shared_iterator.hpp b/YACReaderLibrary/lexertl/stream_shared_iterator.hpp deleted file mode 100644 index 7946390f..00000000 --- a/YACReaderLibrary/lexertl/stream_shared_iterator.hpp +++ /dev/null @@ -1,352 +0,0 @@ -// stream_shared_iterator.hpp -// Copyright (c) 2010-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#ifndef LEXERTL_STREAM_SHARED_ITERATOR_HPP -#define LEXERTL_STREAM_SHARED_ITERATOR_HPP - -#include -// memcpy -#include -#include -#include -#include "runtime_error.hpp" -#include - -namespace lexertl -{ -template -class basic_stream_shared_iterator -{ -public: - using istream = std::basic_istream; - using iterator_category = std::forward_iterator_tag; - using difference_type = std::size_t; - using value_type = char_type; - using pointer = char_type *; - using reference = char_type &; - - basic_stream_shared_iterator() : - _master(false), - _live(false), - _index(shared::npos()), - _shared(nullptr) - { - } - - basic_stream_shared_iterator(istream &stream_, - const std::size_t buff_size_ = 1024, - const std::size_t increment_ = 1024) : - _master(true), - _live(false), - _index(shared::npos()), - // For exception safety don't call new yet - _shared(nullptr) - { - // Safe to call potentially throwing new now. - _shared = new shared(stream_, buff_size_, increment_); - ++_shared->_ref_count; - _shared->_clients.push_back(this); - } - - basic_stream_shared_iterator(const basic_stream_shared_iterator &rhs_) : - _master(false), - _live(false), - _index(rhs_._master ? 
rhs_._shared->lowest() : rhs_._index), - _shared(rhs_._shared) - { - if (_shared) - { - // New copy of an iterator. - // The assumption is that any copy must be live - // even if the rhs is not (otherwise we will never - // have a record of the start of the current range!) - ++_shared->_ref_count; - _shared->_clients.push_back(this); - _live = true; - } - } - - ~basic_stream_shared_iterator() - { - if (_shared) - { - --_shared->_ref_count; - _shared->erase(this); - - if (_shared->_ref_count == 0) - { - delete _shared; - _shared = nullptr; - } - } - } - - basic_stream_shared_iterator &operator = - (const basic_stream_shared_iterator &rhs_) - { - if (this != &rhs_) - { - _master = false; - _index = rhs_._master ? rhs_._shared->lowest() : rhs_._index; - - if (!_live && !rhs_._live) - { - if (rhs_._shared) - { - ++rhs_._shared->_ref_count; - } - } - else if (!_live && rhs_._live) - { - rhs_._shared->_clients.push_back(this); - - if (!_shared) - { - ++rhs_._shared->_ref_count; - } - } - else if (_live && !rhs_._live) - { - _shared->erase(this); - - if (!rhs_._shared) - { - --_shared->_ref_count; - } - } - - _live = rhs_._live; - _shared = rhs_._shared; - } - - return *this; - } - - bool operator ==(const basic_stream_shared_iterator &rhs_) const - { - return _index == rhs_._index && - (_shared == rhs_._shared || - (_index == shared::npos() || rhs_._index == shared::npos()) && - (!_shared || !rhs_._shared)); - } - - bool operator !=(const basic_stream_shared_iterator &rhs_) const - { - return !(*this == rhs_); - } - - const char_type &operator *() - { - check_master(); - return _shared->_buffer[_index]; - } - - basic_stream_shared_iterator &operator ++() - { - check_master(); - ++_index; - update_state(); - return *this; - } - - basic_stream_shared_iterator operator ++(int) - { - basic_stream_shared_iterator iter_ = *this; - - check_master(); - ++_index; - update_state(); - return iter_; - } - -private: - class shared - { - public: - std::size_t _ref_count; - using 
char_vector = std::vector; - using iter_list = std::vector; - istream &_stream; - std::size_t _increment; - std::size_t _len; - char_vector _buffer; - iter_list _clients; - - shared(istream &stream_, const std::size_t buff_size_, - const std::size_t increment_) : - _ref_count(0), - _increment(increment_), - _stream(stream_) - { - _buffer.resize(buff_size_); - _stream.read(&_buffer.front(), _buffer.size()); - _len = static_cast(_stream.gcount()); - } - - bool reload_buffer() - { - const std::size_t lowest_ = lowest(); - std::size_t read_ = 0; - - if (lowest_ == 0) - { - // Resize buffer - const std::size_t old_size_ = _buffer.size(); - const std::size_t new_size_ = old_size_ + _increment; - - _buffer.resize(new_size_); - _stream.read(&_buffer.front() + old_size_, _increment); - read_ = static_cast(_stream.gcount()); - - if (read_) - { - read_ += old_size_; - _len = read_; - } - } - else - { - // Some systems have memcpy in namespace std - using namespace std; - const size_t start_ = _buffer.size() - lowest_; - const size_t len_ = _buffer.size() - start_; - - memcpy(&_buffer.front(), &_buffer[lowest_], start_ * - sizeof(char_type)); - _stream.read(&_buffer.front() + start_, len_); - read_ = static_cast(_stream.gcount()); - subtract(lowest_); - - if (read_) - { - read_ += start_; - _len = read_; - } - else - { - _len = highest(); - } - } - - return read_ != 0; - } - - void erase(basic_stream_shared_iterator *ptr_) - { - auto iter_ = std::find(_clients.begin(), _clients.end(), ptr_); - - if (iter_ != _clients.end()) - _clients.erase(iter_); - } - - std::size_t lowest() const - { - std::size_t lowest_ = npos(); - auto iter_ = _clients.cbegin(); - auto end_ = _clients.cend(); - - for (; iter_ != end_; ++iter_) - { - const basic_stream_shared_iterator *ptr_ = *iter_; - - if (ptr_->_index < lowest_) - { - lowest_ = ptr_->_index; - } - } - - if (lowest_ == npos()) - { - lowest_ = 0; - } - - return lowest_; - } - - std::size_t highest() const - { - std::size_t highest_ = 0; 
- auto iter_ = _clients.cbegin(); - auto end_ = _clients.cend(); - - for (; iter_ != end_; ++iter_) - { - const basic_stream_shared_iterator *ptr_ = *iter_; - - if (ptr_->_index != npos() && ptr_->_index > highest_) - { - highest_ = ptr_->_index; - } - } - - return highest_; - } - - void subtract(const std::size_t lowest_) - { - auto iter_ = _clients.begin(); - auto end_ = _clients.end(); - - for (; iter_ != end_; ++iter_) - { - basic_stream_shared_iterator *ptr_ = *iter_; - - if (ptr_->_index != npos()) - { - ptr_->_index -= lowest_; - } - } - } - - static std::size_t npos() - { - return ~static_cast(0); - } - - private: - shared &operator =(const shared &rhs_); - }; - - bool _master; - bool _live; - std::size_t _index; - shared *_shared; - - void check_master() - { - if (!_shared) - { - throw runtime_error("Cannot manipulate null (end) " - "stream_shared_iterators."); - } - - if (_master) - { - _master = false; - _live = true; - _index = _shared->lowest(); - } - } - - void update_state() - { - if (_index >= _shared->_len) - { - if (!_shared->reload_buffer()) - { - _shared->erase(this); - _index = shared::npos(); - _live = false; - } - } - } -}; - -using stream_shared_iterator = basic_stream_shared_iterator; -using wstream_shared_iterator = basic_stream_shared_iterator; -} - -#endif diff --git a/YACReaderLibrary/lexertl/string_token.hpp b/YACReaderLibrary/lexertl/string_token.hpp deleted file mode 100644 index e108bd12..00000000 --- a/YACReaderLibrary/lexertl/string_token.hpp +++ /dev/null @@ -1,439 +0,0 @@ -// string_token.hpp -// Copyright (c) 2005-2018 Ben Hanson (http://www.benhanson.net/) -// -// Distributed under the Boost Software License, Version 1.0. 
(See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_STRING_TOKEN_HPP -#define LEXERTL_STRING_TOKEN_HPP - -#include "char_traits.hpp" -#include // Needed by GCC 4.4 -#include -#include -#include -#include - -namespace lexertl -{ -template -struct basic_string_token -{ - using char_type = ch_type; - using char_traits = basic_char_traits; - using index_type = typename char_traits::index_type; - using range = std::pair; - using range_vector = std::vector; - using string = std::basic_string; - using string_token = basic_string_token; - - range_vector _ranges; - - basic_string_token() : - _ranges() - { - } - - basic_string_token(char_type ch_) : - _ranges() - { - insert(range(ch_, ch_)); - } - - basic_string_token(char_type first_, char_type second_) : - _ranges() - { - insert(range(first_, second_)); - } - - void clear() - { - _ranges.clear(); - } - - bool empty() const - { - return _ranges.empty(); - } - - bool any() const - { - return _ranges.size() == 1 && _ranges.front().first == 0 && - _ranges.front().second == char_traits::max_val(); - } - - bool operator <(const basic_string_token &rhs_) const - { - return _ranges < rhs_._ranges; - } - - bool operator ==(const basic_string_token &rhs_) const - { - return _ranges == rhs_._ranges; - } - - bool negatable() const - { - std::size_t size_ = 0; - auto iter_ = _ranges.cbegin(); - auto end_ = _ranges.cend(); - - for (; iter_ != end_; ++iter_) - { - size_ += static_cast(iter_->second) + 1 - - static_cast(iter_->first); - } - - return size_ > static_cast(char_traits::max_val()) / 2; - } - - void swap(basic_string_token &rhs_) - { - _ranges.swap(rhs_._ranges); - } - - void insert(const basic_string_token &rhs_) - { - auto iter_ = rhs_._ranges.cbegin(); - auto end_ = rhs_._ranges.cend(); - - for (; iter_ != end_; ++iter_) - { - insert(*iter_); - } - } - - // Deliberately pass by value - may modify - typename range_vector::iterator insert(range rhs_) - { - bool insert_ = 
true; - auto iter_ = _ranges.begin(); - auto end_ = _ranges.end(); - auto erase_iter_ = end_; - - while (iter_ != end_) - { - // follows current item - if (rhs_.first > iter_->second) - { - if (rhs_.first == iter_->second + 1) - { - // Auto normalise - rhs_.first = iter_->first; - } - else - { - // No intersection, consider next - ++iter_; - continue; - } - } - // Precedes current item - else if (rhs_.second < iter_->first) - { - if (rhs_.second == iter_->first - 1) - { - // Auto normalise - rhs_.second = iter_->second; - } - else - { - // insert here - break; - } - } - else - { - // overlap (under) - if (rhs_.first < iter_->first) - { - if (rhs_.second < iter_->second) - { - rhs_.second = iter_->second; - } - } - // overlap (over) - else if (rhs_.second > iter_->second) - { - if (rhs_.first > iter_->first) - { - rhs_.first = iter_->first; - } - } - // subset - else - { - insert_ = false; - iter_ = _ranges.end(); - break; - } - } - - // Code minimisation: this always applies unless we have already - // exited the loop, or "continue" executed. 
- if (erase_iter_ == end_) - { - erase_iter_ = iter_; - } - - ++iter_; - } - - if (erase_iter_ != end_) - { - if (insert_) - { - // Re-use obsolete location - *erase_iter_ = rhs_; - ++erase_iter_; - } - - iter_ = _ranges.erase(erase_iter_, iter_); - } - else if (insert_) - { - iter_ = _ranges.insert(iter_, rhs_); - } - - return iter_; - } - - void negate() - { - index_type next_ = 0; - const index_type max_ = char_traits::max_val(); - string_token temp_; - auto iter_ = _ranges.cbegin(); - auto end_ = _ranges.cend(); - bool finished_ = false; - - for (; iter_ != end_; ++iter_) - { - if (next_ < iter_->first) - { - temp_.insert(range(next_, - static_cast(iter_->first - 1))); - } - - if (iter_->second < max_) - { - next_ = iter_->second + 1; - } - else - { - finished_ = true; - break; - } - } - - if (!finished_) - { - temp_.insert(range(next_, max_)); - } - - swap(temp_); - } - - void intersect(basic_string_token &rhs_, basic_string_token &overlap_) - { - auto lhs_iter_ = _ranges.begin(); - auto lhs_end_ = _ranges.end(); - auto rhs_iter_ = rhs_._ranges.begin(); - auto rhs_end_ = rhs_._ranges.end(); - - while (lhs_iter_ != lhs_end_ && rhs_iter_ != rhs_end_) - { - if (rhs_iter_->first > lhs_iter_->second) - { - ++lhs_iter_; - } - else if (rhs_iter_->second < lhs_iter_->first) - { - ++rhs_iter_; - } - else - { - range range_; - - if (rhs_iter_->first > lhs_iter_->first) - { - range_.first = rhs_iter_->first; - } - else - { - range_.first = lhs_iter_->first; - } - - if (rhs_iter_->second < lhs_iter_->second) - { - range_.second = rhs_iter_->second; - } - else - { - range_.second = lhs_iter_->second; - } - - adjust(range_, *this, lhs_iter_, lhs_end_); - adjust(range_, rhs_, rhs_iter_, rhs_end_); - overlap_.insert(range_); - } - } - } - - void remove(basic_string_token &rhs_) - { - auto lhs_iter_ = _ranges.begin(); - auto lhs_end_ = _ranges.end(); - auto rhs_iter_ = rhs_._ranges.begin(); - auto rhs_end_ = rhs_._ranges.end(); - - while (lhs_iter_ != lhs_end_ && rhs_iter_ != 
rhs_end_) - { - if (rhs_iter_->first > lhs_iter_->second) - { - ++lhs_iter_; - } - else if (rhs_iter_->second < lhs_iter_->first) - { - ++rhs_iter_; - } - else - { - range range_; - - if (rhs_iter_->first > lhs_iter_->first) - { - range_.first = rhs_iter_->first; - } - else - { - range_.first = lhs_iter_->first; - } - - if (rhs_iter_->second < lhs_iter_->second) - { - range_.second = rhs_iter_->second; - } - else - { - range_.second = lhs_iter_->second; - } - - adjust(range_, *this, lhs_iter_, lhs_end_); - } - } - } - - static string escape_char(const typename char_traits::index_type ch_) - { - string out_; - - switch (ch_) - { - case '\0': - out_ += '\\'; - out_ += '0'; - break; - case '\a': - out_ += '\\'; - out_ += 'a'; - break; - case '\b': - out_ += '\\'; - out_ += 'b'; - break; - case 27: - out_ += '\\'; - out_ += 'x'; - out_ += '1'; - out_ += 'b'; - break; - case '\f': - out_ += '\\'; - out_ += 'f'; - break; - case '\n': - out_ += '\\'; - out_ += 'n'; - break; - case '\r': - out_ += '\\'; - out_ += 'r'; - break; - case '\t': - out_ += '\\'; - out_ += 't'; - break; - case '\v': - out_ += '\\'; - out_ += 'v'; - break; - case '\\': - out_ += '\\'; - out_ += '\\'; - break; - case '"': - out_ += '\\'; - out_ += '"'; - break; - case '\'': - out_ += '\\'; - out_ += '\''; - break; - default: - { - if (ch_ < 32 || ch_ > 126) - { - std::basic_stringstream ss_; - - out_ += '\\'; - out_ += 'x'; - ss_ << std::hex << - static_cast(ch_); - out_ += ss_.str(); - } - else - { - out_ += ch_; - } - - break; - } - } - - return out_; - } - -private: - void adjust(const range &range_, basic_string_token &token_, - typename range_vector::iterator &iter_, - typename range_vector::iterator &end_) - { - if (range_.first > iter_->first) - { - const index_type second_ = iter_->second; - - iter_->second = range_.first - 1; - - if (range_.second < second_) - { - range new_range_(static_cast(range_.second + 1), - second_); - - iter_ = token_.insert(new_range_); - end_ = 
token_._ranges.end(); - } - } - else if (range_.second < iter_->second) - { - iter_->first = range_.second + 1; - } - else - { - iter_ = token_._ranges.erase(iter_); - end_ = token_._ranges.end(); - } - } -}; -} - -#endif diff --git a/YACReaderLibrary/lexertl/utf_iterators.hpp b/YACReaderLibrary/lexertl/utf_iterators.hpp deleted file mode 100644 index 0bd64e7c..00000000 --- a/YACReaderLibrary/lexertl/utf_iterators.hpp +++ /dev/null @@ -1,508 +0,0 @@ -// utf_iterators.hpp -// Copyright (c) 2015-2018 Ben Hanson (http://www.benhanson.net/) -// Inspired by http://utfcpp.sourceforge.net/ -// -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) -#ifndef LEXERTL_UTF_ITERATORS_HPP -#define LEXERTL_UTF_ITERATORS_HPP - -#include - -namespace lexertl -{ -template -class basic_utf8_in_iterator : - public std::iterator -{ -public: - using value_type = char_type; - using difference_type = - typename std::iterator_traits::difference_type; - using iterator_category = std::forward_iterator_tag; - - basic_utf8_in_iterator() : - _it(char_iterator()), - _end(char_iterator()), - _char(0) - { - } - - explicit basic_utf8_in_iterator(const char_iterator &it_, - const char_iterator &end_) : - _it(it_), - _end(it_), - _char(0) - { - if (it_ != end_) - { - next(); - } - } - - char_type operator *() const - { - return _char; - } - - bool operator ==(const basic_utf8_in_iterator &rhs_) const - { - return _it == rhs_._it; - } - - bool operator !=(const basic_utf8_in_iterator &rhs_) const - { - return _it != rhs_._it; - } - - basic_utf8_in_iterator &operator ++() - { - _it = _end; - next(); - return *this; - } - - basic_utf8_in_iterator operator ++(int) - { - basic_utf8_in_iterator temp_ = *this; - - _it = _end; - next(); - return temp_; - } - - basic_utf8_in_iterator operator +(const std::size_t count_) const - { - basic_utf8_in_iterator temp_ = *this; - - for (std::size_t i_ = 0; i_ < count_; 
++i_) - { - ++temp_; - } - - return temp_; - } - - basic_utf8_in_iterator operator -(const std::size_t count_) const - { - basic_utf8_in_iterator temp_ = *this; - - for (std::size_t i_ = 0; i_ < count_; ++i_) - { - temp_._end = temp_._it; - --temp_._it; - - while ((*temp_._it & 0xc0) == 0x80) --temp_._it; - } - - temp_.next(); - return temp_; - } - -private: - char_iterator _it; - char_iterator _end; - char_type _char; - - void next() - { - const char len_ = len(_it); - char_type ch_ = *_it & 0xff; - - switch (len_) - { - case 1: - _end = _it; - ++_end; - break; - case 2: - _end = _it; - ++_end; - - if ((*_end & 0xc0) != 0x80) break; - - ch_ = (ch_ << 6 & 0x7ff) | (*_end & 0x3f); - ++_end; - break; - case 3: - _end = _it; - ++_end; - - if ((*_end & 0xc0) != 0x80) break; - - ch_ = (ch_ << 12 & 0xffff) | ((*_end & 0xff) << 6 & 0xfff); - ++_end; - - if ((*_end & 0xc0) != 0x80) break; - - ch_ |= *_end & 0x3f; - ++_end; - break; - case 4: - _end = _it; - ++_end; - - if ((*_end & 0xc0) != 0x80) break; - - ch_ = (ch_ << 18 & 0x1fffff) | ((*_end & 0xff) << 12 & 0x3ffff); - ++_end; - - if ((*_end & 0xc0) != 0x80) break; - - ch_ |= (*_end & 0xff) << 6 & 0xfff; - ++_end; - - if ((*_end & 0xc0) != 0x80) break; - - ch_ |= *_end & 0x3f; - ++_end; - break; - } - - _char = ch_; - } - - char len(const char_iterator &it_) const - { - const unsigned char ch_ = *it_; - - return ch_ < 0x80 ? 1 : - ch_ >> 5 == 0x06 ? 2 : - ch_ >> 4 == 0x0e ? 3 : - ch_ >> 3 == 0x1e ? 
4 : - 1; - } -}; - -template -class basic_utf8_out_iterator : - public std::iterator -{ -public: - using value_type = char; - using difference_type = - typename std::iterator_traits::difference_type; - using iterator_category = std::forward_iterator_tag; - - basic_utf8_out_iterator() : - _count(0), - _index(0) - { - } - - explicit basic_utf8_out_iterator(const char_iterator &it_, - const char_iterator &end_) : - _it(it_), - _count(0), - _index(0) - { - if (it_ != end_) - { - next(); - } - } - - char operator *() const - { - return _bytes[_index]; - } - - bool operator ==(const basic_utf8_out_iterator &rhs_) const - { - return _it == rhs_._it; - } - - bool operator !=(const basic_utf8_out_iterator &rhs_) const - { - return _it != rhs_._it; - } - - basic_utf8_out_iterator &operator ++() - { - ++_index; - - if (_index >= _count) - { - ++_it; - next(); - } - - return *this; - } - - basic_utf8_out_iterator operator ++(int) - { - basic_utf8_out_iterator temp_ = *this; - - ++_index; - - if (_index >= _count) - { - ++_it; - next(); - } - - return temp_; - } - -private: - char_iterator _it; - char _bytes[4]; - unsigned char _count; - unsigned char _index; - - void next() - { - const std::size_t ch_ = *_it; - - _count = len(ch_); - _index = 0; - - switch (_count) - { - case 1: - _bytes[0] = static_cast(ch_); - break; - case 2: - _bytes[0] = static_cast((ch_ >> 6) | 0xc0); - _bytes[1] = (ch_ & 0x3f) | 0x80; - break; - case 3: - _bytes[0] = static_cast((ch_ >> 12) | 0xe0); - _bytes[1] = ((ch_ >> 6) & 0x3f) | 0x80; - _bytes[2] = (ch_ & 0x3f) | 0x80; - break; - case 4: - _bytes[0] = static_cast((ch_ >> 18) | 0xf0); - _bytes[1] = ((ch_ >> 12) & 0x3f) | 0x80; - _bytes[2] = ((ch_ >> 6) & 0x3f) | 0x80; - _bytes[3] = (ch_ & 0x3f) | 0x80; - break; - } - } - - char len(const std::size_t ch_) const - { - return ch_ < 0x80 ? 1 : - ch_ < 0x800 ? 2 : - ch_ < 0x10000 ? 
3 : - 4; - } -}; - -template -class basic_utf16_in_iterator : - public std::iterator -{ -public: - using value_type = char_type; - using difference_type = - typename std::iterator_traits::difference_type; - using iterator_category = std::forward_iterator_tag; - - basic_utf16_in_iterator() : - _it(char_iterator()), - _end(char_iterator()), - _char(0) - { - } - - explicit basic_utf16_in_iterator(const char_iterator &it_, - const char_iterator &end_) : - _it(it_), - _end(it_), - _char(0) - { - if (it_ != end_) - { - next(); - } - } - - char_type operator *() const - { - return _char; - } - - bool operator ==(const basic_utf16_in_iterator &rhs_) const - { - return _it == rhs_._it; - } - - bool operator !=(const basic_utf16_in_iterator &rhs_) const - { - return _it != rhs_._it; - } - - basic_utf16_in_iterator &operator ++() - { - _it = _end; - next(); - return *this; - } - - basic_utf16_in_iterator operator ++(int) - { - basic_utf16_in_iterator temp_ = *this; - - _it = _end; - next(); - return temp_; - } - - basic_utf16_in_iterator operator +(const std::size_t count_) const - { - basic_utf16_in_iterator temp_ = *this; - - for (std::size_t i_ = 0; i_ < count_; ++i_) - { - ++temp_; - } - - return temp_; - } - - basic_utf16_in_iterator operator -(const std::size_t count_) const - { - basic_utf16_in_iterator temp_ = *this; - - for (std::size_t i_ = 0; i_ < count_; ++i_) - { - temp_._end = temp_._it; - --temp_._it; - - if (*temp_._it >= 0xdc00 && *temp_._it <= 0xdfff) --temp_._it; - } - - temp_.next(); - return temp_; - } - -private: - char_iterator _it; - char_iterator _end; - char_type _char; - - void next() - { - char_type ch_ = *_it & 0xffff; - - _end = _it; - - if (ch_ >= 0xd800 && ch_ <= 0xdbff) - { - const char_type surrogate_ = *++_end & 0xffff; - - ch_ = (((ch_ - 0xd800) << 10) | (surrogate_ - 0xdc00)) + 0x10000; - } - - _char = ch_; - ++_end; - } -}; - -template -class basic_utf16_out_iterator : - public std::iterator -{ -public: - using value_type = wchar_t; - 
using difference_type = - typename std::iterator_traits::difference_type; - using iterator_category = std::forward_iterator_tag; - - basic_utf16_out_iterator() : - _count(0), - _index(0) - { - } - - explicit basic_utf16_out_iterator(const char_iterator &it_, - const char_iterator &end_) : - _it(it_), - _count(0), - _index(0) - { - if (it_ != end_) - { - next(); - } - } - - wchar_t operator *() const - { - return _chars[_index]; - } - - bool operator ==(const basic_utf16_out_iterator &rhs_) const - { - return _it == rhs_._it; - } - - bool operator !=(const basic_utf16_out_iterator &rhs_) const - { - return _it != rhs_._it; - } - - basic_utf16_out_iterator &operator ++() - { - ++_index; - - if (_index >= _count) - { - ++_it; - next(); - } - - return *this; - } - - basic_utf16_out_iterator operator ++(int) - { - basic_utf16_out_iterator temp_ = *this; - - ++_index; - - if (_index >= _count) - { - ++_it; - next(); - } - - return temp_; - } - -private: - char_iterator _it; - wchar_t _chars[2]; - unsigned char _count; - unsigned char _index; - - void next() - { - const std::size_t ch_ = *_it; - - _count = len(ch_); - _index = 0; - - switch (_count) - { - case 1: - _chars[0] = static_cast(ch_); - break; - case 2: - _chars[0] = static_cast((ch_ >> 10) + 0xdc00u - - (0x10000 >> 10)); - _chars[1] = static_cast((ch_ & 0x3ff) + 0xdc00u); - break; - } - } - - char len(const std::size_t ch_) const - { - return ch_ > 0xffff ? 
2 : 1; - } -}; -} - -#endif From ddb140d4307aba896eeaf7f87d3d9f0d83c90ddb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Tue, 12 Jan 2021 18:57:25 +0100 Subject: [PATCH 15/32] Remove misplaced code --- YACReaderLibrary/db/comic_model.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/YACReaderLibrary/db/comic_model.cpp b/YACReaderLibrary/db/comic_model.cpp index 59fa9887..abfa2aff 100644 --- a/YACReaderLibrary/db/comic_model.cpp +++ b/YACReaderLibrary/db/comic_model.cpp @@ -644,9 +644,7 @@ void ComicModel::setupModelData(const SearchModifiers modifier, const QString &f } catch (const std::exception &e) { QLOG_ERROR() << "Unable to parse query: " << e.what(); } - selectQuery.exec(); - setupModelData(selectQuery); connectionName = db.connectionName(); } QSqlDatabase::removeDatabase(connectionName); From f09c5955d8967078b2caa13feaf9607ed66a3e45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 08:39:16 +0100 Subject: [PATCH 16/32] Remove space and atWord tokens `atWord` wasn't used at all and spaces should be eaten by the lexer And added `unspecified` token --- YACReaderLibrary/db/query_lexer.cpp | 18 ++++-------------- YACReaderLibrary/db/query_lexer.h | 4 +--- YACReaderLibrary/db/query_parser.cpp | 7 ++----- YACReaderLibrary/db/query_parser.h | 2 +- 4 files changed, 8 insertions(+), 23 deletions(-) diff --git a/YACReaderLibrary/db/query_lexer.cpp b/YACReaderLibrary/db/query_lexer.cpp index 27944ff4..832b4978 100644 --- a/YACReaderLibrary/db/query_lexer.cpp +++ b/YACReaderLibrary/db/query_lexer.cpp @@ -7,17 +7,16 @@ QueryLexer::QueryLexer(const std::string &input) Token QueryLexer::next() { + while (isSpace(peek())) { + get(); + } + switch (peek()) { case '\0': return Token(Token::Type::eof); case '(': case ')': return single(Token::Type::opcode); - case ' ': - case '\t': - case '\r': - case '\n': - return space(); case '"': return quotedWord(); default: @@ -40,15 +39,6 
@@ Token QueryLexer::single(Token::Type type) return Token(type, input.substr(index++, 1)); } -Token QueryLexer::space() -{ - auto start = index; - get(); - while (isSpace(peek())) - get(); - return Token(Token::Type::space, input.substr(start, index - start)); -} - Token QueryLexer::word() { auto start = index; diff --git a/YACReaderLibrary/db/query_lexer.h b/YACReaderLibrary/db/query_lexer.h index b2c892f6..4cc2b61f 100644 --- a/YACReaderLibrary/db/query_lexer.h +++ b/YACReaderLibrary/db/query_lexer.h @@ -9,10 +9,9 @@ public: enum class Type { eof, opcode, - atWord, word, quotedWord, - space + undefined }; Token(Type type, std::string lexeme = "") @@ -49,7 +48,6 @@ private: char get(); Token single(Token::Type type); - Token space(); Token word(); Token quotedWord(); diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp index de9dc39e..5a10d238 100644 --- a/YACReaderLibrary/db/query_parser.cpp +++ b/YACReaderLibrary/db/query_parser.cpp @@ -136,9 +136,6 @@ bool QueryParser::isEof() const void QueryParser::advance() { currentToken = lexer.next(); - - if (tokenType() == Token::Type::space) - advance(); } QueryParser::FieldType QueryParser::fieldType(const std::string &str) @@ -184,7 +181,7 @@ QueryParser::TreeNode QueryParser::andExpression() return { "and", { lhs, andExpression() } }; } - if ((isIn(tokenType(), { Token::Type::atWord, Token::Type::word, Token::Type::quotedWord }) || token() == "(") && lcaseToken() != "or") { + if ((isIn(tokenType(), { Token::Type::word, Token::Type::quotedWord }) || token() == "(") && lcaseToken() != "or") { return { "and", { lhs, andExpression() } }; } @@ -210,7 +207,7 @@ QueryParser::TreeNode QueryParser::locationExpression() } return res; } - if (!isIn(tokenType(), { Token::Type::atWord, Token::Type::word, Token::Type::quotedWord })) { + if (!isIn(tokenType(), { Token::Type::word, Token::Type::quotedWord })) { throw std::invalid_argument("Invalid syntax. 
Expected a lookup name or a word"); } return baseToken(); diff --git a/YACReaderLibrary/db/query_parser.h b/YACReaderLibrary/db/query_parser.h index 95fd48c8..aa688fc2 100644 --- a/YACReaderLibrary/db/query_parser.h +++ b/YACReaderLibrary/db/query_parser.h @@ -62,7 +62,7 @@ private: void advance(); QueryLexer lexer = QueryLexer(""); - Token currentToken = Token(Token::Type::eof); + Token currentToken = Token(Token::Type::undefined); template static bool isIn(const T &e, const std::list &v) From 260f538de3ab24ac8768fa66892b41984fe6aacd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 08:49:48 +0100 Subject: [PATCH 17/32] Use an explicit constructor for TreeNode List initialization ended using movable constructors which surprisingly caused data troubles in release mode, at least in VC2019 compiler. The tree being messed up caused crashes while SQL was generated. I have no explanation for it. --- YACReaderLibrary/db/query_parser.cpp | 17 +++++++++-------- YACReaderLibrary/db/query_parser.h | 5 +++++ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp index 5a10d238..38381e5c 100644 --- a/YACReaderLibrary/db/query_parser.cpp +++ b/YACReaderLibrary/db/query_parser.cpp @@ -168,7 +168,7 @@ QueryParser::TreeNode QueryParser::orExpression() auto lhs = andExpression(); if (lcaseToken() == "or") { advance(); - return { "or", { lhs, orExpression() } }; + return TreeNode("or", { lhs, orExpression() }); } return lhs; } @@ -178,11 +178,11 @@ QueryParser::TreeNode QueryParser::andExpression() auto lhs = notExpression(); if (lcaseToken() == "and") { advance(); - return { "and", { lhs, andExpression() } }; + return TreeNode("and", { lhs, andExpression() }); } if ((isIn(tokenType(), { Token::Type::word, Token::Type::quotedWord }) || token() == "(") && lcaseToken() != "or") { - return { "and", { lhs, andExpression() } }; + return TreeNode("and", 
{ lhs, andExpression() }); } return lhs; @@ -192,7 +192,7 @@ QueryParser::TreeNode QueryParser::notExpression() { if (lcaseToken() == "not") { advance(); - return { "not", { notExpression() } }; + return TreeNode("not", { notExpression() }); } return locationExpression(); } @@ -216,7 +216,7 @@ QueryParser::TreeNode QueryParser::locationExpression() QueryParser::TreeNode QueryParser::baseToken() { if (tokenType() == Token::Type::quotedWord) { - return { "token", { { "all", {} }, { token(true), {} } } }; + return TreeNode("token", { TreeNode("all", {}), TreeNode(token(true), {}) }); } auto words(split(token(true), ':')); @@ -225,9 +225,10 @@ QueryParser::TreeNode QueryParser::baseToken() auto loc(toLower(words[0].toStdString())); words.erase(words.begin()); if (words.size() == 1 && tokenType() == Token::Type::quotedWord) { - return { "token", { { loc, {} }, { token(true), {} } } }; + return TreeNode("token", { TreeNode(loc, {}), TreeNode(token(true), {}) }); } - return { "token", { { loc, {} }, { join(words, ":"), {} } } }; + return TreeNode("token", { TreeNode(loc, {}), TreeNode(join(words, ":"), {}) }); } - return { "token", { { "all", {} }, { join(words, ":"), {} } } }; + + return TreeNode("token", { TreeNode("all", {}), TreeNode(join(words, ":"), {}) }); } diff --git a/YACReaderLibrary/db/query_parser.h b/YACReaderLibrary/db/query_parser.h index aa688fc2..7aca5ca9 100644 --- a/YACReaderLibrary/db/query_parser.h +++ b/YACReaderLibrary/db/query_parser.h @@ -45,6 +45,11 @@ public: std::string t; std::vector children; + explicit TreeNode(std::string t, std::vector children) + : t(t), children(children) + { + } + int buildSqlString(std::string &sqlString, int bindPosition = 0) const; int bindValues(QSqlQuery &selectQuery, int bindPosition = 0) const; }; From 7b361004589d144776eaed704a5b31d93cc91616 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 08:51:08 +0100 Subject: [PATCH 18/32] Use QString::fromStdString 
Just to keep things consistent in the whole round trip conversion --- YACReaderLibrary/db/query_parser.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp index 38381e5c..36916b53 100644 --- a/YACReaderLibrary/db/query_parser.cpp +++ b/YACReaderLibrary/db/query_parser.cpp @@ -53,9 +53,9 @@ int QueryParser::TreeNode::bindValues(QSqlQuery &selectQuery, int bindPosition) if (t == "token") { std::string bind_string(":bindPosition" + std::to_string(++bindPosition)); if (isIn(fieldType(children[0].t), { FieldType::numeric, FieldType::boolean })) { - selectQuery.bindValue(bind_string.c_str(), std::stoi(children[1].t)); + selectQuery.bindValue(QString::fromStdString(bind_string), std::stoi(children[1].t)); } else { - selectQuery.bindValue(bind_string.c_str(), ("%%" + children[1].t + "%%").c_str()); + selectQuery.bindValue(QString::fromStdString(bind_string), QString::fromStdString("%%" + children[1].t + "%%")); } } else if (t == "not") { bindPosition = children[0].bindValues(selectQuery, bindPosition); @@ -159,7 +159,7 @@ std::string QueryParser::join(const QStringList &strings, const std::string &del QStringList QueryParser::split(const std::string &string, char delim) { - auto words = QString(string.c_str()).split(delim); + auto words = QString::fromStdString(string).split(delim); return words; } From fa5ce254253041e9d3e506a568f64a2db7c0c07d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 09:03:17 +0100 Subject: [PATCH 19/32] Add concurrent queue based on lambdas --- YACReaderLibrary/YACReaderLibrary.pro | 1 + common/concurrent_queue.h | 132 ++++++++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 common/concurrent_queue.h diff --git a/YACReaderLibrary/YACReaderLibrary.pro b/YACReaderLibrary/YACReaderLibrary.pro index 156056df..6a81ad51 100644 --- a/YACReaderLibrary/YACReaderLibrary.pro +++ 
b/YACReaderLibrary/YACReaderLibrary.pro @@ -76,6 +76,7 @@ QT += sql network widgets script # Input HEADERS += comic_flow.h \ + ../common/concurrent_queue.h \ create_library_dialog.h \ db/query_lexer.h \ library_creator.h \ diff --git a/common/concurrent_queue.h b/common/concurrent_queue.h new file mode 100644 index 00000000..5b92f6f1 --- /dev/null +++ b/common/concurrent_queue.h @@ -0,0 +1,132 @@ +#ifndef CONCURRENT_QUEUE_H +#define CONCURRENT_QUEUE_H + +#include +#include +#include +#include +#include + +namespace YACReader { +class ConcurrentQueue +{ +public: + explicit ConcurrentQueue(int threadCount) + : jobsLeft(0), + bailout(false) + { + threads = std::vector(threadCount); + for (int index = 0; index < threadCount; ++index) { + threads[index] = std::thread([this] { + this->nextJob(); + }); + } + } + + ~ConcurrentQueue() + { + joinAll(); + } + + void enqueue(std::function job) + { + { + std::lock_guard lock(queueMutex); + _queue.emplace(job); + } + + { + std::lock_guard lock(jobsLeftMutex); + ++jobsLeft; + } + + jobAvailableVar.notify_one(); + } + + void cancellPending() + { + std::unique_lock lock(jobsLeftMutex); + _queue = std::queue>(); + jobsLeft = 0; + } + + void waitAll() + { + std::unique_lock lock(jobsLeftMutex); + if (jobsLeft > 0) { + _waitVar.wait(lock, [this] { + return jobsLeft == 0; + }); + } + } + +private: + std::vector threads; + std::queue> _queue; + int jobsLeft; + bool bailout; + std::condition_variable jobAvailableVar; + std::condition_variable _waitVar; + std::mutex jobsLeftMutex; + std::mutex queueMutex; + + void nextJob() + { + while (true) { + std::function job; + + { + std::unique_lock lock(queueMutex); + + if (bailout) { + return; + } + + jobAvailableVar.wait(lock, [this] { + return _queue.size() > 0 || bailout; + }); + + if (bailout) { + return; + } + + job = _queue.front(); + _queue.pop(); + } + + job(); + + { + std::lock_guard lock(jobsLeftMutex); + --jobsLeft; + } + + _waitVar.notify_one(); + } + } + + void joinAll() + { + { + 
std::lock_guard lock(queueMutex); + + if (bailout) { + return; + } + + bailout = true; + } + + jobAvailableVar.notify_all(); + + for (auto &x : threads) { + if (x.joinable()) { + x.join(); + } + } + } +}; + +} + +#endif // CONCURRENT_QUEUE_H From dbdc7bd965b6ab0e6e48afcba708198e2b8f78f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 09:10:58 +0100 Subject: [PATCH 20/32] Add a class for processing search queries and create the comics model data --- YACReaderLibrary/YACReaderLibrary.pro | 2 + .../db/comic_query_result_procesor.cpp | 112 ++++++++++++++++++ .../db/comic_query_result_procesor.h | 32 +++++ 3 files changed, 146 insertions(+) create mode 100644 YACReaderLibrary/db/comic_query_result_procesor.cpp create mode 100644 YACReaderLibrary/db/comic_query_result_procesor.h diff --git a/YACReaderLibrary/YACReaderLibrary.pro b/YACReaderLibrary/YACReaderLibrary.pro index 6a81ad51..4162cb36 100644 --- a/YACReaderLibrary/YACReaderLibrary.pro +++ b/YACReaderLibrary/YACReaderLibrary.pro @@ -78,6 +78,7 @@ QT += sql network widgets script HEADERS += comic_flow.h \ ../common/concurrent_queue.h \ create_library_dialog.h \ + db/comic_query_result_procesor.h \ db/query_lexer.h \ library_creator.h \ library_window.h \ @@ -154,6 +155,7 @@ HEADERS += comic_flow.h \ SOURCES += comic_flow.cpp \ create_library_dialog.cpp \ + db/comic_query_result_procesor.cpp \ db/query_lexer.cpp \ library_creator.cpp \ library_window.cpp \ diff --git a/YACReaderLibrary/db/comic_query_result_procesor.cpp b/YACReaderLibrary/db/comic_query_result_procesor.cpp new file mode 100644 index 00000000..270075aa --- /dev/null +++ b/YACReaderLibrary/db/comic_query_result_procesor.cpp @@ -0,0 +1,112 @@ +#include "comic_query_result_procesor.h" + +#include "comic_item.h" +#include "comic_model.h" +#include "data_base_management.h" +#include "qnaturalsorting.h" +#include "db_helper.h" +#include "query_parser.h" + +#include "QsLog.h" + +QString 
getLastExecutedQuery(const QSqlQuery &query) +{ + QString str = query.lastQuery(); + QMapIterator it(query.boundValues()); + while (it.hasNext()) { + it.next(); + str.replace(it.key(), it.value().toString()); + } + return str; +} + +YACReader::ComicQueryResultProcesor::ComicQueryResultProcesor() + : querySearchQueue(1) +{ +} + +void YACReader::ComicQueryResultProcesor::createModelData(const YACReader::SearchModifiers modifier, const QString &filter, const QString &databasePath) +{ + querySearchQueue.cancellPending(); + + querySearchQueue.enqueue([=] { + QString connectionName = ""; + { + QSqlDatabase db = DataBaseManagement::loadDatabase(databasePath); + QSqlQuery selectQuery(db); + + std::string queryString("SELECT ci.number,ci.title,c.fileName,ci.numPages,c.id,c.parentId,c.path,ci.hash,ci.read,ci.isBis,ci.currentPage,ci.rating,ci.hasBeenOpened " + "FROM comic c INNER JOIN comic_info ci ON (c.comicInfoId = ci.id) LEFT JOIN folder f ON (f.id == c.parentId) WHERE "); + + try { + QueryParser parser; + auto result = parser.parse(filter.toStdString()); + result.buildSqlString(queryString); + + switch (modifier) { + case YACReader::NoModifiers: + queryString += " LIMIT :limit"; + break; + + case YACReader::OnlyRead: + queryString += " AND ci.read = 1 LIMIT :limit"; + break; + + case YACReader::OnlyUnread: + queryString += " AND ci.read = 0 LIMIT :limit"; + break; + + default: + queryString += " LIMIT :limit"; + QLOG_ERROR() << "not implemented"; + break; + } + selectQuery.prepare(queryString.c_str()); + selectQuery.bindValue(":limit", 500); //TODO, load this value from settings + result.bindValues(selectQuery); + + selectQuery.exec(); + + auto data = modelData(selectQuery); + + emit newData(data, databasePath); + } catch (const std::exception &e) { + //Do nothing, uncomplete search string will end here and it is part of how the QueryParser works + //I don't like the idea of using exceptions for this though + } + + connectionName = db.connectionName(); + } + 
QSqlDatabase::removeDatabase(connectionName); + }); +} + +QList *YACReader::ComicQueryResultProcesor::modelData(QSqlQuery &sqlquery) +{ + auto list = new QList(); + + int numColumns = sqlquery.record().count(); + + while (sqlquery.next()) { + QList data; + + for (int i = 0; i < numColumns; i++) + data << sqlquery.value(i); + + list->append(new ComicItem(data)); + } + + std::sort(list->begin(), list->end(), [](const ComicItem *c1, const ComicItem *c2) { + if (c1->data(ComicModel::Number).isNull() && c2->data(ComicModel::Number).isNull()) { + return naturalSortLessThanCI(c1->data(ComicModel::FileName).toString(), c2->data(ComicModel::FileName).toString()); + } else { + if (c1->data(ComicModel::Number).isNull() == false && c2->data(ComicModel::Number).isNull() == false) { + return c1->data(ComicModel::Number).toInt() < c2->data(ComicModel::Number).toInt(); + } else { + return c2->data(ComicModel::Number).isNull(); + } + } + }); + + return list; +} diff --git a/YACReaderLibrary/db/comic_query_result_procesor.h b/YACReaderLibrary/db/comic_query_result_procesor.h new file mode 100644 index 00000000..ba71da99 --- /dev/null +++ b/YACReaderLibrary/db/comic_query_result_procesor.h @@ -0,0 +1,32 @@ +#ifndef COMIC_QUERY_RESULT_PROCESOR_H +#define COMIC_QUERY_RESULT_PROCESOR_H + +#include +#include + +#include "yacreader_global_gui.h" +#include "concurrent_queue.h" + +class ComicItem; + +namespace YACReader { + +class ComicQueryResultProcesor : public QObject +{ + Q_OBJECT +public: + ComicQueryResultProcesor(); + +public slots: + void createModelData(const SearchModifiers modifier, const QString &filter, const QString &databasePath); +signals: + void newData(QList *, const QString &); + +private: + ConcurrentQueue querySearchQueue; + + QList *modelData(QSqlQuery &sqlquery); +}; +}; + +#endif // COMIC_QUERY_RESULT_PROCESOR_H From ccc382df7d3bf82df0a39e0a879255cb0d55d92e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 
09:17:18 +0100 Subject: [PATCH 21/32] Use ComicQueryResultProcesor --- YACReaderLibrary/db/comic_model.cpp | 52 +++++------------------------ YACReaderLibrary/db/comic_model.h | 4 +-- YACReaderLibrary/library_window.cpp | 34 +++++++++++-------- YACReaderLibrary/library_window.h | 3 ++ 4 files changed, 33 insertions(+), 60 deletions(-) diff --git a/YACReaderLibrary/db/comic_model.cpp b/YACReaderLibrary/db/comic_model.cpp index abfa2aff..ea03a617 100644 --- a/YACReaderLibrary/db/comic_model.cpp +++ b/YACReaderLibrary/db/comic_model.cpp @@ -596,61 +596,25 @@ void ComicModel::setupReadingModelData(const QString &databasePath) endResetModel(); } -void ComicModel::setupModelData(const SearchModifiers modifier, const QString &filter, const QString &databasePath) +void ComicModel::setModelData(QList *data, const QString &databasePath) { - beginResetModel(); - qDeleteAll(_data); - _data.clear(); _databasePath = databasePath; - QString connectionName = ""; - { - QSqlDatabase db = DataBaseManagement::loadDatabase(databasePath); - QSqlQuery selectQuery(db); + beginResetModel(); - std::string queryString("SELECT ci.number,ci.title,c.fileName,ci.numPages,c.id,c.parentId,c.path,ci.hash,ci.read,ci.isBis,ci.currentPage,ci.rating,ci.hasBeenOpened " - "FROM comic c INNER JOIN comic_info ci ON (c.comicInfoId = ci.id) LEFT JOIN folder f ON (f.id == c.parentId) WHERE "); + qDeleteAll(_data); - try { - QueryParser parser; - auto result = parser.parse(filter.toStdString()); - result.buildSqlString(queryString); + _data.clear(); - switch (modifier) { - case YACReader::NoModifiers: - queryString += " LIMIT :limit"; - break; + QLOG_ERROR() << "-d2>" << data->size(); - case YACReader::OnlyRead: - queryString += " AND ci.read = 1 LIMIT :limit"; - break; + _data.append(*data); - case YACReader::OnlyUnread: - queryString += " AND ci.read = 0 LIMIT :limit"; - break; - - default: - queryString += " LIMIT :limit"; - QLOG_ERROR() << "not implemented"; - break; - } - 
selectQuery.prepare(queryString.c_str()); - selectQuery.bindValue(":limit", 500); //TODO, load this value from settings - result.bindValues(selectQuery); - - selectQuery.exec(); - - setupModelData(selectQuery); - } catch (const std::exception &e) { - QLOG_ERROR() << "Unable to parse query: " << e.what(); - } - - connectionName = db.connectionName(); - } - QSqlDatabase::removeDatabase(connectionName); endResetModel(); emit searchNumResults(_data.length()); + + delete data; } QString ComicModel::getComicPath(QModelIndex mi) diff --git a/YACReaderLibrary/db/comic_model.h b/YACReaderLibrary/db/comic_model.h index 263eaa43..b15e50e7 100644 --- a/YACReaderLibrary/db/comic_model.h +++ b/YACReaderLibrary/db/comic_model.h @@ -89,8 +89,6 @@ public: void setupReadingListModelData(unsigned long long int parentReadingList, const QString &databasePath); void setupFavoritesModelData(const QString &databasePath); void setupReadingModelData(const QString &databasePath); - //configures the model for showing the comics matching the filter criteria. 
- void setupModelData(const SearchModifiers modifier, const QString &filter, const QString &databasePath); //Métodos de conveniencia QStringList getPaths(const QString &_source); @@ -142,6 +140,8 @@ public slots: void addComicsToLabel(const QList &comicIds, qulonglong labelId); void addComicsToReadingList(const QList &comicIds, qulonglong readingListId); + void setModelData(QList *data, const QString &databasePath); + protected: private: void setupModelData(QSqlQuery &sqlquery); diff --git a/YACReaderLibrary/library_window.cpp b/YACReaderLibrary/library_window.cpp index 8da72b0c..aad471a7 100644 --- a/YACReaderLibrary/library_window.cpp +++ b/YACReaderLibrary/library_window.cpp @@ -92,7 +92,7 @@ using namespace YACReader; LibraryWindow::LibraryWindow() - : QMainWindow(), fullscreen(false), previousFilter(""), fetching(false), status(LibraryWindow::Normal), removeError(false) + : QMainWindow(), fullscreen(false), previousFilter(""), fetching(false), status(LibraryWindow::Normal), removeError(false), comicQueryResultProcesor() { setupUI(); @@ -1053,10 +1053,9 @@ void LibraryWindow::createConnections() connect(optionsDialog, SIGNAL(optionsChanged()), this, SLOT(reloadOptions())); connect(optionsDialog, SIGNAL(editShortcuts()), editShortcutsDialog, SLOT(show())); - //Folders filter - //connect(clearFoldersFilter,SIGNAL(clicked()),foldersFilter,SLOT(clear())); + //Search filter connect(searchEdit, SIGNAL(filterChanged(YACReader::SearchModifiers, QString)), this, SLOT(setSearchFilter(YACReader::SearchModifiers, QString))); - //connect(includeComicsCheckBox,SIGNAL(stateChanged(int)),this,SLOT(searchInFiles(int))); + connect(&comicQueryResultProcesor, &ComicQueryResultProcesor::newData, this, &LibraryWindow::setComicSearchFilterData); //ContextMenus connect(openContainingFolderComicAction, SIGNAL(triggered()), this, SLOT(openContainingFolderComic())); @@ -2070,23 +2069,30 @@ void LibraryWindow::toNormal() void LibraryWindow::setSearchFilter(const YACReader::SearchModifiers 
modifier, QString filter) { if (!filter.isEmpty()) { - status = LibraryWindow::Searching; + //TODO move search query for folders to its own async processor foldersModelProxy->setFilter(modifier, filter, true); //includeComicsCheckBox->isChecked()); - comicsModel->setupModelData(modifier, filter, foldersModel->getDatabase()); - comicsViewsManager->comicsView->enableFilterMode(true); - comicsViewsManager->comicsView->setModel(comicsModel); //TODO, columns are messed up after ResetModel some times, this shouldn't be necesary - foldersView->expandAll(); - - if (comicsModel->rowCount() == 0) - comicsViewsManager->showNoSearchResultsView(); - else - comicsViewsManager->showComicsView(); + comicQueryResultProcesor.createModelData(modifier, filter, foldersModel->getDatabase()); } else if (status == LibraryWindow::Searching) { //if no searching, then ignore this clearSearchFilter(); navigationController->loadPreviousStatus(); } } +void LibraryWindow::setComicSearchFilterData(QList *data, const QString &databasePath) +{ + status = LibraryWindow::Searching; + + comicsModel->setModelData(data, databasePath); + comicsViewsManager->comicsView->enableFilterMode(true); + comicsViewsManager->comicsView->setModel(comicsModel); //TODO, columns are messed up after ResetModel some times, this shouldn't be necesary + foldersView->expandAll(); + + if (comicsModel->rowCount() == 0) + comicsViewsManager->showNoSearchResultsView(); + else + comicsViewsManager->showComicsView(); +} + void LibraryWindow::clearSearchFilter() { foldersModelProxy->clear(); diff --git a/YACReaderLibrary/library_window.h b/YACReaderLibrary/library_window.h index 993b58b9..e4ebeda7 100644 --- a/YACReaderLibrary/library_window.h +++ b/YACReaderLibrary/library_window.h @@ -10,6 +10,7 @@ #include "yacreader_libraries.h" #include "yacreader_navigation_controller.h" +#include "comic_query_result_procesor.h" #include @@ -327,6 +328,7 @@ public slots: void toNormal(); void toFullScreen(); void setSearchFilter(const 
YACReader::SearchModifiers modifier, QString filter); + void setComicSearchFilterData(QList *, const QString &); void clearSearchFilter(); void showProperties(); void exportLibrary(QString destPath); @@ -399,6 +401,7 @@ private: std::future upgradeLibraryFuture; TrayIconController *trayIconController; + ComicQueryResultProcesor comicQueryResultProcesor; }; #endif From 047fd246094102c2683171fa4d2b73ba6678e606 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 09:17:49 +0100 Subject: [PATCH 22/32] Make the search line edit bigger --- custom_widgets/yacreader_search_line_edit.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/custom_widgets/yacreader_search_line_edit.cpp b/custom_widgets/yacreader_search_line_edit.cpp index 3a209a27..0232184e 100644 --- a/custom_widgets/yacreader_search_line_edit.cpp +++ b/custom_widgets/yacreader_search_line_edit.cpp @@ -40,7 +40,7 @@ YACReaderSearchLineEdit::YACReaderSearchLineEdit(QWidget *parent) #ifdef Q_OS_MAC setMaximumWidth(212); #else - setMaximumWidth(173); + setMaximumWidth(255); setFixedHeight(26); #endif From 5343d24f26e88fbaaf18324806f41a9181e97372 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 11:37:17 +0100 Subject: [PATCH 23/32] Run folder search filtering in the background It uses the same approach used by the comics search filter --- YACReaderLibrary/YACReaderLibrary.pro | 2 + YACReaderLibrary/db/folder_model.cpp | 268 +----------------- YACReaderLibrary/db/folder_model.h | 50 +--- .../db/folder_query_result_processor.cpp | 176 ++++++++++++ .../db/folder_query_result_processor.h | 36 +++ YACReaderLibrary/library_window.cpp | 12 +- YACReaderLibrary/library_window.h | 3 + 7 files changed, 238 insertions(+), 309 deletions(-) create mode 100644 YACReaderLibrary/db/folder_query_result_processor.cpp create mode 100644 YACReaderLibrary/db/folder_query_result_processor.h diff --git 
a/YACReaderLibrary/YACReaderLibrary.pro b/YACReaderLibrary/YACReaderLibrary.pro index 4162cb36..d0f0f0d3 100644 --- a/YACReaderLibrary/YACReaderLibrary.pro +++ b/YACReaderLibrary/YACReaderLibrary.pro @@ -79,6 +79,7 @@ HEADERS += comic_flow.h \ ../common/concurrent_queue.h \ create_library_dialog.h \ db/comic_query_result_procesor.h \ + db/folder_query_result_processor.h \ db/query_lexer.h \ library_creator.h \ library_window.h \ @@ -156,6 +157,7 @@ HEADERS += comic_flow.h \ SOURCES += comic_flow.cpp \ create_library_dialog.cpp \ db/comic_query_result_procesor.cpp \ + db/folder_query_result_processor.cpp \ db/query_lexer.cpp \ library_creator.cpp \ library_window.cpp \ diff --git a/YACReaderLibrary/db/folder_model.cpp b/YACReaderLibrary/db/folder_model.cpp index 1944e1e1..b20ff402 100644 --- a/YACReaderLibrary/db/folder_model.cpp +++ b/YACReaderLibrary/db/folder_model.cpp @@ -1,54 +1,6 @@ -/**************************************************************************** -** -** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). -** All rights reserved. -** Contact: Nokia Corporation (qt-info@nokia.com) -** -** This file is part of the examples of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:BSD$ -** You may use this file under the terms of the BSD license as follows: -** -** "Redistribution and use in source and binary forms, with or without -** modification, are permitted provided that the following conditions are -** met: -** * Redistributions of source code must retain the above copyright -** notice, this list of conditions and the following disclaimer. -** * Redistributions in binary form must reproduce the above copyright -** notice, this list of conditions and the following disclaimer in -** the documentation and/or other materials provided with the -** distribution. 
-** * Neither the name of Nokia Corporation and its Subsidiary(-ies) nor -** the names of its contributors may be used to endorse or promote -** products derived from this software without specific prior written -** permission. -** -** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -** "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -** LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -** A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -** OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -** OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." -** $QT_END_LICENSE$ -** -****************************************************************************/ - -/* - treemodel.cpp - - Provides a simple tree model to show how to create and use hierarchical - models. -*/ - -#include +#include "folder_model.h" #include "folder_item.h" -#include "folder_model.h" #include "data_base_management.h" #include "folder.h" #include "db_helper.h" @@ -57,6 +9,8 @@ #include "QsLog.h" #include "query_parser.h" +#include + #ifdef Q_OS_MAC #include QIcon finishedFolderIcon; @@ -104,7 +58,6 @@ FolderModel::FolderModel(QObject *parent) connect(this, SIGNAL(reset()), this, SIGNAL(modelReset())); } -//! [0] FolderModel::FolderModel(QSqlQuery &sqlquery, QObject *parent) : QAbstractItemModel(parent), rootItem(0) { @@ -117,17 +70,13 @@ FolderModel::FolderModel(QSqlQuery &sqlquery, QObject *parent) setupModelData(sqlquery, rootItem); //sqlquery.finish(); } -//! [0] -//! 
[1] FolderModel::~FolderModel() { if (rootItem != 0) delete rootItem; } -//! [1] -//! [2] int FolderModel::columnCount(const QModelIndex &parent) const { if (parent.isValid()) @@ -135,9 +84,7 @@ int FolderModel::columnCount(const QModelIndex &parent) const else return rootItem->columnCount(); } -//! [2] -//! [3] QVariant FolderModel::data(const QModelIndex &index, int role) const { if (!index.isValid()) @@ -188,9 +135,7 @@ QVariant FolderModel::data(const QModelIndex &index, int role) const return item->data(index.column()); } -//! [3] -//! [4] Qt::ItemFlags FolderModel::flags(const QModelIndex &index) const { if (!index.isValid()) @@ -198,9 +143,7 @@ Qt::ItemFlags FolderModel::flags(const QModelIndex &index) const return Qt::ItemIsEnabled | Qt::ItemIsSelectable | Qt::ItemIsDropEnabled | Qt::ItemIsDragEnabled; } -//! [4] -//! [5] QVariant FolderModel::headerData(int section, Qt::Orientation orientation, int role) const { @@ -209,9 +152,7 @@ QVariant FolderModel::headerData(int section, Qt::Orientation orientation, return QVariant(); } -//! [5] -//! [6] QModelIndex FolderModel::index(int row, int column, const QModelIndex &parent) const { @@ -231,9 +172,7 @@ QModelIndex FolderModel::index(int row, int column, const QModelIndex &parent) else return QModelIndex(); } -//! [6] -//! [7] QModelIndex FolderModel::parent(const QModelIndex &index) const { if (!index.isValid()) @@ -247,19 +186,7 @@ QModelIndex FolderModel::parent(const QModelIndex &index) const return createIndex(parentItem->row(), 0, parentItem); } -//! [7] -/* -QModelIndex FolderModel::indexFromItem(FolderItem * item,int column) -{ - //if(item->parent() != 0) - // return index(item->row(),column,parent(indexFromItem(item->parent(),column-1))); - //else - // return index(item->row(),0,QModelIndex()); - return createIndex(item->row(), column, item); -}*/ - -//! 
[8] int FolderModel::rowCount(const QModelIndex &parent) const { FolderItem *parentItem; @@ -273,7 +200,6 @@ int FolderModel::rowCount(const QModelIndex &parent) const return parentItem->childCount(); } -//! [8] void FolderModel::setupModelData(QString path) { @@ -386,27 +312,6 @@ QString FolderModel::getFolderPath(const QModelIndex &folder) return static_cast(folder.internalPointer())->data(FolderModel::Path).toString(); } -/* -void FolderModel::resetFilter() -{ - beginResetModel(); - filter = ""; - includeComics = false; - //TODO hay que liberar la memoria reservada para el filtrado - //items.clear(); - filteredItems.clear(); - FolderItem * root = rootItem; - rootItem = rootBeforeFilter; //TODO si no se aplica el filtro previamente, esto invalidar�a en modelo - if(root !=0) - delete root; - - rootBeforeFilter = 0; - filterEnabled = false; - endResetModel(); - - -}*/ - void FolderModel::updateFolderCompletedStatus(const QModelIndexList &list, bool status) { QString connectionName = ""; @@ -626,7 +531,7 @@ void FolderModel::updateFolderChildrenInfo(qulonglong folderId) //PROXY FolderModelProxy::FolderModelProxy(QObject *parent) - : QSortFilterProxyModel(parent), rootItem(0), includeComics(true), filter(""), filterEnabled(false) + : QSortFilterProxyModel(parent), rootItem(0), filterEnabled(false) { } @@ -649,93 +554,23 @@ bool FolderModelProxy::filterAcceptsRow(int source_row, const QModelIndex &sourc return filteredItems.contains(item->id); } -void FolderModelProxy::setFilter(const YACReader::SearchModifiers modifier, QString filter, bool includeComics) +void FolderModelProxy::setFilterData(QMap *filteredItems, FolderItem *root) { clear(); - this->filter = filter; - this->includeComics = includeComics; - this->modifier = modifier; filterEnabled = true; - setupFilteredModelData(); -} -void FolderModelProxy::setupFilteredModelData() -{ beginResetModel(); - //TODO hay que liberar memoria de anteriores filtrados - - //inicializar el nodo ra�z - if (rootItem != 0) 
delete rootItem; //TODO comprobar que se libera bien la memoria - rootItem = 0; + rootItem = root; - //inicializar el nodo ra�z - QList rootData; - rootData << "root"; - rootItem = new FolderItem(rootData); - rootItem->id = ROOT; - rootItem->parentItem = 0; + this->filteredItems.insert(*filteredItems); - auto model = static_cast(sourceModel()); + endResetModel(); - QString connectionName = ""; - { - QSqlDatabase db = DataBaseManagement::loadDatabase(model->_databasePath); - - QSqlQuery selectQuery(db); //TODO check - if (!includeComics) { - selectQuery.prepare("select * from folder where id <> 1 and upper(name) like upper(:filter) order by parentId,name "); - selectQuery.bindValue(":filter", "%%" + filter + "%%"); - } else { - std::string queryString("SELECT DISTINCT f.id, f.parentId, f.name, f.path, f.finished, f.completed " - "FROM folder f LEFT JOIN comic c ON (f.id = c.parentId) " - "INNER JOIN comic_info ci ON (c.comicInfoId = ci.id) WHERE "); - - try { - QueryParser parser; - auto result = parser.parse(filter.toStdString()); - result.buildSqlString(queryString); - - switch (modifier) { - case YACReader::NoModifiers: - queryString += " AND f.id <> 1 ORDER BY f.parentId,f.name"; - break; - - case YACReader::OnlyRead: - queryString += " AND f.id <> 1 AND ci.read = 1 ORDER BY f.parentId,f.name"; - break; - - case YACReader::OnlyUnread: - queryString += " AND f.id <> 1 AND ci.read = 0 ORDER BY f.parentId,f.name"; - break; - - default: - queryString += " AND f.id <> 1 ORDER BY f.parentId,f.name"; - QLOG_ERROR() << "not implemented"; - break; - } - - selectQuery.prepare(queryString.c_str()); - result.bindValues(selectQuery); - - selectQuery.exec(); - QLOG_DEBUG() << selectQuery.lastError() << "--"; - - setupFilteredModelData(selectQuery, rootItem); - } catch (const std::exception &e) { - QLOG_ERROR() << "Unable to parse query: " << e.what(); - } - } - - connectionName = db.connectionName(); - - endResetModel(); - } - - QSqlDatabase::removeDatabase(connectionName); + 
delete filteredItems; } void FolderModelProxy::clear() @@ -746,88 +581,3 @@ void FolderModelProxy::clear() QSortFilterProxyModel::clear(); } - -void FolderModelProxy::setupFilteredModelData(QSqlQuery &sqlquery, FolderItem *parent) -{ - auto model = static_cast(sourceModel()); - - //64 bits para la primary key, es decir la misma precisi�n que soporta sqlit 2^64 - filteredItems.clear(); - - //se a�ade el nodo 0 al modelo que representa el arbol de elementos que cumplen con el filtro - filteredItems.insert(parent->id, parent); - - QSqlRecord record = sqlquery.record(); - - int name = record.indexOf("name"); - int path = record.indexOf("path"); - int finished = record.indexOf("finished"); - int completed = record.indexOf("completed"); - int parentIdIndex = record.indexOf("parentId"); - - while (sqlquery.next()) { //se procesan todos los folders que cumplen con el filtro - //datos de la base de datos - QList data; - - data << sqlquery.value(name).toString(); - data << sqlquery.value(path).toString(); - data << sqlquery.value(finished).toBool(); - data << sqlquery.value(completed).toBool(); - - auto item = new FolderItem(data); - item->id = sqlquery.value(0).toULongLong(); - - //id del padre - quint64 parentId = sqlquery.value(parentIdIndex).toULongLong(); - - //se a�ade el item al map, de forma que se pueda encontrar como padre en siguientes iteraciones - if (!filteredItems.contains(item->id)) - filteredItems.insert(item->id, item); - - //es necesario conocer las coordenadas de origen para poder realizar scroll autom�tico en la vista - item->originalItem = model->items.value(item->id); - - //si el padre ya existe en el modelo, el item se a�ade como hijo - if (filteredItems.contains(parentId)) - filteredItems.value(parentId)->appendChild(item); - else //si el padre a�n no se ha a�adido, hay que a�adirlo a �l y todos los padres hasta el nodo ra�z - { - //comprobamos con esta variable si el �ltimo de los padres (antes del nodo ra�z) ya exist�a en el modelo - bool 
parentPreviousInserted = false; - - //mientras no se alcance el nodo ra�z se procesan todos los padres (de abajo a arriba) - while (parentId != ROOT) { - //el padre no estaba en el modelo filtrado, as� que se rescata del modelo original - FolderItem *parentItem = model->items.value(parentId); - //se debe crear un nuevo nodo (para no compartir los hijos con el nodo original) - FolderItem *newparentItem = new FolderItem(parentItem->getData()); //padre que se a�adir� a la estructura de directorios filtrados - newparentItem->id = parentId; - - newparentItem->originalItem = parentItem; - - //si el modelo contiene al padre, se a�ade el item actual como hijo - if (filteredItems.contains(parentId)) { - filteredItems.value(parentId)->appendChild(item); - parentPreviousInserted = true; - } - //sino se registra el nodo para poder encontrarlo con posterioridad y se a�ade el item actual como hijo - else { - newparentItem->appendChild(item); - filteredItems.insert(newparentItem->id, newparentItem); - parentPreviousInserted = false; - } - - //variables de control del bucle, se avanza hacia el nodo padre - item = newparentItem; - parentId = parentItem->parentItem->id; - } - - //si el nodo es hijo de 1 y no hab�a sido previamente insertado como hijo, se a�ade como tal - if (!parentPreviousInserted) { - filteredItems.value(ROOT)->appendChild(item); - } else { - delete item; - } - } - } -} diff --git a/YACReaderLibrary/db/folder_model.h b/YACReaderLibrary/db/folder_model.h index ecac40ee..9597d07c 100644 --- a/YACReaderLibrary/db/folder_model.h +++ b/YACReaderLibrary/db/folder_model.h @@ -1,43 +1,3 @@ -/**************************************************************************** -** -** Copyright (C) 2011 Nokia Corporation and/or its subsidiary(-ies). -** All rights reserved. -** Contact: Nokia Corporation (qt-info@nokia.com) -** -** This file is part of the examples of the Qt Toolkit. 
-** -** $QT_BEGIN_LICENSE:BSD$ -** You may use this file under the terms of the BSD license as follows: -** -** "Redistribution and use in source and binary forms, with or without -** modification, are permitted provided that the following conditions are -** met: -** * Redistributions of source code must retain the above copyright -** notice, this list of conditions and the following disclaimer. -** * Redistributions in binary form must reproduce the above copyright -** notice, this list of conditions and the following disclaimer in -** the documentation and/or other materials provided with the -** distribution. -** * Neither the name of Nokia Corporation and its Subsidiary(-ies) nor -** the names of its contributors may be used to endorse or promote -** products derived from this software without specific prior written -** permission. -** -** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -** "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -** LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -** A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -** OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -** OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." 
-** $QT_END_LICENSE$ -** -****************************************************************************/ - #ifndef TREEMODEL_H #define TREEMODEL_H @@ -49,6 +9,7 @@ #include #include "yacreader_global.h" +#include "folder_query_result_processor.h" class FolderItem; @@ -59,9 +20,7 @@ public: explicit FolderModelProxy(QObject *parent = 0); ~FolderModelProxy() override; - void setFilter(const YACReader::SearchModifiers modifier, QString filter, bool includeComics); - void setupFilteredModelData(QSqlQuery &sqlquery, FolderItem *parent); - void setupFilteredModelData(); + void setFilterData(QMap *filteredItems, FolderItem *root); void clear(); bool filterAcceptsRow(int source_row, const QModelIndex &source_parent) const override; @@ -70,8 +29,6 @@ protected: FolderItem *rootItem; QMap filteredItems; //relación entre folders - bool includeComics; - QString filter; bool filterEnabled; YACReader::SearchModifiers modifier; @@ -79,10 +36,10 @@ protected: class FolderModel : public QAbstractItemModel { - Q_OBJECT friend class FolderModelProxy; + friend class YACReader::FolderQueryResultProcessor; public: FolderModel(QObject *parent = 0); @@ -147,6 +104,5 @@ signals: void beforeReset(); void reset(); }; -//! 
[0] #endif diff --git a/YACReaderLibrary/db/folder_query_result_processor.cpp b/YACReaderLibrary/db/folder_query_result_processor.cpp new file mode 100644 index 00000000..e0b6050d --- /dev/null +++ b/YACReaderLibrary/db/folder_query_result_processor.cpp @@ -0,0 +1,176 @@ +#include "folder_query_result_processor.h" + +#include "folder_item.h" +#include "qnaturalsorting.h" +#include "yacreader_global_gui.h" +#include "query_parser.h" +#include "folder_model.h" +#include "data_base_management.h" + +#include "QsLog.h" + +#include +#include + +//Copy/pasted from "folder_model.cpp" +#define ROOT 1 + +YACReader::FolderQueryResultProcessor::FolderQueryResultProcessor(FolderModel *model) + : querySearchQueue(1), model(model) +{ +} + +void YACReader::FolderQueryResultProcessor::createModelData(const YACReader::SearchModifiers modifier, const QString &filter, bool includeComics) +{ + querySearchQueue.cancellPending(); + + QString connectionName = ""; + { + QSqlDatabase db = DataBaseManagement::loadDatabase(model->getDatabase()); + + QSqlQuery selectQuery(db); //TODO check + if (!includeComics) { + selectQuery.prepare("select * from folder where id <> 1 and upper(name) like upper(:filter) order by parentId,name "); + selectQuery.bindValue(":filter", "%%" + filter + "%%"); + } else { + std::string queryString("SELECT DISTINCT f.id, f.parentId, f.name, f.path, f.finished, f.completed " + "FROM folder f LEFT JOIN comic c ON (f.id = c.parentId) " + "INNER JOIN comic_info ci ON (c.comicInfoId = ci.id) WHERE "); + + try { + QueryParser parser; + auto result = parser.parse(filter.toStdString()); + result.buildSqlString(queryString); + + switch (modifier) { + case YACReader::NoModifiers: + queryString += " AND f.id <> 1 ORDER BY f.parentId,f.name"; + break; + + case YACReader::OnlyRead: + queryString += " AND f.id <> 1 AND ci.read = 1 ORDER BY f.parentId,f.name"; + break; + + case YACReader::OnlyUnread: + queryString += " AND f.id <> 1 AND ci.read = 0 ORDER BY f.parentId,f.name"; + 
break; + + default: + queryString += " AND f.id <> 1 ORDER BY f.parentId,f.name"; + QLOG_ERROR() << "not implemented"; + break; + } + + selectQuery.prepare(queryString.c_str()); + result.bindValues(selectQuery); + + selectQuery.exec(); + QLOG_DEBUG() << selectQuery.lastError() << "--"; + + setupFilteredModelData(selectQuery); + } catch (const std::exception &e) { + //Do nothing, uncomplete search string will end here and it is part of how the QueryParser works + //I don't like the idea of using exceptions for this though + } + } + + connectionName = db.connectionName(); + } + + QSqlDatabase::removeDatabase(connectionName); +} + +void YACReader::FolderQueryResultProcessor::setupFilteredModelData(QSqlQuery &sqlquery) +{ + FolderItem *rootItem = 0; + + //inicializar el nodo ra�z + QList rootData; + rootData << "root"; + rootItem = new FolderItem(rootData); + rootItem->id = ROOT; + rootItem->parentItem = 0; + + FolderItem *parent = rootItem; + + QMap *filteredItems = new QMap(); + + //add tree root node + filteredItems->insert(parent->id, parent); + + QSqlRecord record = sqlquery.record(); + + int name = record.indexOf("name"); + int path = record.indexOf("path"); + int finished = record.indexOf("finished"); + int completed = record.indexOf("completed"); + int parentIdIndex = record.indexOf("parentId"); + + while (sqlquery.next()) { //se procesan todos los folders que cumplen con el filtro + //datos de la base de datos + QList data; + + data << sqlquery.value(name).toString(); + data << sqlquery.value(path).toString(); + data << sqlquery.value(finished).toBool(); + data << sqlquery.value(completed).toBool(); + + auto item = new FolderItem(data); + item->id = sqlquery.value(0).toULongLong(); + + //id del padre + quint64 parentId = sqlquery.value(parentIdIndex).toULongLong(); + + //se a�ade el item al map, de forma que se pueda encontrar como padre en siguientes iteraciones + if (!filteredItems->contains(item->id)) + filteredItems->insert(item->id, item); + + //es 
necesario conocer las coordenadas de origen para poder realizar scroll autom�tico en la vista + item->originalItem = model->items.value(item->id); + + //si el padre ya existe en el modelo, el item se a�ade como hijo + if (filteredItems->contains(parentId)) + filteredItems->value(parentId)->appendChild(item); + else //si el padre a�n no se ha a�adido, hay que a�adirlo a �l y todos los padres hasta el nodo ra�z + { + //comprobamos con esta variable si el �ltimo de los padres (antes del nodo ra�z) ya exist�a en el modelo + bool parentPreviousInserted = false; + + //mientras no se alcance el nodo ra�z se procesan todos los padres (de abajo a arriba) + while (parentId != ROOT) { + //el padre no estaba en el modelo filtrado, as� que se rescata del modelo original + FolderItem *parentItem = model->items.value(parentId); + //se debe crear un nuevo nodo (para no compartir los hijos con el nodo original) + FolderItem *newparentItem = new FolderItem(parentItem->getData()); //padre que se a�adir� a la estructura de directorios filtrados + newparentItem->id = parentId; + + newparentItem->originalItem = parentItem; + + //si el modelo contiene al padre, se a�ade el item actual como hijo + if (filteredItems->contains(parentId)) { + filteredItems->value(parentId)->appendChild(item); + parentPreviousInserted = true; + } + //sino se registra el nodo para poder encontrarlo con posterioridad y se a�ade el item actual como hijo + else { + newparentItem->appendChild(item); + filteredItems->insert(newparentItem->id, newparentItem); + parentPreviousInserted = false; + } + + //variables de control del bucle, se avanza hacia el nodo padre + item = newparentItem; + parentId = parentItem->parentItem->id; + } + + //si el nodo es hijo de 1 y no hab�a sido previamente insertado como hijo, se a�ade como tal + if (!parentPreviousInserted) { + filteredItems->value(ROOT)->appendChild(item); + } else { + delete item; + } + } + } + + emit newData(filteredItems, rootItem); +} diff --git 
a/YACReaderLibrary/db/folder_query_result_processor.h b/YACReaderLibrary/db/folder_query_result_processor.h new file mode 100644 index 00000000..a5612080 --- /dev/null +++ b/YACReaderLibrary/db/folder_query_result_processor.h @@ -0,0 +1,36 @@ +#ifndef FOLDER_QUERY_RESULT_PROCESSOR_H +#define FOLDER_QUERY_RESULT_PROCESSOR_H + +#include + +#include "yacreader_global_gui.h" +#include "concurrent_queue.h" + +class FolderItem; +class FolderModel; +class QSqlQuery; + +namespace YACReader { + +class FolderQueryResultProcessor : public QObject +{ + Q_OBJECT +public: + FolderQueryResultProcessor(FolderModel *model); + +public slots: + void createModelData(const SearchModifiers modifier, const QString &filter, bool includeComics); + +signals: + void newData(QMap *filteredItems, FolderItem *root); + +private: + ConcurrentQueue querySearchQueue; + + FolderModel *model; + + void setupFilteredModelData(QSqlQuery &sqlquery); +}; +}; + +#endif // FOLDER_QUERY_RESULT_PROCESSOR_H diff --git a/YACReaderLibrary/library_window.cpp b/YACReaderLibrary/library_window.cpp index aad471a7..65f66b61 100644 --- a/YACReaderLibrary/library_window.cpp +++ b/YACReaderLibrary/library_window.cpp @@ -400,6 +400,7 @@ void LibraryWindow::doModels() //folders foldersModel = new FolderModel(); foldersModelProxy = new FolderModelProxy(); + folderQueryResultProcessor = new FolderQueryResultProcessor(foldersModel); //foldersModelProxy->setSourceModel(foldersModel); //comics comicsModel = new ComicModel(this); @@ -1056,6 +1057,7 @@ void LibraryWindow::createConnections() //Search filter connect(searchEdit, SIGNAL(filterChanged(YACReader::SearchModifiers, QString)), this, SLOT(setSearchFilter(YACReader::SearchModifiers, QString))); connect(&comicQueryResultProcesor, &ComicQueryResultProcesor::newData, this, &LibraryWindow::setComicSearchFilterData); + connect(folderQueryResultProcessor, &FolderQueryResultProcessor::newData, this, &LibraryWindow::setFolderSearchFilterData); //ContextMenus 
connect(openContainingFolderComicAction, SIGNAL(triggered()), this, SLOT(openContainingFolderComic())); @@ -2069,8 +2071,7 @@ void LibraryWindow::toNormal() void LibraryWindow::setSearchFilter(const YACReader::SearchModifiers modifier, QString filter) { if (!filter.isEmpty()) { - //TODO move search query for folders to its own async processor - foldersModelProxy->setFilter(modifier, filter, true); //includeComicsCheckBox->isChecked()); + folderQueryResultProcessor->createModelData(modifier, filter, true); comicQueryResultProcesor.createModelData(modifier, filter, foldersModel->getDatabase()); } else if (status == LibraryWindow::Searching) { //if no searching, then ignore this clearSearchFilter(); @@ -2085,7 +2086,6 @@ void LibraryWindow::setComicSearchFilterData(QList *data, const QSt comicsModel->setModelData(data, databasePath); comicsViewsManager->comicsView->enableFilterMode(true); comicsViewsManager->comicsView->setModel(comicsModel); //TODO, columns are messed up after ResetModel some times, this shouldn't be necesary - foldersView->expandAll(); if (comicsModel->rowCount() == 0) comicsViewsManager->showNoSearchResultsView(); @@ -2093,6 +2093,12 @@ void LibraryWindow::setComicSearchFilterData(QList *data, const QSt comicsViewsManager->showComicsView(); } +void LibraryWindow::setFolderSearchFilterData(QMap *filteredItems, FolderItem *root) +{ + foldersModelProxy->setFilterData(filteredItems, root); + foldersView->expandAll(); +} + void LibraryWindow::clearSearchFilter() { foldersModelProxy->clear(); diff --git a/YACReaderLibrary/library_window.h b/YACReaderLibrary/library_window.h index e4ebeda7..4e626cf3 100644 --- a/YACReaderLibrary/library_window.h +++ b/YACReaderLibrary/library_window.h @@ -11,6 +11,7 @@ #include "yacreader_navigation_controller.h" #include "comic_query_result_procesor.h" +#include "folder_query_result_processor.h" #include @@ -329,6 +330,7 @@ public slots: void toFullScreen(); void setSearchFilter(const YACReader::SearchModifiers modifier, 
QString filter); void setComicSearchFilterData(QList *, const QString &); + void setFolderSearchFilterData(QMap *filteredItems, FolderItem *root); void clearSearchFilter(); void showProperties(); void exportLibrary(QString destPath); @@ -402,6 +404,7 @@ private: TrayIconController *trayIconController; ComicQueryResultProcesor comicQueryResultProcesor; + FolderQueryResultProcessor *folderQueryResultProcessor; }; #endif From c3b0780e03e062a580184a0d826e503a7dcf9f4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 11:37:37 +0100 Subject: [PATCH 24/32] Remove unused constant --- common/yacreader_global.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/common/yacreader_global.h b/common/yacreader_global.h index e52d73a7..770e35e9 100644 --- a/common/yacreader_global.h +++ b/common/yacreader_global.h @@ -36,8 +36,7 @@ enum YACReaderErrors { enum SearchModifiers { NoModifiers = 0, OnlyRead, - OnlyUnread, - ByAuthor + OnlyUnread }; enum LabelColors { From dde60b78eae8f87c4821eeb455dda534656bfa59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 11:38:45 +0100 Subject: [PATCH 25/32] YACReaderLibrary compiles using c++11 just fine It is probably worth to bump this to c++14 and ideally use c++17, but it will be done in a separate branch. --- YACReaderLibrary/YACReaderLibrary.pro | 2 ++ 1 file changed, 2 insertions(+) diff --git a/YACReaderLibrary/YACReaderLibrary.pro b/YACReaderLibrary/YACReaderLibrary.pro index d0f0f0d3..a3b20ab4 100644 --- a/YACReaderLibrary/YACReaderLibrary.pro +++ b/YACReaderLibrary/YACReaderLibrary.pro @@ -14,6 +14,8 @@ INCLUDEPATH += . 
\ DEFINES += SERVER_RELEASE NOMINMAX YACREADER_LIBRARY +CONFIG += c++11 + # load default build flags include (../config.pri) include (../dependencies/pdf_backend.pri) From f803b54f2ed76cdadd6de831c88142f5fdc5dec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 12:25:24 +0100 Subject: [PATCH 26/32] Add support for boolean folder fields --- YACReaderLibrary/db/query_parser.cpp | 9 ++++++--- YACReaderLibrary/db/query_parser.h | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp index 36916b53..2620a81b 100644 --- a/YACReaderLibrary/db/query_parser.cpp +++ b/YACReaderLibrary/db/query_parser.cpp @@ -7,10 +7,11 @@ const std::map> QueryParser::fieldNames { { FieldType::numeric, { "numpages", "number", "count", "arcnumber", "arccount" } }, { FieldType::text, { "title", "volume", "storyarc", "genere", "writer", "penciller", "inker", "colorist", "letterer", "coverartist", "publisher", "format", "agerating", "synopsis", "characters", "notes" } }, - { FieldType::boolean, { "isbis", "color" } }, + { FieldType::boolean, { "isbis", "color", "read" } }, { FieldType::date, { "date" } }, { FieldType::filename, { "filename" } }, - { FieldType::folder, { "folder" } } + { FieldType::folder, { "folder" } }, + { FieldType::booleanFolder, { "completed", "finished" } }, }; int QueryParser::TreeNode::buildSqlString(std::string &sqlString, int bindPosition) const @@ -30,6 +31,8 @@ int QueryParser::TreeNode::buildSqlString(std::string &sqlString, int bindPositi sqlString += "(UPPER(c." + children[0].t + ") LIKE UPPER(:bindPosition" + std::to_string(bindPosition) + ")) "; } else if (fieldType(children[0].t) == FieldType::folder) { sqlString += "(UPPER(f.name) LIKE UPPER(:bindPosition" + std::to_string(bindPosition) + ")) "; + } else if (fieldType(children[0].t) == FieldType::booleanFolder) { + sqlString += "f." 
+ children[0].t + " = :bindPosition" + std::to_string(bindPosition) + " "; } else { sqlString += "(UPPER(ci." + children[0].t + ") LIKE UPPER(:bindPosition" + std::to_string(bindPosition) + ")) "; } @@ -52,7 +55,7 @@ int QueryParser::TreeNode::bindValues(QSqlQuery &selectQuery, int bindPosition) { if (t == "token") { std::string bind_string(":bindPosition" + std::to_string(++bindPosition)); - if (isIn(fieldType(children[0].t), { FieldType::numeric, FieldType::boolean })) { + if (isIn(fieldType(children[0].t), { FieldType::numeric, FieldType::boolean, FieldType::booleanFolder })) { selectQuery.bindValue(QString::fromStdString(bind_string), std::stoi(children[1].t)); } else { selectQuery.bindValue(QString::fromStdString(bind_string), QString::fromStdString("%%" + children[1].t + "%%")); diff --git a/YACReaderLibrary/db/query_parser.h b/YACReaderLibrary/db/query_parser.h index 7aca5ca9..4b8f0c4e 100644 --- a/YACReaderLibrary/db/query_parser.h +++ b/YACReaderLibrary/db/query_parser.h @@ -81,6 +81,7 @@ private: boolean, date, folder, + booleanFolder, filename }; static FieldType fieldType(const std::string &str); From f03ad848cbf8f6b580bac5bc9af312ee3259fd79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 15:32:20 +0100 Subject: [PATCH 27/32] Add support for `true` and `false` literals to be used with bool fields The lexer itself should be responsible for parsing those values but it will require a bigger refactoring. 
--- YACReaderLibrary/db/query_parser.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/YACReaderLibrary/db/query_parser.cpp b/YACReaderLibrary/db/query_parser.cpp index 2620a81b..5a45160b 100644 --- a/YACReaderLibrary/db/query_parser.cpp +++ b/YACReaderLibrary/db/query_parser.cpp @@ -55,8 +55,17 @@ int QueryParser::TreeNode::bindValues(QSqlQuery &selectQuery, int bindPosition) { if (t == "token") { std::string bind_string(":bindPosition" + std::to_string(++bindPosition)); - if (isIn(fieldType(children[0].t), { FieldType::numeric, FieldType::boolean, FieldType::booleanFolder })) { + if (isIn(fieldType(children[0].t), { FieldType::numeric })) { selectQuery.bindValue(QString::fromStdString(bind_string), std::stoi(children[1].t)); + } else if (isIn(fieldType(children[0].t), { FieldType::boolean, FieldType::booleanFolder })) { + auto value = toLower(children[1].t); + if (value == "true") { + selectQuery.bindValue(QString::fromStdString(bind_string), 1); + } else if (value == "false") { + selectQuery.bindValue(QString::fromStdString(bind_string), 0); + } else { + selectQuery.bindValue(QString::fromStdString(bind_string), std::stoi(value)); + } } else { selectQuery.bindValue(QString::fromStdString(bind_string), QString::fromStdString("%%" + children[1].t + "%%")); } From 82eb5c0a8bfe2142cccfd13472b50209bb9322e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 15:51:20 +0100 Subject: [PATCH 28/32] Add fallback for qt < 5.15 --- YACReaderLibrary/db/folder_model.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/YACReaderLibrary/db/folder_model.cpp b/YACReaderLibrary/db/folder_model.cpp index b20ff402..288a7dde 100644 --- a/YACReaderLibrary/db/folder_model.cpp +++ b/YACReaderLibrary/db/folder_model.cpp @@ -566,7 +566,13 @@ void FolderModelProxy::setFilterData(QMap *fil rootItem = root; +#if QT_VERSION < QT_VERSION_CHECK(5, 15, 0) + QMap::iterator i; + for (i = 
filteredItems->begin(); i != filteredItems->end(); ++i) + this->filteredItems.insert(i.key(), i.value()); +#else this->filteredItems.insert(*filteredItems); +#endif endResetModel(); From 24c0a783de42ed1bac7d4d4930c107bfb5f42ce8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 18:42:21 +0100 Subject: [PATCH 29/32] Remove reference to lexertl's license --- YACReaderLibrary/YACReaderLibrary.pro | 3 --- 1 file changed, 3 deletions(-) diff --git a/YACReaderLibrary/YACReaderLibrary.pro b/YACReaderLibrary/YACReaderLibrary.pro index a3b20ab4..4af13332 100644 --- a/YACReaderLibrary/YACReaderLibrary.pro +++ b/YACReaderLibrary/YACReaderLibrary.pro @@ -331,6 +331,3 @@ translation.files = ../release/languages/yacreaderlibrary_* manpage.path = $$DATADIR/man/man1 manpage.files = ../YACReaderLibrary.1 } - -DISTFILES += \ - lexertl/licence_1_0.txt From 6bb64c54678804b5e8dc559da0bddfb524896944 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 18:42:51 +0100 Subject: [PATCH 30/32] Import non-gui global header --- YACReaderLibrary/db/comic_query_result_procesor.h | 2 +- YACReaderLibrary/db/folder_query_result_processor.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/YACReaderLibrary/db/comic_query_result_procesor.h b/YACReaderLibrary/db/comic_query_result_procesor.h index ba71da99..a948611c 100644 --- a/YACReaderLibrary/db/comic_query_result_procesor.h +++ b/YACReaderLibrary/db/comic_query_result_procesor.h @@ -4,7 +4,7 @@ #include #include -#include "yacreader_global_gui.h" +#include "yacreader_global.h" #include "concurrent_queue.h" class ComicItem; diff --git a/YACReaderLibrary/db/folder_query_result_processor.h b/YACReaderLibrary/db/folder_query_result_processor.h index a5612080..faa59046 100644 --- a/YACReaderLibrary/db/folder_query_result_processor.h +++ b/YACReaderLibrary/db/folder_query_result_processor.h @@ -3,7 +3,7 @@ #include -#include 
"yacreader_global_gui.h" +#include "yacreader_global.h" #include "concurrent_queue.h" class FolderItem; From 76a307d0d82e23679602aec81258fb83faf37100 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 19:11:25 +0100 Subject: [PATCH 31/32] Remove comments --- YACReaderLibrary/db/comic_model.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/YACReaderLibrary/db/comic_model.h b/YACReaderLibrary/db/comic_model.h index b15e50e7..40f14517 100644 --- a/YACReaderLibrary/db/comic_model.h +++ b/YACReaderLibrary/db/comic_model.h @@ -16,7 +16,6 @@ class ComicItem; using namespace YACReader; -//! [0] class ComicModel : public QAbstractItemModel { Q_OBJECT @@ -164,6 +163,5 @@ signals: void resortedIndexes(QList); void newSelectedIndex(const QModelIndex &); }; -//! [0] #endif From 7e72c8b691ae2402c9f3a16a0456e6f801aa8290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luis=20=C3=81ngel=20San=20Mart=C3=ADn?= Date: Thu, 14 Jan 2021 21:38:30 +0100 Subject: [PATCH 32/32] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e76a4978..c8c6f393 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ spanish only. Sorry for the mess. Version counting is based on semantic versioning (Major.Feature.Patch) ## WIP +### YACReaderLibrary +* New search engine. ## 9.7.1 ### YACReader