libstdc++
regex_executor.h
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2026 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_executor.h
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 // FIXME convert comments to doxygen format.
32 
33 namespace std _GLIBCXX_VISIBILITY(default)
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37 namespace __detail
38 {
39  /**
40  * @addtogroup regex-detail
41  * @{
42  */
43 
44  template<typename _BiIter, bool _Trivial = is_trivially_copyable<_BiIter>::value>
45  struct _ExecutorFrame;
46 
47  /**
48  * @brief Takes a regex and an input string and does the matching.
49  *
50  * The %_Executor class has two modes: DFS mode and BFS mode, controlled
51  * by the function parameter %__search_mode.
52  */
53  template<typename _BiIter, typename _Alloc, typename _TraitsT>
54  class _Executor
55  {
56  enum class _Search_mode : unsigned char { _BFS = 0, _DFS = 1 };
57  enum class _Match_mode : unsigned char { _Exact, _Prefix };
58 
59  public:
60  typedef typename iterator_traits<_BiIter>::value_type _CharT;
61  typedef basic_regex<_CharT, _TraitsT> _RegexT;
62  typedef _GLIBCXX_STD_C::vector<sub_match<_BiIter>, _Alloc> _ResultsVec;
63  typedef regex_constants::match_flag_type _FlagT;
64  typedef typename _TraitsT::char_class_type _ClassT;
65  typedef _NFA<_TraitsT> _NFAT;
66 
67  public:
68  _Executor(_BiIter __begin,
69  _BiIter __end,
70  _ResultsVec& __results,
71  const _RegexT& __re,
72  _FlagT __flags,
73  bool __use_dfs)
74  : _M_cur_results(__results.get_allocator()),
75  _M_begin(__begin),
76  _M_end(__end),
77  _M_re(__re),
78  _M_nfa(*__re._M_automaton),
79  _M_results(__results),
80  _M_rep_count(_M_nfa.size()),
81  _M_start(_M_nfa._M_start()),
82  _M_visited_states(nullptr),
83  _M_flags(__flags),
84  _M_search_mode(__use_dfs ? _Search_mode::_DFS : _Search_mode::_BFS)
85  {
86  using namespace regex_constants;
87  if (__flags & match_prev_avail) // ignore not_bol and not_bow
88  _M_flags &= ~(match_not_bol | match_not_bow);
89  if (_M_search_mode == _Search_mode::_BFS)
90  _M_visited_states = new bool[_M_nfa.size()];
91  }
92 
93  ~_Executor()
94  { delete[] _M_visited_states; }
95 
96  // Set matched when string exactly matches the pattern.
97  bool
98  _M_match()
99  {
100  _M_current = _M_begin;
101  return _M_main(_Match_mode::_Exact);
102  }
103 
104  // Set matched when some prefix of the string matches the pattern.
105  bool
106  _M_search_from_first()
107  {
108  _M_current = _M_begin;
109  return _M_main(_Match_mode::_Prefix);
110  }
111 
112  bool
113  _M_search();
114 
115  private:
116  void
117  _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
118 
119  void
120  _M_handle_repeat(_Match_mode, _StateIdT);
121 
122  void
123  _M_handle_subexpr_begin(_Match_mode, _StateIdT);
124 
125  void
126  _M_handle_subexpr_end(_Match_mode, _StateIdT);
127 
128  void
129  _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
130 
131  void
132  _M_handle_line_end_assertion(_Match_mode, _StateIdT);
133 
134  void
135  _M_handle_word_boundary(_Match_mode, _StateIdT);
136 
137  void
138  _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
139 
140  void
141  _M_handle_match(_Match_mode, _StateIdT);
142 
143  void
144  _M_handle_backref(_Match_mode, _StateIdT);
145 
146  void
147  _M_handle_accept(_Match_mode, _StateIdT);
148 
149  void
150  _M_handle_alternative(_Match_mode, _StateIdT);
151 
152  void
153  _M_node(_Match_mode, _StateIdT);
154 
155  void
156  _M_dfs(_Match_mode __match_mode, _StateIdT __start);
157 
158  bool
159  _M_main(_Match_mode __match_mode)
160  {
161  if (_M_search_mode == _Search_mode::_DFS)
162  return _M_main_dfs(__match_mode);
163  else
164  return _M_main_bfs(__match_mode);
165  }
166 
167  bool
168  _M_main_dfs(_Match_mode __match_mode);
169 
170  bool
171  _M_main_bfs(_Match_mode __match_mode);
172 
173  bool
174  _M_is_word(_CharT __ch) const
175  {
176  static const _CharT __s[2] = { 'w' };
177  return _M_re._M_automaton->_M_traits.isctype
178  (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
179  }
180 
181  bool
182  _M_at_begin() const
183  {
184  if (_M_current == _M_begin)
185  {
186  // match_not_bol means ^ does not match [_M_begin,_M_begin)
187  if (_M_flags & regex_constants::match_not_bol)
188  return false;
189  // match_prev_avail means _M_begin is not the start of the input.
190  if (_M_flags & regex_constants::match_prev_avail)
191  {
192  // For ECMAScript multiline matches, check if the previous
193  // character is a line terminator.
194  if (_M_match_multiline())
195  return _M_is_line_terminator(*std::prev(_M_current));
196  else
197  return false;
198  }
199  else // ^ matches at _M_begin
200  return true;
201  }
202  else if (_M_match_multiline())
203  return _M_is_line_terminator(*std::prev(_M_current));
204  else
205  return false;
206  }
207 
208  bool
209  _M_at_end() const
210  {
211  if (_M_current == _M_end)
212  return !(_M_flags & regex_constants::match_not_eol);
213  else if (_M_match_multiline())
214  return _M_is_line_terminator(*_M_current);
215  else
216  return false;
217  }
218 
219  bool
220  _M_word_boundary() const;
221 
222  bool
223  _M_lookahead(_StateIdT __next);
224 
225  bool
226  _M_is_line_terminator(_CharT __c) const
227  {
228  const auto& __traits = _M_re._M_automaton->_M_traits;
229  const auto& __ct = use_facet<ctype<_CharT>>(__traits.getloc());
230  const char __n{ __ct.narrow(__c, ' ') };
231  if (__n == '\n')
232  return true;
233  if (_M_re._M_automaton->_M_options() & regex_constants::ECMAScript)
234  {
235  if (__n == '\r')
236  return true;
237  // FIXME: U+2028 (line separator) and U+2029 (paragraph separator)
238  }
239  return false;
240  }
241 
242  bool
243  _M_match_multiline() const noexcept
244  {
245  constexpr auto __m
247  return (_M_re._M_automaton->_M_options() & __m) == __m;
248  }
249 
250  bool
251  _M_visited(_StateIdT __i)
252  {
253  if (_M_visited_states)
254  {
255  if (_M_visited_states[__i])
256  return true;
257  _M_visited_states[__i] = true;
258  }
259  return false;
260  }
261 
262  _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
263 
264  public:
265  _GLIBCXX_STD_C::vector<_ExecutorFrame<_BiIter>> _M_frames;
266  _ResultsVec _M_cur_results;
267  _BiIter _M_current;
268  _BiIter _M_begin;
269  const _BiIter _M_end;
270  const _RegexT& _M_re;
271  const _NFAT& _M_nfa;
272  _ResultsVec& _M_results;
273  _GLIBCXX_STD_C::vector<pair<_BiIter, int>> _M_rep_count;
274  // To record current solution.
275  _StateIdT _M_start;
276  _BiIter _M_sol_pos;
277  // (BFS only) Saves states that need to be considered for the next character.
278  _GLIBCXX_STD_C::vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
279  // (BFS only) Indicates which states are already visited.
280  bool* _M_visited_states;
281  _FlagT _M_flags;
282  const _Search_mode _M_search_mode;
283  // Do we have a solution so far?
284  bool _M_has_sol;
285  };
286 
287  ///@} regex-detail
288 } // namespace __detail
289 _GLIBCXX_END_NAMESPACE_VERSION
290 } // namespace std
291 
292 #include <bits/regex_executor.tcc>
std::size
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
Definition: range_access.h:274
std::regex_constants::match_not_bow
constexpr match_flag_type match_not_bow
Definition: regex_constants.h:293
std::regex_constants::match_not_bol
constexpr match_flag_type match_not_bol
Definition: regex_constants.h:280
std::regex_constants::__multiline
constexpr syntax_option_type __multiline
Extension: Equivalent to regex_constants::multiline for C++11 and C++14.
Definition: regex_constants.h:179
std
ISO C++ entities toplevel namespace is std.
std::regex_constants::match_flag_type
match_flag_type
This is a bitmask type indicating regex matching rules.
Definition: regex_constants.h:253
std::regex_constants::ECMAScript
constexpr syntax_option_type ECMAScript
Definition: regex_constants.h:120
regex_executor.tcc
std::regex_constants::match_prev_avail
constexpr match_flag_type match_prev_avail
Definition: regex_constants.h:323
std::regex_constants::match_not_eol
constexpr match_flag_type match_not_eol
Definition: regex_constants.h:287