libstdc++
regex_automaton.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2026 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_automaton.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 #ifdef _GLIBCXX_DEBUG
32 # include <ostream>
33 #endif
34 
35 namespace std _GLIBCXX_VISIBILITY(default)
36 {
37 _GLIBCXX_BEGIN_NAMESPACE_VERSION
38 
39 namespace __detail
40 {
41 #ifdef _GLIBCXX_DEBUG
42  inline std::ostream&
43  _State_base::_M_print(std::ostream& __ostr) const
44  {
45  switch (_M_opcode)
46  {
47  case _S_opcode_alternative:
48  case _S_opcode_repeat:
49  __ostr << "alt next=" << _M_next << " alt=" << _M_alt;
50  break;
51  case _S_opcode_subexpr_begin:
52  __ostr << "subexpr begin next=" << _M_next << " index=" << _M_subexpr;
53  break;
54  case _S_opcode_subexpr_end:
55  __ostr << "subexpr end next=" << _M_next << " index=" << _M_subexpr;
56  break;
57  case _S_opcode_backref:
58  __ostr << "backref next=" << _M_next << " index=" << _M_backref_index;
59  break;
60  case _S_opcode_match:
61  __ostr << "match next=" << _M_next;
62  break;
63  case _S_opcode_accept:
64  __ostr << "accept next=" << _M_next;
65  break;
66  default:
67  __ostr << "unknown next=" << _M_next;
68  break;
69  }
70  return __ostr;
71  }
72 
73  // Prints graphviz dot commands for state.
74  inline std::ostream&
75  _State_base::_M_dot(std::ostream& __ostr, _StateIdT __id) const
76  {
77  switch (_M_opcode)
78  {
79  case _S_opcode_alternative:
80  case _S_opcode_repeat:
81  __ostr << __id << " [label=\"" << __id << "\\nALT\"];\n"
82  << __id << " -> " << _M_next
83  << " [label=\"next\", tailport=\"s\"];\n"
84  << __id << " -> " << _M_alt
85  << " [label=\"alt\", tailport=\"n\"];\n";
86  break;
87  case _S_opcode_backref:
88  __ostr << __id << " [label=\"" << __id << "\\nBACKREF "
89  << _M_subexpr << "\"];\n"
90  << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
91  break;
92  case _S_opcode_line_begin_assertion:
93  __ostr << __id << " [label=\"" << __id << "\\nLINE_BEGIN \"];\n"
94  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
95  break;
96  case _S_opcode_line_end_assertion:
97  __ostr << __id << " [label=\"" << __id << "\\nLINE_END \"];\n"
98  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
99  break;
100  case _S_opcode_word_boundary:
101  __ostr << __id << " [label=\"" << __id << "\\nWORD_BOUNDRY "
102  << _M_neg << "\"];\n"
103  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
104  break;
105  case _S_opcode_subexpr_lookahead:
106  __ostr << __id << " [label=\"" << __id << "\\nLOOK_AHEAD\"];\n"
107  << __id << " -> " << _M_next
108  << " [label=\"epsilon\", tailport=\"s\"];\n"
109  << __id << " -> " << _M_alt
110  << " [label=\"<assert>\", tailport=\"n\"];\n";
111  break;
112  case _S_opcode_subexpr_begin:
113  __ostr << __id << " [label=\"" << __id << "\\nSBEGIN "
114  << _M_subexpr << "\"];\n"
115  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
116  break;
117  case _S_opcode_subexpr_end:
118  __ostr << __id << " [label=\"" << __id << "\\nSEND "
119  << _M_subexpr << "\"];\n"
120  << __id << " -> " << _M_next << " [label=\"epsilon\"];\n";
121  break;
122  case _S_opcode_dummy:
123  break;
124  case _S_opcode_match:
125  __ostr << __id << " [label=\"" << __id << "\\nMATCH\"];\n"
126  << __id << " -> " << _M_next << " [label=\"<match>\"];\n";
127  break;
128  case _S_opcode_accept:
129  __ostr << __id << " [label=\"" << __id << "\\nACC\"];\n" ;
130  break;
131  default:
132  _GLIBCXX_DEBUG_ASSERT(false);
133  break;
134  }
135  return __ostr;
136  }
137 
138  template<typename _TraitsT>
139  std::ostream&
140  _NFA<_TraitsT>::_M_dot(std::ostream& __ostr) const
141  {
142  __ostr << "digraph _Nfa {\n"
143  " rankdir=LR;\n";
144  for (size_t __i = 0; __i < this->size(); ++__i)
145  (*this)[__i]._M_dot(__ostr, __i);
146  __ostr << "}\n";
147  return __ostr;
148  }
149 #endif
150 
151  template<typename _TraitsT>
152  _StateIdT
153  _NFA<_TraitsT>::_M_insert_backref(size_t __index)
154  {
155  if (this->_M_flags & regex_constants::__polynomial)
156  __throw_regex_error(regex_constants::error_complexity,
157  "Unexpected back-reference in polynomial mode.");
158  // To figure out whether a backref is valid, a stack is used to store
159  // unfinished sub-expressions. For example, when parsing
160  // "(a(b)(c\\1(d)))" at '\\1', _M_subexpr_count is 3, indicating that 3
161  // sub expressions are parsed or partially parsed(in the stack), aka,
162  // "(a..", "(b)" and "(c..").
163  // _M_paren_stack is {1, 3}, for incomplete "(a.." and "(c..". At this
164  // time, "\\2" is valid, but "\\1" and "\\3" are not.
165  if (__index >= _M_subexpr_count)
166  __throw_regex_error(
168  "Back-reference index exceeds current sub-expression count.");
169  for (auto __it : this->_M_paren_stack)
170  if (__index == __it)
171  __throw_regex_error(
173  "Back-reference referred to an opened sub-expression.");
174  this->_M_has_backref = true;
175  _StateT __tmp(_S_opcode_backref);
176  __tmp._M_backref_index = __index;
177  return _M_insert_state(std::move(__tmp));
178  }
179 
180  template<typename _TraitsT>
181  void
182  _NFA<_TraitsT>::_M_eliminate_dummy()
183  {
184  for (auto& __it : *this)
185  {
186  while (__it._M_next >= 0 && (*this)[__it._M_next]._M_opcode()
187  == _S_opcode_dummy)
188  __it._M_next = (*this)[__it._M_next]._M_next;
189  if (__it._M_has_alt())
190  while (__it._M_alt >= 0 && (*this)[__it._M_alt]._M_opcode()
191  == _S_opcode_dummy)
192  __it._M_alt = (*this)[__it._M_alt]._M_next;
193  }
194  }
195 
196  // Just apply DFS on the sequence and re-link their links.
197  template<typename _TraitsT>
198  _StateSeq<_TraitsT>
199  _StateSeq<_TraitsT>::_M_clone()
200  {
201  _GLIBCXX_STD_C::map<_StateIdT, _StateIdT> __m;
203  __stack.push(_M_start);
204  while (!__stack.empty())
205  {
206  auto __u = __stack.top();
207  __stack.pop();
208  auto __dup = _M_nfa[__u];
209  // _M_insert_state() never return -1
210  auto __id = _M_nfa._M_insert_state(std::move(__dup));
211  __m[__u] = __id;
212  if (__dup._M_has_alt())
213  if (__dup._M_alt != _S_invalid_state_id
214  && __m.count(__dup._M_alt) == 0)
215  __stack.push(__dup._M_alt);
216  if (__u == _M_end)
217  continue;
218  if (__dup._M_next != _S_invalid_state_id
219  && __m.count(__dup._M_next) == 0)
220  __stack.push(__dup._M_next);
221  }
222  for (auto __it : __m)
223  {
224  auto __v = __it.second;
225  auto& __ref = _M_nfa[__v];
226  if (__ref._M_next != _S_invalid_state_id)
227  __ref._M_next = __m.find(__ref._M_next)->second;
228  if (__ref._M_has_alt() && __ref._M_alt != _S_invalid_state_id)
229  __ref._M_alt = __m.find(__ref._M_alt)->second;
230  }
231  return _StateSeq(_M_nfa, __m[_M_start], __m[_M_end]);
232  }
233 } // namespace __detail
234 
235 _GLIBCXX_END_NAMESPACE_VERSION
236 } // namespace
constexpr std::remove_reference< _Tp >::type && move(_Tp &&__t) noexcept
Convert a value to an rvalue.
Definition: move.h:138
ISO C++ entities toplevel namespace is std.
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
Definition: range_access.h:274
constexpr syntax_option_type __polynomial
constexpr error_type error_backref(_S_error_backref)
constexpr error_type error_complexity(_S_error_complexity)
A standard container giving FILO behavior.
Definition: stl_stack.h:108
void pop()
Removes first element.
Definition: stl_stack.h:331
void push(const value_type &__x)
Add data to the top of the stack.
Definition: stl_stack.h:286
bool empty() const
Definition: stl_stack.h:243
reference top()
Definition: stl_stack.h:258