libstdc++
regex.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2026 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 
35 namespace __detail
36 {
37  /// @cond undocumented
38 
39  // Result of merging regex_match and regex_search.
40  //
41  // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42  // the other one if possible, for test purpose).
43  //
44  // That __match_mode is true means regex_match, else regex_search.
45  template<typename _BiIter, typename _Alloc,
46  typename _CharT, typename _TraitsT>
47  bool
48  __regex_algo_impl(_BiIter __s,
49  _BiIter __e,
50  match_results<_BiIter, _Alloc>& __m,
51  const basic_regex<_CharT, _TraitsT>& __re,
53  _RegexExecutorPolicy __policy,
54  bool __match_mode)
55  {
56  if (__re._M_automaton == nullptr)
57  return false;
58 
59  typename match_results<_BiIter, _Alloc>::_Unchecked& __res = __m;
60  __m._M_begin = __s;
61  __m._M_resize(__re._M_automaton->_M_sub_count());
62 
63  bool __ret;
64  bool __use_dfs = true;
65  if ((__re.flags() & regex_constants::__polynomial)
66  || (__policy == _RegexExecutorPolicy::_S_alternate
67  && !__re._M_automaton->_M_has_backref))
68  __use_dfs = false;
69 
70  _Executor<_BiIter, _Alloc, _TraitsT>
71  __executor(__s, __e, __res, __re, __flags, __use_dfs);
72  if (__match_mode)
73  __ret = __executor._M_match();
74  else
75  __ret = __executor._M_search();
76 
77  if (__ret)
78  {
79  for (auto& __it : __res)
80  if (!__it.matched)
81  __it.first = __it.second = __e;
82  auto& __pre = __m._M_prefix();
83  auto& __suf = __m._M_suffix();
84  if (__match_mode)
85  {
86  __pre.matched = false;
87  __pre.first = __s;
88  __pre.second = __s;
89  __suf.matched = false;
90  __suf.first = __e;
91  __suf.second = __e;
92  }
93  else
94  {
95  __pre.first = __s;
96  __pre.second = __res[0].first;
97  __pre.matched = (__pre.first != __pre.second);
98  __suf.first = __res[0].second;
99  __suf.second = __e;
100  __suf.matched = (__suf.first != __suf.second);
101  }
102  }
103  else
104  {
105  __m._M_establish_failed_match(__e);
106  }
107  return __ret;
108  }
109 
// Translates a collating-element name into the character it denotes.
//
// On entry __name holds a candidate name such as "tab" or "A".  If it
// equals one of the table entries below, __name is overwritten with a
// one-character string whose character code is that entry's index in the
// table (the table is laid out in ASCII order, 0 through 127).  If no
// entry matches, __name is cleared.
inline void
__lookup_collatename(string& __name) noexcept
{
  static const char* const __collatenames[] =
    {
      "NUL",
      "SOH",
      "STX",
      "ETX",
      "EOT",
      "ENQ",
      "ACK",
      "alert",
      "backspace",
      "tab",
      "newline",
      "vertical-tab",
      "form-feed",
      "carriage-return",
      "SO",
      "SI",
      "DLE",
      "DC1",
      "DC2",
      "DC3",
      "DC4",
      "NAK",
      "SYN",
      "ETB",
      "CAN",
      "EM",
      "SUB",
      "ESC",
      "IS4",
      "IS3",
      "IS2",
      "IS1",
      "space",
      "exclamation-mark",
      "quotation-mark",
      "number-sign",
      "dollar-sign",
      "percent-sign",
      "ampersand",
      "apostrophe",
      "left-parenthesis",
      "right-parenthesis",
      "asterisk",
      "plus-sign",
      "comma",
      "hyphen",
      "period",
      "slash",
      "zero",
      "one",
      "two",
      "three",
      "four",
      "five",
      "six",
      "seven",
      "eight",
      "nine",
      "colon",
      "semicolon",
      "less-than-sign",
      "equals-sign",
      "greater-than-sign",
      "question-mark",
      "commercial-at",
      "A",
      "B",
      "C",
      "D",
      "E",
      "F",
      "G",
      "H",
      "I",
      "J",
      "K",
      "L",
      "M",
      "N",
      "O",
      "P",
      "Q",
      "R",
      "S",
      "T",
      "U",
      "V",
      "W",
      "X",
      "Y",
      "Z",
      "left-square-bracket",
      "backslash",
      "right-square-bracket",
      "circumflex",
      "underscore",
      "grave-accent",
      "a",
      "b",
      "c",
      "d",
      "e",
      "f",
      "g",
      "h",
      "i",
      "j",
      "k",
      "l",
      "m",
      "n",
      "o",
      "p",
      "q",
      "r",
      "s",
      "t",
      "u",
      "v",
      "w",
      "x",
      "y",
      "z",
      "left-curly-bracket",
      "vertical-line",
      "right-curly-bracket",
      "tilde",
      "DEL",
    };

  // Linear scan; the distance of the matching entry from the start of
  // the table is the resulting character code.
  for (const auto& __it : __collatenames)
    if (__name == __it)
      {
        __name.assign(1, static_cast<char>(&__it - __collatenames));
        return;
      }

  // Unknown name: report "no such collating element" as an empty string.
  __name.clear();
}
254 
255  /// @endcond
256 } // namespace __detail
257 
258 #pragma GCC diagnostic push
259 #pragma GCC diagnostic ignored "-Wc++17-extensions" // if constexpr
260 
261  template<typename _Ch_type>
262  template<typename _Fwd_iter>
263  typename regex_traits<_Ch_type>::string_type
265  lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
266  {
267  // TODO Add digraph support:
268  // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
269 
270  if constexpr (is_same<char_type, char>::value)
271  {
272  string __s(__first, __last);
273  __detail::__lookup_collatename(__s);
274  return __s;
275  }
276  else
277  {
278  typedef std::ctype<char_type> __ctype_type;
279  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
280 
281  string __s;
282  for (; __first != __last; ++__first)
283  __s += __fctyp.narrow(*__first, 0);
284  __detail::__lookup_collatename(__s);
285  if (__s.empty())
286  return string_type();
287  else
288  return string_type(1, __fctyp.widen(__s[0]));
289  }
290  }
291 
292  template<typename _Ch_type>
293  template<typename _Fwd_iter>
294  typename regex_traits<_Ch_type>::char_class_type
296  lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
297  {
298  if constexpr (__is_any_random_access_iter<_Fwd_iter>::value)
299  if ((__last - __first) > 6) [[__unlikely__]]
300  return {}; // "xdigit" is the longest classname
301 
302  typedef std::ctype<char_type> __ctype_type;
303  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
304 
305  auto __read_ch = [&]() -> char {
306  if (__first == __last)
307  return '\0';
308  char __c = __fctyp.narrow(__fctyp.tolower(*__first), 0);
309  ++__first;
310  return __c;
311  };
312 
313  auto __match = [&](const char* __s) -> bool {
314  do
315  if (__read_ch() != *__s)
316  return false;
317  while (*++__s);
318  return __first == __last;
319  };
320 
321  switch(__read_ch())
322  {
323  case 'a':
324  if (__read_ch() == 'l')
325  switch (__read_ch())
326  {
327  case 'n':
328  if (__match("um")) // "alnum"
329  return ctype_base::alnum;
330  break;
331  case 'p':
332  if (__match("ha")) // "alpha"
333  return ctype_base::alpha;
334  break;
335  }
336  break;
337  case 'b':
338  if (__match("lank")) // "blank"
339  return ctype_base::blank;
340  break;
341  case 'c':
342  if (__match("ntrl")) // "cntrl"
343  return ctype_base::cntrl;
344  break;
345  case 'd':
346  if (__first == __last || __match("igit")) // "d" or "digit"
347  return ctype_base::digit;
348  break;
349  case 'g':
350  if (__match("raph")) // "graph"
351  return ctype_base::graph;
352  break;
353  case 'l':
354  if (__match("ower")) // "lower"
355  return __icase ? ctype_base::alpha : ctype_base::lower;
356  break;
357  case 'p':
358  switch (__read_ch())
359  {
360  case 'r':
361  if (__match("int")) // "print"
362  return ctype_base::print;
363  break;
364  case 'u':
365  if (__match("nct")) // "punct"
366  return ctype_base::punct;
367  break;
368  }
369  break;
370  case 's':
371  if (__first == __last || __match("pace")) // "s" or "space"
372  return ctype_base::space;
373  break;
374  case 'u':
375  if (__match("pper")) // "upper"
376  return __icase ? ctype_base::alpha : ctype_base::upper;
377  break;
378  case 'w':
379  if (__first == __last) // "w"
380  return {ctype_base::alnum, char_class_type::_S_under};
381  break;
382  case 'x':
383  if (__match("digit")) // "xdigit"
384  return ctype_base::xdigit;
385  break;
386  }
387 
388  return {};
389  }
390 
391  template<typename _Ch_type>
392  bool
394  isctype(_Ch_type __c, char_class_type __f) const
395  {
396  typedef std::ctype<char_type> __ctype_type;
397  const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
398 
399  return __fctyp.is(__f._M_base, __c)
400  // [[:w:]]
401  || ((__f._M_extended & _RegexMask::_S_under)
402  && __c == __fctyp.widen('_'));
403  }
404 
405  template<typename _Ch_type>
406  int
408  value(_Ch_type __ch, int __radix) const
409  {
410  if constexpr (sizeof(_Ch_type) > 1)
411  {
412  const auto& __ctyp = std::use_facet<ctype<_Ch_type>>(_M_locale);
413  const char __c = __ctyp.narrow(__ch, '\0');
414  return regex_traits<char>{}.value(__c, __radix);
415  }
416  else
417  {
418  const char __c = static_cast<char>(__ch);
419  const char __max_digit = __radix == 8 ? '7' : '9';
420  if ('0' <= __c && __c <= __max_digit)
421  return __c - '0';
422  if (__radix < 16)
423  return -1;
424  switch (__c)
425  {
426  case 'a':
427  case 'A':
428  return 10;
429  case 'b':
430  case 'B':
431  return 11;
432  case 'c':
433  case 'C':
434  return 12;
435  case 'd':
436  case 'D':
437  return 13;
438  case 'e':
439  case 'E':
440  return 14;
441  case 'f':
442  case 'F':
443  return 15;
444  default:
445  return -1;
446  }
447  }
448  }
449 #pragma GCC diagnostic pop
450 
451  template<typename _Bi_iter, typename _Alloc>
452  template<typename _Out_iter>
453  _Out_iter
455  format(_Out_iter __out,
456  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
457  const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
458  match_flag_type __flags) const
459  {
460  __glibcxx_assert( ready() );
461  regex_traits<char_type> __traits;
462  typedef std::ctype<char_type> __ctype_type;
463  const __ctype_type&
464  __fctyp(use_facet<__ctype_type>(__traits.getloc()));
465 
466  auto __output = [&](size_t __idx)
467  {
468  auto& __sub = (*this)[__idx];
469  if (__sub.matched)
470  __out = std::copy(__sub.first, __sub.second, __out);
471  };
472 
473  if (__flags & regex_constants::format_sed)
474  {
475  bool __escaping = false;
476  for (; __fmt_first != __fmt_last; __fmt_first++)
477  {
478  if (__escaping)
479  {
480  __escaping = false;
481  if (__fctyp.is(__ctype_type::digit, *__fmt_first))
482  __output(__traits.value(*__fmt_first, 10));
483  else
484  *__out++ = *__fmt_first;
485  continue;
486  }
487  if (*__fmt_first == '\\')
488  {
489  __escaping = true;
490  continue;
491  }
492  if (*__fmt_first == '&')
493  {
494  __output(0);
495  continue;
496  }
497  *__out++ = *__fmt_first;
498  }
499  if (__escaping)
500  *__out++ = '\\';
501  }
502  else
503  {
504  while (1)
505  {
506  auto __next = std::find(__fmt_first, __fmt_last, '$');
507  if (__next == __fmt_last)
508  break;
509 
510  __out = std::copy(__fmt_first, __next, __out);
511 
512  auto __eat = [&](char __ch) -> bool
513  {
514  if (*__next == __ch)
515  {
516  ++__next;
517  return true;
518  }
519  return false;
520  };
521 
522  if (++__next == __fmt_last)
523  *__out++ = '$';
524  else if (__eat('$'))
525  *__out++ = '$';
526  else if (__eat('&'))
527  __output(0);
528  else if (__eat('`'))
529  {
530  auto& __sub = _M_prefix();
531  if (__sub.matched)
532  __out = std::copy(__sub.first, __sub.second, __out);
533  }
534  else if (__eat('\''))
535  {
536  auto& __sub = _M_suffix();
537  if (__sub.matched)
538  __out = std::copy(__sub.first, __sub.second, __out);
539  }
540  else if (__fctyp.is(__ctype_type::digit, *__next))
541  {
542  long __num = __traits.value(*__next, 10);
543  if (++__next != __fmt_last
544  && __fctyp.is(__ctype_type::digit, *__next))
545  {
546  __num *= 10;
547  __num += __traits.value(*__next++, 10);
548  }
549  if (0 <= __num && size_t(__num) < this->size())
550  __output(__num);
551  }
552  else
553  *__out++ = '$';
554  __fmt_first = __next;
555  }
556  __out = std::copy(__fmt_first, __fmt_last, __out);
557  }
558  return __out;
559  }
560 
561  template<typename _Out_iter, typename _Bi_iter,
562  typename _Rx_traits, typename _Ch_type>
563  _Out_iter
564  __regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
565  const basic_regex<_Ch_type, _Rx_traits>& __e,
566  const _Ch_type* __fmt, size_t __len,
568  {
569  typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
570  _IterT __i(__first, __last, __e, __flags);
571  _IterT __end;
572  if (__i == __end)
573  {
574  if (!(__flags & regex_constants::format_no_copy))
575  __out = std::copy(__first, __last, __out);
576  }
577  else
578  {
579  sub_match<_Bi_iter> __last;
580  for (; __i != __end; ++__i)
581  {
582  if (!(__flags & regex_constants::format_no_copy))
583  __out = std::copy(__i->prefix().first, __i->prefix().second,
584  __out);
585  __out = __i->format(__out, __fmt, __fmt + __len, __flags);
586  __last = __i->suffix();
588  break;
589  }
590  if (!(__flags & regex_constants::format_no_copy))
591  __out = std::copy(__last.first, __last.second, __out);
592  }
593  return __out;
594  }
595 
596  template<typename _Bi_iter,
597  typename _Ch_type,
598  typename _Rx_traits>
599  bool
601  operator==(const regex_iterator& __rhs) const noexcept
602  {
603  if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
604  return true;
605  return _M_pregex == __rhs._M_pregex
606  && _M_begin == __rhs._M_begin
607  && _M_end == __rhs._M_end
608  && _M_flags == __rhs._M_flags
609  && _M_match[0] == __rhs._M_match[0];
610  }
611 
612  template<typename _Bi_iter,
613  typename _Ch_type,
614  typename _Rx_traits>
617  operator++()
618  {
619  // In all cases in which the call to regex_search returns true,
620  // match.prefix().first shall be equal to the previous value of
621  // match[0].second, and for each index i in the half-open range
622  // [0, match.size()) for which match[i].matched is true,
623  // match[i].position() shall return distance(begin, match[i].first).
624  // [28.12.1.4.5]
625  if (_M_match[0].matched)
626  {
627  auto __start = _M_match[0].second;
628  auto __prefix_first = _M_match[0].second;
629  if (_M_match[0].first == _M_match[0].second)
630  {
631  if (__start == _M_end)
632  {
633  _M_pregex = nullptr;
634  return *this;
635  }
636  else
637  {
638  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
639  _M_flags
642  {
643  __glibcxx_assert(_M_match[0].matched);
644  auto& __prefix = _M_match._M_prefix();
645  __prefix.first = __prefix_first;
646  __prefix.matched = __prefix.first != __prefix.second;
647  // [28.12.1.4.5]
648  _M_match._M_begin = _M_begin;
649  return *this;
650  }
651  else
652  ++__start;
653  }
654  }
656  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
657  {
658  __glibcxx_assert(_M_match[0].matched);
659  auto& __prefix = _M_match._M_prefix();
660  __prefix.first = __prefix_first;
661  __prefix.matched = __prefix.first != __prefix.second;
662  // [28.12.1.4.5]
663  _M_match._M_begin = _M_begin;
664  }
665  else
666  _M_pregex = nullptr;
667  }
668  return *this;
669  }
670 
671  template<typename _Bi_iter,
672  typename _Ch_type,
673  typename _Rx_traits>
676  operator=(const regex_token_iterator& __rhs)
677  {
678  _M_position = __rhs._M_position;
679  _M_subs = __rhs._M_subs;
680  _M_n = __rhs._M_n;
681  _M_suffix = __rhs._M_suffix;
682  _M_has_m1 = __rhs._M_has_m1;
683  _M_normalize_result();
684  return *this;
685  }
686 
687  template<typename _Bi_iter,
688  typename _Ch_type,
689  typename _Rx_traits>
690  bool
692  operator==(const regex_token_iterator& __rhs) const
693  {
694  if (_M_end_of_seq() && __rhs._M_end_of_seq())
695  return true;
696  if (_M_suffix.matched && __rhs._M_suffix.matched
697  && _M_suffix == __rhs._M_suffix)
698  return true;
699  if (_M_end_of_seq() || _M_suffix.matched
700  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
701  return false;
702  return _M_position == __rhs._M_position
703  && _M_n == __rhs._M_n
704  && _M_subs == __rhs._M_subs;
705  }
706 
707  template<typename _Bi_iter,
708  typename _Ch_type,
709  typename _Rx_traits>
712  operator++()
713  {
714  _Position __prev = _M_position;
715  if (_M_suffix.matched)
716  *this = regex_token_iterator();
717  else if (_M_n + 1 < _M_subs.size())
718  {
719  _M_n++;
720  _M_result = &_M_current_match();
721  }
722  else
723  {
724  _M_n = 0;
725  ++_M_position;
726  if (_M_position != _Position())
727  _M_result = &_M_current_match();
728  else if (_M_has_m1 && __prev->suffix().length() != 0)
729  {
730  _M_suffix.matched = true;
731  _M_suffix.first = __prev->suffix().first;
732  _M_suffix.second = __prev->suffix().second;
733  _M_result = &_M_suffix;
734  }
735  else
736  *this = regex_token_iterator();
737  }
738  return *this;
739  }
740 
741  template<typename _Bi_iter,
742  typename _Ch_type,
743  typename _Rx_traits>
744  void
746  _M_init(_Bi_iter __a, _Bi_iter __b)
747  {
748  _M_has_m1 = false;
749  for (auto __it : _M_subs)
750  if (__it == -1)
751  {
752  _M_has_m1 = true;
753  break;
754  }
755  if (_M_position != _Position())
756  _M_result = &_M_current_match();
757  else if (_M_has_m1)
758  {
759  _M_suffix.matched = true;
760  _M_suffix.first = __a;
761  _M_suffix.second = __b;
762  _M_result = &_M_suffix;
763  }
764  else
765  _M_result = nullptr;
766  }
767 
768 _GLIBCXX_END_NAMESPACE_VERSION
769 } // namespace
bool regex_search(_Bi_iter __s, _Bi_iter __e, match_results< _Bi_iter, _Alloc > &__m, const basic_regex< _Ch_type, _Rx_traits > &__re, regex_constants::match_flag_type __flags=regex_constants::match_default)
Definition: regex.h:2440
ISO C++ entities toplevel namespace is std.
constexpr auto size(const _Container &__cont) noexcept(noexcept(__cont.size())) -> decltype(__cont.size())
Return the size of a container.
Definition: range_access.h:274
constexpr syntax_option_type __polynomial
constexpr match_flag_type format_first_only
constexpr match_flag_type match_continuous
match_flag_type
This is a bitmask type indicating regex matching rules.
constexpr match_flag_type match_prev_avail
constexpr match_flag_type format_sed
constexpr match_flag_type match_not_null
constexpr match_flag_type format_no_copy
is_same
Definition: type_traits:1623
Managing sequences of characters and character-like objects.
Definition: cow_string.h:109
bool empty() const noexcept
Definition: cow_string.h:1116
Primary class template ctype facet.
_Out_iter format(_Out_iter __out, const char_type *__fmt_first, const char_type *__fmt_last, match_flag_type __flags=regex_constants::format_default) const
Describes aspects of a regular expression.
Definition: regex.h:100
int value(_Ch_type __ch, int __radix) const
Converts a digit to an int.
Definition: regex.tcc:408
string_type lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
Gets a collation element by name.
Definition: regex.tcc:265
locale_type getloc() const
Gets a copy of the current locale in use by the regex_traits object.
Definition: regex.h:411
bool isctype(_Ch_type __c, char_class_type __f) const
Determines if c is a member of an identified class.
Definition: regex.tcc:394
char_class_type lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase=false) const
Maps one or more characters to a named character classification.
Definition: regex.tcc:296
bool operator==(const regex_iterator &) const noexcept
Tests the equivalence of two regex iterators.
Definition: regex.tcc:601
regex_iterator & operator++()
Increments a regex_iterator.
Definition: regex.tcc:617
bool operator==(const regex_token_iterator &__rhs) const
Compares a regex_token_iterator to another for equality.
Definition: regex.tcc:692
regex_token_iterator & operator=(const regex_token_iterator &__rhs)
Assigns a regex_token_iterator to another.
Definition: regex.tcc:676
regex_token_iterator & operator++()
Increments a regex_token_iterator.
Definition: regex.tcc:712