libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2010 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex_compiler.h 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 namespace std _GLIBCXX_VISIBILITY(default) 00032 { 00033 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00034 00035 namespace __regex 00036 { 00037 struct _Scanner_base 00038 { 00039 // FIXME: replace these constanst with constexpr 00040 typedef unsigned int _StateT; 00041 00042 static const _StateT _S_state_at_start = 1 << 0; 00043 static const _StateT _S_state_in_brace = 1 << 2; 00044 static const _StateT _S_state_in_bracket = 1 << 3; 00045 }; 00046 00047 // 00048 // @brief Scans an input range for regex tokens. 00049 // 00050 // The %_Scanner class interprets the regular expression pattern in the input 00051 // range passed to its constructor as a sequence of parse tokens passed to 00052 // the regular expression compiler. The sequence of tokens provided depends 00053 // on the flag settings passed to the constructor: different regular 00054 // expression gramars will interpret the same input pattern in syntactically 00055 // different ways. 00056 // 00057 template<typename _InputIterator> 00058 class _Scanner: public _Scanner_base 00059 { 00060 public: 00061 typedef _InputIterator _IteratorT; 00062 typedef typename std::iterator_traits<_IteratorT>::value_type _CharT; 00063 typedef std::basic_string<_CharT> _StringT; 00064 typedef regex_constants::syntax_option_type _FlagT; 00065 typedef const std::ctype<_CharT> _CtypeT; 00066 00067 // Token types returned from the scanner. 00068 enum _TokenT 00069 { 00070 _S_token_anychar, 00071 _S_token_backref, 00072 _S_token_bracket_begin, 00073 _S_token_bracket_end, 00074 _S_token_inverse_class, 00075 _S_token_char_class_name, 00076 _S_token_closure0, 00077 _S_token_closure1, 00078 _S_token_collelem_multi, 00079 _S_token_collelem_single, 00080 _S_token_collsymbol, 00081 _S_token_comma, 00082 _S_token_dash, 00083 _S_token_dup_count, 00084 _S_token_eof, 00085 _S_token_equiv_class_name, 00086 _S_token_interval_begin, 00087 _S_token_interval_end, 00088 _S_token_line_begin, 00089 _S_token_line_end, 00090 _S_token_opt, 00091 _S_token_or, 00092 _S_token_ord_char, 00093 _S_token_quoted_char, 00094 _S_token_subexpr_begin, 00095 _S_token_subexpr_end, 00096 _S_token_word_begin, 00097 _S_token_word_end, 00098 _S_token_unknown 00099 }; 00100 00101 public: 00102 _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags, 00103 std::locale __loc) 00104 : _M_current(__begin) , _M_end(__end) , _M_flags(__flags), 00105 _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start) 00106 { _M_advance(); } 00107 00108 void 00109 _M_advance(); 00110 00111 _TokenT 00112 _M_token() const 00113 { return _M_curToken; } 00114 00115 const _StringT& 00116 _M_value() const 00117 { return _M_curValue; } 00118 00119 #ifdef _GLIBCXX_DEBUG 00120 std::ostream& 00121 _M_print(std::ostream&); 00122 #endif 00123 00124 private: 00125 void 00126 _M_eat_escape(); 00127 00128 void 00129 _M_scan_in_brace(); 00130 00131 void 00132 _M_scan_in_bracket(); 00133 00134 void 00135 _M_eat_charclass(); 00136 00137 void 00138 _M_eat_equivclass(); 00139 00140 void 00141 _M_eat_collsymbol(); 00142 00143 private: 00144 _IteratorT _M_current; 00145 _IteratorT _M_end; 00146 _FlagT _M_flags; 00147 _CtypeT& _M_ctype; 00148 _TokenT _M_curToken; 00149 _StringT _M_curValue; 00150 _StateT _M_state; 00151 }; 00152 00153 template<typename _InputIterator> 00154 void 00155 _Scanner<_InputIterator>:: 00156 _M_advance() 00157 { 00158 if (_M_current == _M_end) 00159 { 00160 _M_curToken = _S_token_eof; 00161 return; 00162 } 00163 00164 _CharT __c = *_M_current; 00165 if (_M_state & _S_state_in_bracket) 00166 { 00167 _M_scan_in_bracket(); 00168 return; 00169 } 00170 if (_M_state & _S_state_in_brace) 00171 { 00172 _M_scan_in_brace(); 00173 return; 00174 } 00175 else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^')) 00176 { 00177 _M_curToken = _S_token_line_begin; 00178 ++_M_current; 00179 return; 00180 } 00181 else if (__c == _M_ctype.widen('$')) 00182 { 00183 _M_curToken = _S_token_line_end; 00184 ++_M_current; 00185 return; 00186 } 00187 else if (__c == _M_ctype.widen('.')) 00188 { 00189 _M_curToken = _S_token_anychar; 00190 ++_M_current; 00191 return; 00192 } 00193 else if (__c == _M_ctype.widen('*')) 00194 { 00195 _M_curToken = _S_token_closure0; 00196 ++_M_current; 00197 return; 00198 } 00199 else if (__c == _M_ctype.widen('+')) 00200 { 00201 _M_curToken = _S_token_closure1; 00202 ++_M_current; 00203 return; 00204 } 00205 else if (__c == _M_ctype.widen('|')) 00206 { 00207 _M_curToken = _S_token_or; 00208 ++_M_current; 00209 return; 00210 } 00211 else if (__c == _M_ctype.widen('[')) 00212 { 00213 _M_curToken = _S_token_bracket_begin; 00214 _M_state |= (_S_state_in_bracket | _S_state_at_start); 00215 ++_M_current; 00216 return; 00217 } 00218 else if (__c == _M_ctype.widen('\\')) 00219 { 00220 _M_eat_escape(); 00221 return; 00222 } 00223 else if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 00224 { 00225 if (__c == _M_ctype.widen('(')) 00226 { 00227 _M_curToken = _S_token_subexpr_begin; 00228 ++_M_current; 00229 return; 00230 } 00231 else if (__c == _M_ctype.widen(')')) 00232 { 00233 _M_curToken = _S_token_subexpr_end; 00234 ++_M_current; 00235 return; 00236 } 00237 else if (__c == _M_ctype.widen('{')) 00238 { 00239 _M_curToken = _S_token_interval_begin; 00240 _M_state |= _S_state_in_brace; 00241 ++_M_current; 00242 return; 00243 } 00244 } 00245 00246 _M_curToken = _S_token_ord_char; 00247 _M_curValue.assign(1, __c); 00248 ++_M_current; 00249 } 00250 00251 00252 template<typename _InputIterator> 00253 void 00254 _Scanner<_InputIterator>:: 00255 _M_scan_in_brace() 00256 { 00257 if (_M_ctype.is(_CtypeT::digit, *_M_current)) 00258 { 00259 _M_curToken = _S_token_dup_count; 00260 _M_curValue.assign(1, *_M_current); 00261 ++_M_current; 00262 while (_M_current != _M_end 00263 && _M_ctype.is(_CtypeT::digit, *_M_current)) 00264 { 00265 _M_curValue += *_M_current; 00266 ++_M_current; 00267 } 00268 return; 00269 } 00270 else if (*_M_current == _M_ctype.widen(',')) 00271 { 00272 _M_curToken = _S_token_comma; 00273 ++_M_current; 00274 return; 00275 } 00276 if (_M_flags & (regex_constants::basic | regex_constants::grep)) 00277 { 00278 if (*_M_current == _M_ctype.widen('\\')) 00279 _M_eat_escape(); 00280 } 00281 else 00282 { 00283 if (*_M_current == _M_ctype.widen('}')) 00284 { 00285 _M_curToken = _S_token_interval_end; 00286 _M_state &= ~_S_state_in_brace; 00287 ++_M_current; 00288 return; 00289 } 00290 } 00291 } 00292 00293 template<typename _InputIterator> 00294 void 00295 _Scanner<_InputIterator>:: 00296 _M_scan_in_bracket() 00297 { 00298 if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^')) 00299 { 00300 _M_curToken = _S_token_inverse_class; 00301 _M_state &= ~_S_state_at_start; 00302 ++_M_current; 00303 return; 00304 } 00305 else if (*_M_current == _M_ctype.widen('[')) 00306 { 00307 ++_M_current; 00308 if (_M_current == _M_end) 00309 { 00310 _M_curToken = _S_token_eof; 00311 return; 00312 } 00313 00314 if (*_M_current == _M_ctype.widen('.')) 00315 { 00316 _M_curToken = _S_token_collsymbol; 00317 _M_eat_collsymbol(); 00318 return; 00319 } 00320 else if (*_M_current == _M_ctype.widen(':')) 00321 { 00322 _M_curToken = _S_token_char_class_name; 00323 _M_eat_charclass(); 00324 return; 00325 } 00326 else if (*_M_current == _M_ctype.widen('=')) 00327 { 00328 _M_curToken = _S_token_equiv_class_name; 00329 _M_eat_equivclass(); 00330 return; 00331 } 00332 } 00333 else if (*_M_current == _M_ctype.widen('-')) 00334 { 00335 _M_curToken = _S_token_dash; 00336 ++_M_current; 00337 return; 00338 } 00339 else if (*_M_current == _M_ctype.widen(']')) 00340 { 00341 if (!(_M_flags & regex_constants::ECMAScript) 00342 || !(_M_state & _S_state_at_start)) 00343 { 00344 // special case: only if _not_ chr first after 00345 // '[' or '[^' and if not ECMAscript 00346 _M_curToken = _S_token_bracket_end; 00347 ++_M_current; 00348 return; 00349 } 00350 } 00351 _M_curToken = _S_token_collelem_single; 00352 _M_curValue.assign(1, *_M_current); 00353 ++_M_current; 00354 } 00355 00356 template<typename _InputIterator> 00357 void 00358 _Scanner<_InputIterator>:: 00359 _M_eat_escape() 00360 { 00361 ++_M_current; 00362 if (_M_current == _M_end) 00363 { 00364 _M_curToken = _S_token_eof; 00365 return; 00366 } 00367 _CharT __c = *_M_current; 00368 ++_M_current; 00369 00370 if (__c == _M_ctype.widen('(')) 00371 { 00372 if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 00373 { 00374 _M_curToken = _S_token_ord_char; 00375 _M_curValue.assign(1, __c); 00376 } 00377 else 00378 _M_curToken = _S_token_subexpr_begin; 00379 } 00380 else if (__c == _M_ctype.widen(')')) 00381 { 00382 if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 00383 { 00384 _M_curToken = _S_token_ord_char; 00385 _M_curValue.assign(1, __c); 00386 } 00387 else 00388 _M_curToken = _S_token_subexpr_end; 00389 } 00390 else if (__c == _M_ctype.widen('{')) 00391 { 00392 if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 00393 { 00394 _M_curToken = _S_token_ord_char; 00395 _M_curValue.assign(1, __c); 00396 } 00397 else 00398 { 00399 _M_curToken = _S_token_interval_begin; 00400 _M_state |= _S_state_in_brace; 00401 } 00402 } 00403 else if (__c == _M_ctype.widen('}')) 00404 { 00405 if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 00406 { 00407 _M_curToken = _S_token_ord_char; 00408 _M_curValue.assign(1, __c); 00409 } 00410 else 00411 { 00412 if (!(_M_state && _S_state_in_brace)) 00413 __throw_regex_error(regex_constants::error_badbrace); 00414 _M_state &= ~_S_state_in_brace; 00415 _M_curToken = _S_token_interval_end; 00416 } 00417 } 00418 else if (__c == _M_ctype.widen('x')) 00419 { 00420 ++_M_current; 00421 if (_M_current == _M_end) 00422 { 00423 _M_curToken = _S_token_eof; 00424 return; 00425 } 00426 if (_M_ctype.is(_CtypeT::digit, *_M_current)) 00427 { 00428 _M_curValue.assign(1, *_M_current); 00429 ++_M_current; 00430 if (_M_current == _M_end) 00431 { 00432 _M_curToken = _S_token_eof; 00433 return; 00434 } 00435 if (_M_ctype.is(_CtypeT::digit, *_M_current)) 00436 { 00437 _M_curValue += *_M_current; 00438 ++_M_current; 00439 return; 00440 } 00441 } 00442 } 00443 else if (__c == _M_ctype.widen('^') 00444 || __c == _M_ctype.widen('.') 00445 || __c == _M_ctype.widen('*') 00446 || __c == _M_ctype.widen('$') 00447 || __c == _M_ctype.widen('\\')) 00448 { 00449 _M_curToken = _S_token_ord_char; 00450 _M_curValue.assign(1, __c); 00451 } 00452 else if (_M_ctype.is(_CtypeT::digit, __c)) 00453 { 00454 _M_curToken = _S_token_backref; 00455 _M_curValue.assign(1, __c); 00456 } 00457 else 00458 __throw_regex_error(regex_constants::error_escape); 00459 } 00460 00461 00462 // Eats a character class or throwns an exception. 00463 // current point to ':' delimiter on entry, char after ']' on return 00464 template<typename _InputIterator> 00465 void 00466 _Scanner<_InputIterator>:: 00467 _M_eat_charclass() 00468 { 00469 ++_M_current; // skip ':' 00470 if (_M_current == _M_end) 00471 __throw_regex_error(regex_constants::error_ctype); 00472 for (_M_curValue.clear(); 00473 _M_current != _M_end && *_M_current != _M_ctype.widen(':'); 00474 ++_M_current) 00475 _M_curValue += *_M_current; 00476 if (_M_current == _M_end) 00477 __throw_regex_error(regex_constants::error_ctype); 00478 ++_M_current; // skip ':' 00479 if (*_M_current != _M_ctype.widen(']')) 00480 __throw_regex_error(regex_constants::error_ctype); 00481 ++_M_current; // skip ']' 00482 } 00483 00484 00485 template<typename _InputIterator> 00486 void 00487 _Scanner<_InputIterator>:: 00488 _M_eat_equivclass() 00489 { 00490 ++_M_current; // skip '=' 00491 if (_M_current == _M_end) 00492 __throw_regex_error(regex_constants::error_collate); 00493 for (_M_curValue.clear(); 00494 _M_current != _M_end && *_M_current != _M_ctype.widen('='); 00495 ++_M_current) 00496 _M_curValue += *_M_current; 00497 if (_M_current == _M_end) 00498 __throw_regex_error(regex_constants::error_collate); 00499 ++_M_current; // skip '=' 00500 if (*_M_current != _M_ctype.widen(']')) 00501 __throw_regex_error(regex_constants::error_collate); 00502 ++_M_current; // skip ']' 00503 } 00504 00505 00506 template<typename _InputIterator> 00507 void 00508 _Scanner<_InputIterator>:: 00509 _M_eat_collsymbol() 00510 { 00511 ++_M_current; // skip '.' 00512 if (_M_current == _M_end) 00513 __throw_regex_error(regex_constants::error_collate); 00514 for (_M_curValue.clear(); 00515 _M_current != _M_end && *_M_current != _M_ctype.widen('.'); 00516 ++_M_current) 00517 _M_curValue += *_M_current; 00518 if (_M_current == _M_end) 00519 __throw_regex_error(regex_constants::error_collate); 00520 ++_M_current; // skip '.' 00521 if (*_M_current != _M_ctype.widen(']')) 00522 __throw_regex_error(regex_constants::error_collate); 00523 ++_M_current; // skip ']' 00524 } 00525 00526 #ifdef _GLIBCXX_DEBUG 00527 template<typename _InputIterator> 00528 std::ostream& 00529 _Scanner<_InputIterator>:: 00530 _M_print(std::ostream& ostr) 00531 { 00532 switch (_M_curToken) 00533 { 00534 case _S_token_anychar: 00535 ostr << "any-character\n"; 00536 break; 00537 case _S_token_backref: 00538 ostr << "backref\n"; 00539 break; 00540 case _S_token_bracket_begin: 00541 ostr << "bracket-begin\n"; 00542 break; 00543 case _S_token_bracket_end: 00544 ostr << "bracket-end\n"; 00545 break; 00546 case _S_token_char_class_name: 00547 ostr << "char-class-name \"" << _M_curValue << "\"\n"; 00548 break; 00549 case _S_token_closure0: 00550 ostr << "closure0\n"; 00551 break; 00552 case _S_token_closure1: 00553 ostr << "closure1\n"; 00554 break; 00555 case _S_token_collelem_multi: 00556 ostr << "coll-elem-multi \"" << _M_curValue << "\"\n"; 00557 break; 00558 case _S_token_collelem_single: 00559 ostr << "coll-elem-single \"" << _M_curValue << "\"\n"; 00560 break; 00561 case _S_token_collsymbol: 00562 ostr << "collsymbol \"" << _M_curValue << "\"\n"; 00563 break; 00564 case _S_token_comma: 00565 ostr << "comma\n"; 00566 break; 00567 case _S_token_dash: 00568 ostr << "dash\n"; 00569 break; 00570 case _S_token_dup_count: 00571 ostr << "dup count: " << _M_curValue << "\n"; 00572 break; 00573 case _S_token_eof: 00574 ostr << "EOF\n"; 00575 break; 00576 case _S_token_equiv_class_name: 00577 ostr << "equiv-class-name \"" << _M_curValue << "\"\n"; 00578 break; 00579 case _S_token_interval_begin: 00580 ostr << "interval begin\n"; 00581 break; 00582 case _S_token_interval_end: 00583 ostr << "interval end\n"; 00584 break; 00585 case _S_token_line_begin: 00586 ostr << "line begin\n"; 00587 break; 00588 case _S_token_line_end: 00589 ostr << "line end\n"; 00590 break; 00591 case _S_token_opt: 00592 ostr << "opt\n"; 00593 break; 00594 case _S_token_or: 00595 ostr << "or\n"; 00596 break; 00597 case _S_token_ord_char: 00598 ostr << "ordinary character: \"" << _M_value() << "\"\n"; 00599 break; 00600 case _S_token_quoted_char: 00601 ostr << "quoted char\n"; 00602 break; 00603 case _S_token_subexpr_begin: 00604 ostr << "subexpr begin\n"; 00605 break; 00606 case _S_token_subexpr_end: 00607 ostr << "subexpr end\n"; 00608 break; 00609 case _S_token_word_begin: 00610 ostr << "word begin\n"; 00611 break; 00612 case _S_token_word_end: 00613 ostr << "word end\n"; 00614 break; 00615 case _S_token_unknown: 00616 ostr << "-- unknown token --\n"; 00617 break; 00618 } 00619 return ostr; 00620 } 00621 #endif 00622 00623 // Builds an NFA from an input iterator interval. 00624 template<typename _InIter, typename _TraitsT> 00625 class _Compiler 00626 { 00627 public: 00628 typedef _InIter _IterT; 00629 typedef typename std::iterator_traits<_InIter>::value_type _CharT; 00630 typedef std::basic_string<_CharT> _StringT; 00631 typedef regex_constants::syntax_option_type _FlagT; 00632 00633 public: 00634 _Compiler(const _InIter& __b, const _InIter& __e, 00635 _TraitsT& __traits, _FlagT __flags); 00636 00637 const _Nfa& 00638 _M_nfa() const 00639 { return _M_state_store; } 00640 00641 private: 00642 typedef _Scanner<_InIter> _ScannerT; 00643 typedef typename _ScannerT::_TokenT _TokenT; 00644 typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT; 00645 typedef _RangeMatcher<_InIter, _TraitsT> _RMatcherT; 00646 00647 // accepts a specific token or returns false. 00648 bool 00649 _M_match_token(_TokenT __token); 00650 00651 void 00652 _M_disjunction(); 00653 00654 bool 00655 _M_alternative(); 00656 00657 bool 00658 _M_term(); 00659 00660 bool 00661 _M_assertion(); 00662 00663 bool 00664 _M_quantifier(); 00665 00666 bool 00667 _M_atom(); 00668 00669 bool 00670 _M_bracket_expression(); 00671 00672 bool 00673 _M_bracket_list(_RMatcherT& __matcher); 00674 00675 bool 00676 _M_follow_list(_RMatcherT& __matcher); 00677 00678 bool 00679 _M_follow_list2(_RMatcherT& __matcher); 00680 00681 bool 00682 _M_expression_term(_RMatcherT& __matcher); 00683 00684 bool 00685 _M_range_expression(_RMatcherT& __matcher); 00686 00687 bool 00688 _M_start_range(_RMatcherT& __matcher); 00689 00690 bool 00691 _M_collating_symbol(_RMatcherT& __matcher); 00692 00693 bool 00694 _M_equivalence_class(_RMatcherT& __matcher); 00695 00696 bool 00697 _M_character_class(_RMatcherT& __matcher); 00698 00699 int 00700 _M_cur_int_value(int __radix); 00701 00702 private: 00703 _TraitsT& _M_traits; 00704 _ScannerT _M_scanner; 00705 _StringT _M_cur_value; 00706 _Nfa _M_state_store; 00707 _StackT _M_stack; 00708 }; 00709 00710 template<typename _InIter, typename _TraitsT> 00711 _Compiler<_InIter, _TraitsT>:: 00712 _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits, 00713 _Compiler<_InIter, _TraitsT>::_FlagT __flags) 00714 : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()), 00715 _M_state_store(__flags) 00716 { 00717 using std::bind; 00718 using std::placeholders::_1; 00719 using std::placeholders::_2; 00720 typedef _StartTagger<_InIter, _TraitsT> _Start; 00721 typedef _EndTagger<_InIter, _TraitsT> _End; 00722 00723 _StateSeq __r(_M_state_store, 00724 _M_state_store._M_insert_subexpr_begin( 00725 bind(_Start(0), _1, _2))); 00726 _M_disjunction(); 00727 if (!_M_stack.empty()) 00728 { 00729 __r._M_append(_M_stack.top()); 00730 _M_stack.pop(); 00731 } 00732 __r._M_append(_M_state_store. 00733 _M_insert_subexpr_end(0, bind(_End(0), _1, _2))); 00734 __r._M_append(_M_state_store._M_insert_accept()); 00735 } 00736 00737 template<typename _InIter, typename _TraitsT> 00738 bool 00739 _Compiler<_InIter, _TraitsT>:: 00740 _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token) 00741 { 00742 if (token == _M_scanner._M_token()) 00743 { 00744 _M_cur_value = _M_scanner._M_value(); 00745 _M_scanner._M_advance(); 00746 return true; 00747 } 00748 return false; 00749 } 00750 00751 template<typename _InIter, typename _TraitsT> 00752 void 00753 _Compiler<_InIter, _TraitsT>:: 00754 _M_disjunction() 00755 { 00756 this->_M_alternative(); 00757 if (_M_match_token(_ScannerT::_S_token_or)) 00758 { 00759 _StateSeq __alt1 = _M_stack.top(); _M_stack.pop(); 00760 this->_M_disjunction(); 00761 _StateSeq __alt2 = _M_stack.top(); _M_stack.pop(); 00762 _M_stack.push(_StateSeq(__alt1, __alt2)); 00763 } 00764 } 00765 00766 template<typename _InIter, typename _TraitsT> 00767 bool 00768 _Compiler<_InIter, _TraitsT>:: 00769 _M_alternative() 00770 { 00771 if (this->_M_term()) 00772 { 00773 _StateSeq __re = _M_stack.top(); _M_stack.pop(); 00774 this->_M_alternative(); 00775 if (!_M_stack.empty()) 00776 { 00777 __re._M_append(_M_stack.top()); 00778 _M_stack.pop(); 00779 } 00780 _M_stack.push(__re); 00781 return true; 00782 } 00783 return false; 00784 } 00785 00786 template<typename _InIter, typename _TraitsT> 00787 bool 00788 _Compiler<_InIter, _TraitsT>:: 00789 _M_term() 00790 { 00791 if (this->_M_assertion()) 00792 return true; 00793 if (this->_M_atom()) 00794 { 00795 this->_M_quantifier(); 00796 return true; 00797 } 00798 return false; 00799 } 00800 00801 template<typename _InIter, typename _TraitsT> 00802 bool 00803 _Compiler<_InIter, _TraitsT>:: 00804 _M_assertion() 00805 { 00806 if (_M_match_token(_ScannerT::_S_token_line_begin)) 00807 { 00808 // __m.push(_Matcher::_S_opcode_line_begin); 00809 return true; 00810 } 00811 if (_M_match_token(_ScannerT::_S_token_line_end)) 00812 { 00813 // __m.push(_Matcher::_S_opcode_line_end); 00814 return true; 00815 } 00816 if (_M_match_token(_ScannerT::_S_token_word_begin)) 00817 { 00818 // __m.push(_Matcher::_S_opcode_word_begin); 00819 return true; 00820 } 00821 if (_M_match_token(_ScannerT::_S_token_word_end)) 00822 { 00823 // __m.push(_Matcher::_S_opcode_word_end); 00824 return true; 00825 } 00826 return false; 00827 } 00828 00829 template<typename _InIter, typename _TraitsT> 00830 bool 00831 _Compiler<_InIter, _TraitsT>:: 00832 _M_quantifier() 00833 { 00834 if (_M_match_token(_ScannerT::_S_token_closure0)) 00835 { 00836 if (_M_stack.empty()) 00837 __throw_regex_error(regex_constants::error_badrepeat); 00838 _StateSeq __r(_M_stack.top(), -1); 00839 __r._M_append(__r._M_front()); 00840 _M_stack.pop(); 00841 _M_stack.push(__r); 00842 return true; 00843 } 00844 if (_M_match_token(_ScannerT::_S_token_closure1)) 00845 { 00846 if (_M_stack.empty()) 00847 __throw_regex_error(regex_constants::error_badrepeat); 00848 _StateSeq __r(_M_state_store, 00849 _M_state_store. 00850 _M_insert_alt(_S_invalid_state_id, 00851 _M_stack.top()._M_front())); 00852 _M_stack.top()._M_append(__r); 00853 return true; 00854 } 00855 if (_M_match_token(_ScannerT::_S_token_opt)) 00856 { 00857 if (_M_stack.empty()) 00858 __throw_regex_error(regex_constants::error_badrepeat); 00859 _StateSeq __r(_M_stack.top(), -1); 00860 _M_stack.pop(); 00861 _M_stack.push(__r); 00862 return true; 00863 } 00864 if (_M_match_token(_ScannerT::_S_token_interval_begin)) 00865 { 00866 if (_M_stack.empty()) 00867 __throw_regex_error(regex_constants::error_badrepeat); 00868 if (!_M_match_token(_ScannerT::_S_token_dup_count)) 00869 __throw_regex_error(regex_constants::error_badbrace); 00870 _StateSeq __r(_M_stack.top()); 00871 int __min_rep = _M_cur_int_value(10); 00872 for (int __i = 1; __i < __min_rep; ++__i) 00873 _M_stack.top()._M_append(__r._M_clone()); 00874 if (_M_match_token(_ScannerT::_S_token_comma)) 00875 if (_M_match_token(_ScannerT::_S_token_dup_count)) 00876 { 00877 int __n = _M_cur_int_value(10) - __min_rep; 00878 if (__n < 0) 00879 __throw_regex_error(regex_constants::error_badbrace); 00880 for (int __i = 0; __i < __n; ++__i) 00881 { 00882 _StateSeq __r(_M_state_store, 00883 _M_state_store. 00884 _M_insert_alt(_S_invalid_state_id, 00885 _M_stack.top()._M_front())); 00886 _M_stack.top()._M_append(__r); 00887 } 00888 } 00889 else 00890 { 00891 _StateSeq __r(_M_stack.top(), -1); 00892 __r._M_push_back(__r._M_front()); 00893 _M_stack.pop(); 00894 _M_stack.push(__r); 00895 } 00896 if (!_M_match_token(_ScannerT::_S_token_interval_end)) 00897 __throw_regex_error(regex_constants::error_brace); 00898 return true; 00899 } 00900 return false; 00901 } 00902 00903 template<typename _InIter, typename _TraitsT> 00904 bool 00905 _Compiler<_InIter, _TraitsT>:: 00906 _M_atom() 00907 { 00908 using std::bind; 00909 using std::placeholders::_1; 00910 using std::placeholders::_2; 00911 typedef _CharMatcher<_InIter, _TraitsT> _CMatcher; 00912 typedef _StartTagger<_InIter, _TraitsT> _Start; 00913 typedef _EndTagger<_InIter, _TraitsT> _End; 00914 00915 if (_M_match_token(_ScannerT::_S_token_anychar)) 00916 { 00917 _M_stack.push(_StateSeq(_M_state_store, 00918 _M_state_store. 00919 _M_insert_matcher(bind(_AnyMatcher, _1)))); 00920 return true; 00921 } 00922 if (_M_match_token(_ScannerT::_S_token_ord_char)) 00923 { 00924 _M_stack.push(_StateSeq 00925 (_M_state_store, _M_state_store. 00926 _M_insert_matcher 00927 (bind(_CMatcher(_M_cur_value[0], _M_traits), _1)))); 00928 return true; 00929 } 00930 if (_M_match_token(_ScannerT::_S_token_quoted_char)) 00931 { 00932 // note that in the ECMA grammar, this case covers backrefs. 00933 _M_stack.push(_StateSeq(_M_state_store, 00934 _M_state_store. 00935 _M_insert_matcher 00936 (bind(_CMatcher(_M_cur_value[0], _M_traits), 00937 _1)))); 00938 return true; 00939 } 00940 if (_M_match_token(_ScannerT::_S_token_backref)) 00941 { 00942 // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value); 00943 return true; 00944 } 00945 if (_M_match_token(_ScannerT::_S_token_subexpr_begin)) 00946 { 00947 int __mark = _M_state_store._M_sub_count(); 00948 _StateSeq __r(_M_state_store, 00949 _M_state_store. 00950 _M_insert_subexpr_begin(bind(_Start(__mark), _1, _2))); 00951 this->_M_disjunction(); 00952 if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) 00953 __throw_regex_error(regex_constants::error_paren); 00954 if (!_M_stack.empty()) 00955 { 00956 __r._M_append(_M_stack.top()); 00957 _M_stack.pop(); 00958 } 00959 __r._M_append(_M_state_store._M_insert_subexpr_end 00960 (__mark, bind(_End(__mark), _1, _2))); 00961 _M_stack.push(__r); 00962 return true; 00963 } 00964 return _M_bracket_expression(); 00965 } 00966 00967 template<typename _InIter, typename _TraitsT> 00968 bool 00969 _Compiler<_InIter, _TraitsT>:: 00970 _M_bracket_expression() 00971 { 00972 using std::bind; 00973 using std::placeholders::_1; 00974 if (_M_match_token(_ScannerT::_S_token_bracket_begin)) 00975 { 00976 _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin), 00977 _M_traits); 00978 if (!_M_bracket_list(__matcher) 00979 || !_M_match_token(_ScannerT::_S_token_bracket_end)) 00980 __throw_regex_error(regex_constants::error_brack); 00981 _M_stack.push(_StateSeq(_M_state_store, 00982 _M_state_store._M_insert_matcher 00983 (bind(__matcher, _1)))); 00984 return true; 00985 } 00986 return false; 00987 } 00988 00989 // If the dash is the last character in the bracket expression, it is not 00990 // special. 00991 template<typename _InIter, typename _TraitsT> 00992 bool 00993 _Compiler<_InIter, _TraitsT>:: 00994 _M_bracket_list(_RMatcherT& __matcher) 00995 { 00996 if (_M_follow_list(__matcher)) 00997 { 00998 if (_M_match_token(_ScannerT::_S_token_dash)) 00999 __matcher._M_add_char(_M_cur_value[0]); 01000 return true; 01001 } 01002 return false; 01003 } 01004 01005 template<typename _InIter, typename _TraitsT> 01006 bool 01007 _Compiler<_InIter, _TraitsT>:: 01008 _M_follow_list(_RMatcherT& __matcher) 01009 { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); } 01010 01011 template<typename _InIter, typename _TraitsT> 01012 bool 01013 _Compiler<_InIter, _TraitsT>:: 01014 _M_follow_list2(_RMatcherT& __matcher) 01015 { 01016 if (_M_expression_term(__matcher)) 01017 return _M_follow_list2(__matcher); 01018 return true; 01019 } 01020 01021 template<typename _InIter, typename _TraitsT> 01022 bool 01023 _Compiler<_InIter, _TraitsT>:: 01024 _M_expression_term(_RMatcherT& __matcher) 01025 { 01026 return (_M_collating_symbol(__matcher) 01027 || _M_character_class(__matcher) 01028 || _M_equivalence_class(__matcher) 01029 || (_M_start_range(__matcher) 01030 && _M_range_expression(__matcher))); 01031 } 01032 01033 template<typename _InIter, typename _TraitsT> 01034 bool 01035 _Compiler<_InIter, _TraitsT>:: 01036 _M_range_expression(_RMatcherT& __matcher) 01037 { 01038 if (!_M_collating_symbol(__matcher)) 01039 if (!_M_match_token(_ScannerT::_S_token_dash)) 01040 __throw_regex_error(regex_constants::error_range); 01041 __matcher._M_make_range(); 01042 return true; 01043 } 01044 01045 template<typename _InIter, typename _TraitsT> 01046 bool 01047 _Compiler<_InIter, _TraitsT>:: 01048 _M_start_range(_RMatcherT& __matcher) 01049 { return _M_match_token(_ScannerT::_S_token_dash); } 01050 01051 template<typename _InIter, typename _TraitsT> 01052 bool 01053 _Compiler<_InIter, _TraitsT>:: 01054 _M_collating_symbol(_RMatcherT& __matcher) 01055 { 01056 if (_M_match_token(_ScannerT::_S_token_collelem_single)) 01057 { 01058 __matcher._M_add_char(_M_cur_value[0]); 01059 return true; 01060 } 01061 if (_M_match_token(_ScannerT::_S_token_collsymbol)) 01062 { 01063 __matcher._M_add_collating_element(_M_cur_value); 01064 return true; 01065 } 01066 return false; 01067 } 01068 01069 template<typename _InIter, typename _TraitsT> 01070 bool 01071 _Compiler<_InIter, _TraitsT>:: 01072 _M_equivalence_class(_RMatcherT& __matcher) 01073 { 01074 if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) 01075 { 01076 __matcher._M_add_equivalence_class(_M_cur_value); 01077 return true; 01078 } 01079 return false; 01080 } 01081 01082 template<typename _InIter, typename _TraitsT> 01083 bool 01084 _Compiler<_InIter, _TraitsT>:: 01085 _M_character_class(_RMatcherT& __matcher) 01086 { 01087 if (_M_match_token(_ScannerT::_S_token_char_class_name)) 01088 { 01089 __matcher._M_add_character_class(_M_cur_value); 01090 return true; 01091 } 01092 return false; 01093 } 01094 01095 template<typename _InIter, typename _TraitsT> 01096 int 01097 _Compiler<_InIter, _TraitsT>:: 01098 _M_cur_int_value(int __radix) 01099 { 01100 int __v = 0; 01101 for (typename _StringT::size_type __i = 0; 01102 __i < _M_cur_value.length(); ++__i) 01103 __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix); 01104 return __v; 01105 } 01106 01107 template<typename _InIter, typename _TraitsT> 01108 _AutomatonPtr 01109 __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t, 01110 regex_constants::syntax_option_type __f) 01111 { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t, 01112 __f)._M_nfa())); } 01113 01114 } // namespace __regex 01115 01116 _GLIBCXX_END_NAMESPACE_VERSION 01117 } // namespace 01118 01119 /* vim: set ts=8 sw=2 sts=2: */