libstdc++

regex_compiler.h

Go to the documentation of this file.
00001 // class template regex -*- C++ -*-
00002 
00003 // Copyright (C) 2010, 2011 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /**
00026  *  @file bits/regex_compiler.h
00027  *  This is an internal header file, included by other library headers.
00028  *  Do not attempt to use it directly. @headername{regex}
00029  */
00030 
00031 namespace std _GLIBCXX_VISIBILITY(default)
00032 {
00033 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00034 
00035 namespace __regex
00036 {
00037   struct _Scanner_base
00038   {
00039     typedef unsigned int _StateT;
00040 
00041     static constexpr _StateT _S_state_at_start    = 1 << 0;
00042     static constexpr _StateT _S_state_in_brace    = 1 << 2;
00043     static constexpr _StateT _S_state_in_bracket  = 1 << 3;
00044   };
00045 
00046   //
00047   // @brief Scans an input range for regex tokens.
00048   //
00049   // The %_Scanner class interprets the regular expression pattern in the input
00050   // range passed to its constructor as a sequence of parse tokens passed to
00051   // the regular expression compiler.  The sequence of tokens provided depends
00052   // on the flag settings passed to the constructor:  different regular
00053   // expression grammars will interpret the same input pattern in
00054   // syntactically different ways.
00055   //
00056   template<typename _InputIterator>
00057     class _Scanner: public _Scanner_base
00058     {
00059     public:
00060       typedef _InputIterator                                        _IteratorT;
00061       typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
00062       typedef std::basic_string<_CharT>                             _StringT;
00063       typedef regex_constants::syntax_option_type                   _FlagT;
00064       typedef const std::ctype<_CharT>                              _CtypeT;
00065 
00066       // Token types returned from the scanner.
00067       enum _TokenT
00068       {
00069     _S_token_anychar,
00070     _S_token_backref,
00071     _S_token_bracket_begin,
00072     _S_token_bracket_end,
00073     _S_token_inverse_class,
00074     _S_token_char_class_name,
00075     _S_token_closure0,
00076     _S_token_closure1,
00077     _S_token_collelem_multi,
00078     _S_token_collelem_single,
00079     _S_token_collsymbol,
00080     _S_token_comma,
00081     _S_token_dash,
00082     _S_token_dup_count,
00083     _S_token_eof,
00084     _S_token_equiv_class_name,
00085     _S_token_interval_begin,
00086     _S_token_interval_end,
00087     _S_token_line_begin,
00088     _S_token_line_end,
00089     _S_token_opt,
00090     _S_token_or,
00091     _S_token_ord_char,
00092     _S_token_quoted_char,
00093     _S_token_subexpr_begin,
00094     _S_token_subexpr_end,
00095     _S_token_word_begin,
00096     _S_token_word_end,
00097     _S_token_unknown
00098       };
00099 
00100     public:
00101       _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
00102            std::locale __loc)
00103       : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
00104         _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
00105       { _M_advance(); }
00106 
00107       void
00108       _M_advance();
00109 
00110       _TokenT
00111       _M_token() const
00112       { return _M_curToken; }
00113 
00114       const _StringT&
00115       _M_value() const
00116       { return _M_curValue; }
00117 
00118 #ifdef _GLIBCXX_DEBUG
00119       std::ostream&
00120       _M_print(std::ostream&);
00121 #endif
00122 
00123     private:
00124       void
00125       _M_eat_escape();
00126 
00127       void
00128       _M_scan_in_brace();
00129 
00130       void
00131       _M_scan_in_bracket();
00132 
00133       void
00134       _M_eat_charclass();
00135 
00136       void
00137       _M_eat_equivclass();
00138 
00139       void
00140       _M_eat_collsymbol();
00141 
00142     private:
00143       _IteratorT  _M_current;
00144       _IteratorT  _M_end;
00145       _FlagT      _M_flags;
00146       _CtypeT&    _M_ctype;
00147       _TokenT     _M_curToken;
00148       _StringT    _M_curValue;
00149       _StateT     _M_state;
00150     };
00151 
00152   template<typename _InputIterator>
00153     void
00154     _Scanner<_InputIterator>::
00155     _M_advance()
00156     {
00157       if (_M_current == _M_end)
00158     {
00159       _M_curToken = _S_token_eof;
00160       return;
00161     }
00162 
00163       _CharT __c = *_M_current;
00164       if (_M_state & _S_state_in_bracket)
00165     {
00166       _M_scan_in_bracket();
00167       return;
00168     }
00169       if (_M_state & _S_state_in_brace)
00170     {
00171       _M_scan_in_brace();
00172       return;
00173     }
00174 #if 0
00175       // TODO: re-enable line anchors when _M_assertion is implemented.
00176       // See PR libstdc++/47724
00177       else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
00178     {
00179       _M_curToken = _S_token_line_begin;
00180       ++_M_current;
00181       return;
00182     }
00183       else if (__c == _M_ctype.widen('$'))
00184     {
00185       _M_curToken = _S_token_line_end;
00186       ++_M_current;
00187       return;
00188     }
00189 #endif
00190       else if (__c == _M_ctype.widen('.'))
00191     {
00192       _M_curToken = _S_token_anychar;
00193       ++_M_current;
00194       return;
00195     }
00196       else if (__c == _M_ctype.widen('*'))
00197     {
00198       _M_curToken = _S_token_closure0;
00199       ++_M_current;
00200       return;
00201     }
00202       else if (__c == _M_ctype.widen('+'))
00203     {
00204       _M_curToken = _S_token_closure1;
00205       ++_M_current;
00206       return;
00207     }
00208       else if (__c == _M_ctype.widen('|'))
00209     {
00210       _M_curToken = _S_token_or;
00211       ++_M_current;
00212       return;
00213     }
00214       else if (__c == _M_ctype.widen('['))
00215     {
00216       _M_curToken = _S_token_bracket_begin;
00217       _M_state |= (_S_state_in_bracket | _S_state_at_start);
00218       ++_M_current;
00219       return;
00220     }
00221       else if (__c == _M_ctype.widen('\\'))
00222     {
00223       _M_eat_escape();
00224       return;
00225     }
00226       else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00227     {
00228       if (__c == _M_ctype.widen('('))
00229         {
00230           _M_curToken = _S_token_subexpr_begin;
00231           ++_M_current;
00232           return;
00233         }
00234       else if (__c == _M_ctype.widen(')'))
00235         {
00236           _M_curToken = _S_token_subexpr_end;
00237           ++_M_current;
00238           return;
00239         }
00240       else if (__c == _M_ctype.widen('{'))
00241         {
00242           _M_curToken = _S_token_interval_begin;
00243           _M_state |= _S_state_in_brace;
00244           ++_M_current;
00245           return;
00246         }
00247     }
00248 
00249       _M_curToken = _S_token_ord_char;
00250       _M_curValue.assign(1, __c);
00251       ++_M_current;
00252     }
00253 
00254 
00255   template<typename _InputIterator>
00256     void
00257     _Scanner<_InputIterator>::
00258     _M_scan_in_brace()
00259     {
00260       if (_M_ctype.is(_CtypeT::digit, *_M_current))
00261     {
00262       _M_curToken = _S_token_dup_count;
00263       _M_curValue.assign(1, *_M_current);
00264       ++_M_current;
00265       while (_M_current != _M_end
00266          && _M_ctype.is(_CtypeT::digit, *_M_current))
00267         {
00268           _M_curValue += *_M_current;
00269           ++_M_current;
00270         }
00271       return;
00272     }
00273       else if (*_M_current == _M_ctype.widen(','))
00274     {
00275       _M_curToken = _S_token_comma;
00276       ++_M_current;
00277       return;
00278     }
00279       if (_M_flags & (regex_constants::basic | regex_constants::grep))
00280     {
00281       if (*_M_current == _M_ctype.widen('\\'))
00282         _M_eat_escape();
00283     }
00284       else 
00285     {
00286       if (*_M_current == _M_ctype.widen('}'))
00287         {
00288           _M_curToken = _S_token_interval_end;
00289           _M_state &= ~_S_state_in_brace;
00290           ++_M_current;
00291           return;
00292         }
00293     }
00294     }
00295 
00296   template<typename _InputIterator>
00297     void
00298     _Scanner<_InputIterator>::
00299     _M_scan_in_bracket()
00300     {
00301       if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
00302     {
00303       _M_curToken = _S_token_inverse_class;
00304       _M_state &= ~_S_state_at_start;
00305       ++_M_current;
00306       return;
00307     }
00308       else if (*_M_current == _M_ctype.widen('['))
00309     {
00310       ++_M_current;
00311       if (_M_current == _M_end)
00312         {
00313           _M_curToken = _S_token_eof;
00314           return;
00315         }
00316 
00317       if (*_M_current == _M_ctype.widen('.'))
00318         {
00319           _M_curToken = _S_token_collsymbol;
00320           _M_eat_collsymbol();
00321           return;
00322         }
00323       else if (*_M_current == _M_ctype.widen(':'))
00324         {
00325           _M_curToken = _S_token_char_class_name;
00326           _M_eat_charclass();
00327           return;
00328         }
00329       else if (*_M_current == _M_ctype.widen('='))
00330         {
00331           _M_curToken = _S_token_equiv_class_name;
00332           _M_eat_equivclass();
00333           return;
00334         }
00335     }
00336       else if (*_M_current == _M_ctype.widen('-'))
00337     {
00338       _M_curToken = _S_token_dash;
00339       ++_M_current;
00340       return;
00341     }
00342       else if (*_M_current == _M_ctype.widen(']'))
00343     {
00344       if (!(_M_flags & regex_constants::ECMAScript)
00345           || !(_M_state & _S_state_at_start))
00346         {
00347           // special case: only if  _not_ chr first after
00348           // '[' or '[^' and if not ECMAscript
00349           _M_curToken = _S_token_bracket_end;
00350           ++_M_current;
00351           return;
00352         }
00353     }
00354       _M_curToken = _S_token_collelem_single;
00355       _M_curValue.assign(1, *_M_current);
00356       ++_M_current;
00357     }
00358 
00359   template<typename _InputIterator>
00360     void
00361     _Scanner<_InputIterator>::
00362     _M_eat_escape()
00363     {
00364       ++_M_current;
00365       if (_M_current == _M_end)
00366     {
00367       _M_curToken = _S_token_eof;
00368       return;
00369     }
00370       _CharT __c = *_M_current;
00371       ++_M_current;
00372 
00373       if (__c == _M_ctype.widen('('))
00374     {
00375       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00376         {
00377           _M_curToken = _S_token_ord_char;
00378           _M_curValue.assign(1, __c);
00379         }
00380       else
00381         _M_curToken = _S_token_subexpr_begin;
00382     }
00383       else if (__c == _M_ctype.widen(')'))
00384     {
00385       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00386         {
00387           _M_curToken = _S_token_ord_char;
00388           _M_curValue.assign(1, __c);
00389         }
00390       else
00391         _M_curToken = _S_token_subexpr_end;
00392     }
00393       else if (__c == _M_ctype.widen('{'))
00394     {
00395       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00396         {
00397           _M_curToken = _S_token_ord_char;
00398           _M_curValue.assign(1, __c);
00399         }
00400       else
00401         {
00402           _M_curToken = _S_token_interval_begin;
00403           _M_state |= _S_state_in_brace;
00404         }
00405     }
00406       else if (__c == _M_ctype.widen('}'))
00407     {
00408       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00409         {
00410           _M_curToken = _S_token_ord_char;
00411           _M_curValue.assign(1, __c);
00412         }
00413       else
00414         {
00415           if (!(_M_state && _S_state_in_brace))
00416         __throw_regex_error(regex_constants::error_badbrace);
00417           _M_state &= ~_S_state_in_brace;
00418           _M_curToken = _S_token_interval_end;
00419         }
00420     }
00421       else if (__c == _M_ctype.widen('x'))
00422     {
00423       ++_M_current;
00424       if (_M_current == _M_end)
00425         {
00426           _M_curToken = _S_token_eof;
00427           return;
00428         }
00429       if (_M_ctype.is(_CtypeT::digit, *_M_current))
00430         {
00431           _M_curValue.assign(1, *_M_current);
00432           ++_M_current;
00433           if (_M_current == _M_end)
00434         {
00435           _M_curToken = _S_token_eof;
00436           return;
00437         }
00438           if (_M_ctype.is(_CtypeT::digit, *_M_current))
00439         {
00440           _M_curValue += *_M_current;
00441           ++_M_current;
00442           return;
00443         }
00444         }
00445     }
00446       else if (__c == _M_ctype.widen('^')
00447            || __c == _M_ctype.widen('.')
00448            || __c == _M_ctype.widen('*')
00449            || __c == _M_ctype.widen('$')
00450            || __c == _M_ctype.widen('\\'))
00451     {
00452       _M_curToken = _S_token_ord_char;
00453       _M_curValue.assign(1, __c);
00454     }
00455       else if (_M_ctype.is(_CtypeT::digit, __c))
00456     {
00457       _M_curToken = _S_token_backref;
00458       _M_curValue.assign(1, __c);
00459     }
00460       else
00461     __throw_regex_error(regex_constants::error_escape);
00462     }
00463 
00464 
00465   // Eats a character class or throwns an exception.
00466   // current point to ':' delimiter on entry, char after ']' on return
00467   template<typename _InputIterator>
00468     void
00469     _Scanner<_InputIterator>::
00470     _M_eat_charclass()
00471     {
00472       ++_M_current; // skip ':'
00473       if (_M_current == _M_end)
00474     __throw_regex_error(regex_constants::error_ctype);
00475       for (_M_curValue.clear();
00476        _M_current != _M_end && *_M_current != _M_ctype.widen(':');
00477        ++_M_current)
00478     _M_curValue += *_M_current;
00479       if (_M_current == _M_end)
00480     __throw_regex_error(regex_constants::error_ctype);
00481       ++_M_current; // skip ':'
00482       if (*_M_current != _M_ctype.widen(']'))
00483     __throw_regex_error(regex_constants::error_ctype);
00484       ++_M_current; // skip ']'
00485     }
00486 
00487 
00488   template<typename _InputIterator>
00489     void
00490     _Scanner<_InputIterator>::
00491     _M_eat_equivclass()
00492     {
00493       ++_M_current; // skip '='
00494       if (_M_current == _M_end)
00495     __throw_regex_error(regex_constants::error_collate);
00496       for (_M_curValue.clear();
00497        _M_current != _M_end && *_M_current != _M_ctype.widen('=');
00498        ++_M_current)
00499     _M_curValue += *_M_current;
00500       if (_M_current == _M_end)
00501     __throw_regex_error(regex_constants::error_collate);
00502       ++_M_current; // skip '='
00503       if (*_M_current != _M_ctype.widen(']'))
00504     __throw_regex_error(regex_constants::error_collate);
00505       ++_M_current; // skip ']'
00506     }
00507 
00508 
00509   template<typename _InputIterator>
00510     void
00511     _Scanner<_InputIterator>::
00512     _M_eat_collsymbol()
00513     {
00514       ++_M_current; // skip '.'
00515       if (_M_current == _M_end)
00516     __throw_regex_error(regex_constants::error_collate);
00517       for (_M_curValue.clear();
00518        _M_current != _M_end && *_M_current != _M_ctype.widen('.');
00519        ++_M_current)
00520     _M_curValue += *_M_current;
00521       if (_M_current == _M_end)
00522     __throw_regex_error(regex_constants::error_collate);
00523       ++_M_current; // skip '.'
00524       if (*_M_current != _M_ctype.widen(']'))
00525     __throw_regex_error(regex_constants::error_collate);
00526       ++_M_current; // skip ']'
00527     }
00528 
00529 #ifdef _GLIBCXX_DEBUG
00530   template<typename _InputIterator>
00531     std::ostream&
00532     _Scanner<_InputIterator>::
00533     _M_print(std::ostream& ostr)
00534     {
00535       switch (_M_curToken)
00536       {
00537     case _S_token_anychar:
00538       ostr << "any-character\n";
00539       break;
00540     case _S_token_backref:
00541       ostr << "backref\n";
00542       break;
00543     case _S_token_bracket_begin:
00544       ostr << "bracket-begin\n";
00545       break;
00546     case _S_token_bracket_end:
00547       ostr << "bracket-end\n";
00548       break;
00549     case _S_token_char_class_name:
00550       ostr << "char-class-name \"" << _M_curValue << "\"\n";
00551       break;
00552     case _S_token_closure0:
00553       ostr << "closure0\n";
00554       break;
00555     case _S_token_closure1:
00556       ostr << "closure1\n";
00557       break;
00558     case _S_token_collelem_multi:
00559       ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
00560       break;
00561     case _S_token_collelem_single:
00562       ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
00563       break;
00564     case _S_token_collsymbol:
00565       ostr << "collsymbol \"" << _M_curValue << "\"\n";
00566       break;
00567     case _S_token_comma:
00568       ostr << "comma\n";
00569       break;
00570     case _S_token_dash:
00571       ostr << "dash\n";
00572       break;
00573     case _S_token_dup_count:
00574       ostr << "dup count: " << _M_curValue << "\n";
00575       break;
00576     case _S_token_eof:
00577       ostr << "EOF\n";
00578       break;
00579     case _S_token_equiv_class_name:
00580       ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
00581       break;
00582     case _S_token_interval_begin:
00583       ostr << "interval begin\n";
00584       break;
00585     case _S_token_interval_end:
00586       ostr << "interval end\n";
00587       break;
00588     case _S_token_line_begin:
00589       ostr << "line begin\n";
00590       break;
00591     case _S_token_line_end:
00592       ostr << "line end\n";
00593       break;
00594     case _S_token_opt:
00595       ostr << "opt\n";
00596       break;
00597     case _S_token_or:
00598       ostr << "or\n";
00599       break;
00600     case _S_token_ord_char:
00601       ostr << "ordinary character: \"" << _M_value() << "\"\n";
00602       break;
00603     case _S_token_quoted_char:
00604       ostr << "quoted char\n";
00605       break;
00606     case _S_token_subexpr_begin:
00607       ostr << "subexpr begin\n";
00608       break;
00609     case _S_token_subexpr_end:
00610       ostr << "subexpr end\n";
00611       break;
00612     case _S_token_word_begin:
00613       ostr << "word begin\n";
00614       break;
00615     case _S_token_word_end:
00616       ostr << "word end\n";
00617       break;
00618     case _S_token_unknown:
00619       ostr << "-- unknown token --\n";
00620       break;
00621       }
00622       return ostr;
00623     }
00624 #endif
00625 
00626   // Builds an NFA from an input iterator interval.
00627   template<typename _InIter, typename _TraitsT>
00628     class _Compiler
00629     {
00630     public:
00631       typedef _InIter                                            _IterT;
00632       typedef typename std::iterator_traits<_InIter>::value_type _CharT;
00633       typedef std::basic_string<_CharT>                          _StringT;
00634       typedef regex_constants::syntax_option_type                _FlagT;
00635 
00636     public:
00637       _Compiler(const _InIter& __b, const _InIter& __e,
00638         _TraitsT& __traits, _FlagT __flags);
00639 
00640       const _Nfa&
00641       _M_nfa() const
00642       { return _M_state_store; }
00643 
00644     private:
00645       typedef _Scanner<_InIter>                              _ScannerT;
00646       typedef typename _ScannerT::_TokenT                    _TokenT;
00647       typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
00648       typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
00649 
00650       // accepts a specific token or returns false.
00651       bool
00652       _M_match_token(_TokenT __token);
00653 
00654       void
00655       _M_disjunction();
00656 
00657       bool
00658       _M_alternative();
00659 
00660       bool
00661       _M_term();
00662 
00663       bool
00664       _M_assertion();
00665 
00666       bool
00667       _M_quantifier();
00668 
00669       bool
00670       _M_atom();
00671 
00672       bool
00673       _M_bracket_expression();
00674 
00675       bool
00676       _M_bracket_list(_RMatcherT& __matcher);
00677 
00678       bool
00679       _M_follow_list(_RMatcherT& __matcher);
00680 
00681       bool
00682       _M_follow_list2(_RMatcherT& __matcher);
00683 
00684       bool
00685       _M_expression_term(_RMatcherT& __matcher);
00686 
00687       bool
00688       _M_range_expression(_RMatcherT& __matcher);
00689 
00690       bool
00691       _M_start_range(_RMatcherT& __matcher);
00692 
00693       bool
00694       _M_collating_symbol(_RMatcherT& __matcher);
00695 
00696       bool
00697       _M_equivalence_class(_RMatcherT& __matcher);
00698 
00699       bool
00700       _M_character_class(_RMatcherT& __matcher);
00701 
00702       int
00703       _M_cur_int_value(int __radix);
00704 
00705     private:
00706       _TraitsT&      _M_traits;
00707       _ScannerT      _M_scanner;
00708       _StringT       _M_cur_value;
00709       _Nfa           _M_state_store;
00710       _StackT        _M_stack;
00711     };
00712 
00713   template<typename _InIter, typename _TraitsT>
00714     _Compiler<_InIter, _TraitsT>::
00715     _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
00716           _Compiler<_InIter, _TraitsT>::_FlagT __flags)
00717     : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
00718       _M_state_store(__flags)
00719     {
00720       typedef _StartTagger<_InIter, _TraitsT> _Start;
00721       typedef _EndTagger<_InIter, _TraitsT> _End;
00722 
00723       _StateSeq __r(_M_state_store,
00724                 _M_state_store._M_insert_subexpr_begin(_Start(0)));
00725       _M_disjunction();
00726       if (!_M_stack.empty())
00727     {
00728       __r._M_append(_M_stack.top());
00729       _M_stack.pop();
00730     }
00731       __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
00732       __r._M_append(_M_state_store._M_insert_accept());
00733     }
00734 
00735   template<typename _InIter, typename _TraitsT>
00736     bool
00737     _Compiler<_InIter, _TraitsT>::
00738     _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
00739     { 
00740       if (token == _M_scanner._M_token())
00741     {
00742       _M_cur_value = _M_scanner._M_value();
00743       _M_scanner._M_advance();
00744       return true;
00745     }
00746       return false;
00747     }
00748 
00749   template<typename _InIter, typename _TraitsT>
00750     void
00751     _Compiler<_InIter, _TraitsT>::
00752     _M_disjunction()
00753     {
00754       this->_M_alternative();
00755       if (_M_match_token(_ScannerT::_S_token_or))
00756     {
00757       _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
00758       this->_M_disjunction();
00759       _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
00760       _M_stack.push(_StateSeq(__alt1, __alt2));
00761     }
00762     }
00763 
00764   template<typename _InIter, typename _TraitsT>
00765     bool
00766     _Compiler<_InIter, _TraitsT>::
00767     _M_alternative()
00768     {
00769       if (this->_M_term())
00770     {
00771       _StateSeq __re = _M_stack.top(); _M_stack.pop();
00772       this->_M_alternative();
00773       if (!_M_stack.empty())
00774         {
00775           __re._M_append(_M_stack.top());
00776           _M_stack.pop();
00777         }
00778       _M_stack.push(__re);
00779       return true;
00780     }
00781       return false;
00782     }
00783 
00784   template<typename _InIter, typename _TraitsT>
00785     bool
00786     _Compiler<_InIter, _TraitsT>::
00787     _M_term()
00788     {
00789       if (this->_M_assertion())
00790     return true;
00791       if (this->_M_atom())
00792     {
00793       this->_M_quantifier();
00794       return true;
00795     }
00796       return false;
00797     }
00798 
00799   template<typename _InIter, typename _TraitsT>
00800     bool
00801     _Compiler<_InIter, _TraitsT>::
00802     _M_assertion()
00803     {
00804       if (_M_match_token(_ScannerT::_S_token_line_begin))
00805     {
00806       // __m.push(_Matcher::_S_opcode_line_begin);
00807       return true;
00808     }
00809       if (_M_match_token(_ScannerT::_S_token_line_end))
00810     {
00811       // __m.push(_Matcher::_S_opcode_line_end);
00812       return true;
00813     }
00814       if (_M_match_token(_ScannerT::_S_token_word_begin))
00815     {
00816       // __m.push(_Matcher::_S_opcode_word_begin);
00817       return true;
00818     }
00819       if (_M_match_token(_ScannerT::_S_token_word_end))
00820     {
00821       // __m.push(_Matcher::_S_opcode_word_end);
00822       return true;
00823     }
00824       return false;
00825     }
00826 
00827   template<typename _InIter, typename _TraitsT>
00828     bool
00829     _Compiler<_InIter, _TraitsT>::
00830     _M_quantifier()
00831     {
00832       if (_M_match_token(_ScannerT::_S_token_closure0))
00833     {
00834       if (_M_stack.empty())
00835         __throw_regex_error(regex_constants::error_badrepeat);
00836       _StateSeq __r(_M_stack.top(), -1);
00837       __r._M_append(__r._M_front());
00838       _M_stack.pop();
00839       _M_stack.push(__r);
00840       return true;
00841     }
00842       if (_M_match_token(_ScannerT::_S_token_closure1))
00843     {
00844       if (_M_stack.empty())
00845         __throw_regex_error(regex_constants::error_badrepeat);
00846       _StateSeq __r(_M_state_store,
00847             _M_state_store.
00848             _M_insert_alt(_S_invalid_state_id,
00849                       _M_stack.top()._M_front()));
00850       _M_stack.top()._M_append(__r);
00851       return true;
00852     }
00853       if (_M_match_token(_ScannerT::_S_token_opt))
00854     {
00855       if (_M_stack.empty())
00856       __throw_regex_error(regex_constants::error_badrepeat);
00857       _StateSeq __r(_M_stack.top(), -1);
00858       _M_stack.pop();
00859       _M_stack.push(__r);
00860       return true;
00861     }
00862       if (_M_match_token(_ScannerT::_S_token_interval_begin))
00863     {
00864       if (_M_stack.empty())
00865         __throw_regex_error(regex_constants::error_badrepeat);
00866       if (!_M_match_token(_ScannerT::_S_token_dup_count))
00867         __throw_regex_error(regex_constants::error_badbrace);
00868       _StateSeq __r(_M_stack.top());
00869       int __min_rep = _M_cur_int_value(10);
00870       for (int __i = 1; __i < __min_rep; ++__i)
00871         _M_stack.top()._M_append(__r._M_clone()); 
00872       if (_M_match_token(_ScannerT::_S_token_comma))
00873         if (_M_match_token(_ScannerT::_S_token_dup_count))
00874           {
00875         int __n = _M_cur_int_value(10) - __min_rep;
00876         if (__n < 0)
00877           __throw_regex_error(regex_constants::error_badbrace);
00878         for (int __i = 0; __i < __n; ++__i)
00879           {
00880             _StateSeq __r(_M_state_store,
00881                   _M_state_store.
00882                   _M_insert_alt(_S_invalid_state_id,
00883                         _M_stack.top()._M_front()));
00884             _M_stack.top()._M_append(__r);
00885           }
00886           }
00887         else
00888           {
00889         _StateSeq __r(_M_stack.top(), -1);
00890         __r._M_push_back(__r._M_front());
00891         _M_stack.pop();
00892         _M_stack.push(__r);
00893           }
00894       if (!_M_match_token(_ScannerT::_S_token_interval_end))
00895         __throw_regex_error(regex_constants::error_brace);
00896       return true;
00897     }
00898       return false;
00899     }
00900 
00901   template<typename _InIter, typename _TraitsT>
00902     bool
00903     _Compiler<_InIter, _TraitsT>::
00904     _M_atom()
00905     {
00906       typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
00907       typedef _StartTagger<_InIter, _TraitsT> _Start;
00908       typedef _EndTagger<_InIter, _TraitsT> _End;
00909 
00910       if (_M_match_token(_ScannerT::_S_token_anychar))
00911     {
00912       _M_stack.push(_StateSeq(_M_state_store,
00913                                   _M_state_store._M_insert_matcher
00914                                   (_AnyMatcher)));
00915       return true;
00916     }
00917       if (_M_match_token(_ScannerT::_S_token_ord_char))
00918     {
00919       _M_stack.push(_StateSeq(_M_state_store,
00920                                   _M_state_store._M_insert_matcher
00921                                   (_CMatcher(_M_cur_value[0], _M_traits))));
00922       return true;
00923     }
00924       if (_M_match_token(_ScannerT::_S_token_quoted_char))
00925     {
00926       // note that in the ECMA grammar, this case covers backrefs.
00927       _M_stack.push(_StateSeq(_M_state_store,
00928                   _M_state_store._M_insert_matcher
00929                   (_CMatcher(_M_cur_value[0], _M_traits))));
00930       return true;
00931     }
00932       if (_M_match_token(_ScannerT::_S_token_backref))
00933     {
00934       // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
00935       return true;
00936     }
00937       if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
00938     {
00939       int __mark = _M_state_store._M_sub_count();
00940       _StateSeq __r(_M_state_store,
00941             _M_state_store.
00942             _M_insert_subexpr_begin(_Start(__mark)));
00943       this->_M_disjunction();
00944       if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
00945         __throw_regex_error(regex_constants::error_paren);
00946       if (!_M_stack.empty())
00947         {
00948           __r._M_append(_M_stack.top());
00949           _M_stack.pop();
00950         }
00951       __r._M_append(_M_state_store._M_insert_subexpr_end
00952             (__mark, _End(__mark)));
00953       _M_stack.push(__r);
00954       return true;
00955     }
00956       return _M_bracket_expression();
00957     }
00958 
00959   template<typename _InIter, typename _TraitsT>
00960     bool
00961     _Compiler<_InIter, _TraitsT>::
00962     _M_bracket_expression()
00963     {
00964       if (_M_match_token(_ScannerT::_S_token_bracket_begin))
00965     {
00966       _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
00967                    _M_traits);
00968       if (!_M_bracket_list(__matcher)
00969           || !_M_match_token(_ScannerT::_S_token_bracket_end))
00970         __throw_regex_error(regex_constants::error_brack);
00971       _M_stack.push(_StateSeq(_M_state_store,
00972                   _M_state_store._M_insert_matcher(__matcher)));
00973       return true;
00974     }
00975       return false;
00976     }
00977 
00978   // If the dash is the last character in the bracket expression, it is not
00979   // special.
00980   template<typename _InIter, typename _TraitsT>
00981     bool
00982     _Compiler<_InIter, _TraitsT>::
00983     _M_bracket_list(_RMatcherT& __matcher)
00984     {
00985       if (_M_follow_list(__matcher))
00986     {
00987       if (_M_match_token(_ScannerT::_S_token_dash))
00988         __matcher._M_add_char(_M_cur_value[0]);
00989       return true;
00990     }
00991       return false;
00992     }
00993 
00994   template<typename _InIter, typename _TraitsT>
00995     bool
00996     _Compiler<_InIter, _TraitsT>::
00997     _M_follow_list(_RMatcherT& __matcher)
00998     { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
00999 
01000   template<typename _InIter, typename _TraitsT>
01001     bool
01002     _Compiler<_InIter, _TraitsT>::
01003     _M_follow_list2(_RMatcherT& __matcher)
01004     {
01005       if (_M_expression_term(__matcher))
01006     return _M_follow_list2(__matcher);
01007       return true;
01008     }
01009 
01010   template<typename _InIter, typename _TraitsT>
01011     bool
01012     _Compiler<_InIter, _TraitsT>::
01013     _M_expression_term(_RMatcherT& __matcher)
01014     {
01015       return (_M_collating_symbol(__matcher)
01016           || _M_character_class(__matcher)
01017           || _M_equivalence_class(__matcher)
01018           || (_M_start_range(__matcher)
01019           && _M_range_expression(__matcher)));
01020     }
01021 
01022   template<typename _InIter, typename _TraitsT>
01023     bool
01024     _Compiler<_InIter, _TraitsT>::
01025     _M_range_expression(_RMatcherT& __matcher)
01026     {
01027       if (!_M_collating_symbol(__matcher))
01028     if (!_M_match_token(_ScannerT::_S_token_dash))
01029       __throw_regex_error(regex_constants::error_range);
01030       __matcher._M_make_range();
01031       return true;
01032     }
01033 
01034   template<typename _InIter, typename _TraitsT>
01035     bool
01036     _Compiler<_InIter, _TraitsT>::
01037     _M_start_range(_RMatcherT& __matcher)
01038     { return _M_match_token(_ScannerT::_S_token_dash); }
01039 
01040   template<typename _InIter, typename _TraitsT>
01041     bool
01042     _Compiler<_InIter, _TraitsT>::
01043     _M_collating_symbol(_RMatcherT& __matcher)
01044     {
01045       if (_M_match_token(_ScannerT::_S_token_collelem_single))
01046     {
01047       __matcher._M_add_char(_M_cur_value[0]);
01048       return true;
01049     }
01050       if (_M_match_token(_ScannerT::_S_token_collsymbol))
01051     {
01052       __matcher._M_add_collating_element(_M_cur_value);
01053       return true;
01054     }
01055       return false;
01056     }
01057 
01058   template<typename _InIter, typename _TraitsT>
01059     bool
01060     _Compiler<_InIter, _TraitsT>::
01061     _M_equivalence_class(_RMatcherT& __matcher)
01062     {
01063       if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
01064     {
01065       __matcher._M_add_equivalence_class(_M_cur_value);
01066       return true;
01067     }
01068       return false;
01069     }
01070 
01071   template<typename _InIter, typename _TraitsT>
01072     bool
01073     _Compiler<_InIter, _TraitsT>::
01074     _M_character_class(_RMatcherT& __matcher)
01075     {
01076       if (_M_match_token(_ScannerT::_S_token_char_class_name))
01077     {
01078       __matcher._M_add_character_class(_M_cur_value);
01079       return true;
01080     }
01081       return false;
01082     }
01083 
01084   template<typename _InIter, typename _TraitsT>
01085     int
01086     _Compiler<_InIter, _TraitsT>::
01087     _M_cur_int_value(int __radix)
01088     {
01089       int __v = 0;
01090       for (typename _StringT::size_type __i = 0;
01091        __i < _M_cur_value.length(); ++__i)
01092     __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
01093       return __v;
01094     }
01095 
01096   template<typename _InIter, typename _TraitsT>
01097     _AutomatonPtr
01098     __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
01099           regex_constants::syntax_option_type __f)
01100     { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
01101                                         __f)._M_nfa())); }
01102 
01103 } // namespace __regex
01104 
01105 _GLIBCXX_END_NAMESPACE_VERSION
01106 } // namespace
01107 
01108 /* vim: set ts=8 sw=2 sts=2: */