libstdc++

regex_compiler.h

Go to the documentation of this file.
00001 // class template regex -*- C++ -*-
00002 
00003 // Copyright (C) 2010 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /**
00026  *  @file bits/regex_compiler.h
00027  *  This is an internal header file, included by other library headers.
00028  *  Do not attempt to use it directly. @headername{regex}
00029  */
00030 
00031 namespace std _GLIBCXX_VISIBILITY(default)
00032 {
00033 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00034 
00035 namespace __regex
00036 {
00037   struct _Scanner_base
00038   {
00039     // FIXME: replace these constanst with constexpr
00040     typedef unsigned int _StateT;
00041 
00042     static const _StateT _S_state_at_start    = 1 << 0;
00043     static const _StateT _S_state_in_brace    = 1 << 2;
00044     static const _StateT _S_state_in_bracket  = 1 << 3;
00045   };
00046 
00047   //
00048   // @brief Scans an input range for regex tokens.
00049   //
00050   // The %_Scanner class interprets the regular expression pattern in the input
00051   // range passed to its constructor as a sequence of parse tokens passed to
00052   // the regular expression compiler.  The sequence of tokens provided depends
00053   // on the flag settings passed to the constructor:  different regular
00054   // expression gramars will interpret the same input pattern in syntactically
00055   // different ways.
00056   //
00057   template<typename _InputIterator>
00058     class _Scanner: public _Scanner_base
00059     {
00060     public:
00061       typedef _InputIterator                                        _IteratorT;
00062       typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
00063       typedef std::basic_string<_CharT>                             _StringT;
00064       typedef regex_constants::syntax_option_type                   _FlagT;
00065       typedef const std::ctype<_CharT>                              _CtypeT;
00066 
00067       // Token types returned from the scanner.
00068       enum _TokenT
00069       {
00070     _S_token_anychar,
00071     _S_token_backref,
00072     _S_token_bracket_begin,
00073     _S_token_bracket_end,
00074     _S_token_inverse_class,
00075     _S_token_char_class_name,
00076     _S_token_closure0,
00077     _S_token_closure1,
00078     _S_token_collelem_multi,
00079     _S_token_collelem_single,
00080     _S_token_collsymbol,
00081     _S_token_comma,
00082     _S_token_dash,
00083     _S_token_dup_count,
00084     _S_token_eof,
00085     _S_token_equiv_class_name,
00086     _S_token_interval_begin,
00087     _S_token_interval_end,
00088     _S_token_line_begin,
00089     _S_token_line_end,
00090     _S_token_opt,
00091     _S_token_or,
00092     _S_token_ord_char,
00093     _S_token_quoted_char,
00094     _S_token_subexpr_begin,
00095     _S_token_subexpr_end,
00096     _S_token_word_begin,
00097     _S_token_word_end,
00098     _S_token_unknown
00099       };
00100 
00101     public:
00102       _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
00103            std::locale __loc)
00104       : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
00105         _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
00106       { _M_advance(); }
00107 
00108       void
00109       _M_advance();
00110 
00111       _TokenT
00112       _M_token() const
00113       { return _M_curToken; }
00114 
00115       const _StringT&
00116       _M_value() const
00117       { return _M_curValue; }
00118 
00119 #ifdef _GLIBCXX_DEBUG
00120       std::ostream&
00121       _M_print(std::ostream&);
00122 #endif
00123 
00124     private:
00125       void
00126       _M_eat_escape();
00127 
00128       void
00129       _M_scan_in_brace();
00130 
00131       void
00132       _M_scan_in_bracket();
00133 
00134       void
00135       _M_eat_charclass();
00136 
00137       void
00138       _M_eat_equivclass();
00139 
00140       void
00141       _M_eat_collsymbol();
00142 
00143     private:
00144       _IteratorT  _M_current;
00145       _IteratorT  _M_end;
00146       _FlagT      _M_flags;
00147       _CtypeT&    _M_ctype;
00148       _TokenT     _M_curToken;
00149       _StringT    _M_curValue;
00150       _StateT     _M_state;
00151     };
00152 
00153   template<typename _InputIterator>
00154     void
00155     _Scanner<_InputIterator>::
00156     _M_advance()
00157     {
00158       if (_M_current == _M_end)
00159     {
00160       _M_curToken = _S_token_eof;
00161       return;
00162     }
00163 
00164       _CharT __c = *_M_current;
00165       if (_M_state & _S_state_in_bracket)
00166     {
00167       _M_scan_in_bracket();
00168       return;
00169     }
00170       if (_M_state & _S_state_in_brace)
00171     {
00172       _M_scan_in_brace();
00173       return;
00174     }
00175       else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
00176     {
00177       _M_curToken = _S_token_line_begin;
00178       ++_M_current;
00179       return;
00180     }
00181       else if (__c == _M_ctype.widen('$'))
00182     {
00183       _M_curToken = _S_token_line_end;
00184       ++_M_current;
00185       return;
00186     }
00187       else if (__c == _M_ctype.widen('.'))
00188     {
00189       _M_curToken = _S_token_anychar;
00190       ++_M_current;
00191       return;
00192     }
00193       else if (__c == _M_ctype.widen('*'))
00194     {
00195       _M_curToken = _S_token_closure0;
00196       ++_M_current;
00197       return;
00198     }
00199       else if (__c == _M_ctype.widen('+'))
00200     {
00201       _M_curToken = _S_token_closure1;
00202       ++_M_current;
00203       return;
00204     }
00205       else if (__c == _M_ctype.widen('|'))
00206     {
00207       _M_curToken = _S_token_or;
00208       ++_M_current;
00209       return;
00210     }
00211       else if (__c == _M_ctype.widen('['))
00212     {
00213       _M_curToken = _S_token_bracket_begin;
00214       _M_state |= (_S_state_in_bracket | _S_state_at_start);
00215       ++_M_current;
00216       return;
00217     }
00218       else if (__c == _M_ctype.widen('\\'))
00219     {
00220       _M_eat_escape();
00221       return;
00222     }
00223       else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00224     {
00225       if (__c == _M_ctype.widen('('))
00226         {
00227           _M_curToken = _S_token_subexpr_begin;
00228           ++_M_current;
00229           return;
00230         }
00231       else if (__c == _M_ctype.widen(')'))
00232         {
00233           _M_curToken = _S_token_subexpr_end;
00234           ++_M_current;
00235           return;
00236         }
00237       else if (__c == _M_ctype.widen('{'))
00238         {
00239           _M_curToken = _S_token_interval_begin;
00240           _M_state |= _S_state_in_brace;
00241           ++_M_current;
00242           return;
00243         }
00244     }
00245 
00246       _M_curToken = _S_token_ord_char;
00247       _M_curValue.assign(1, __c);
00248       ++_M_current;
00249     }
00250 
00251 
00252   template<typename _InputIterator>
00253     void
00254     _Scanner<_InputIterator>::
00255     _M_scan_in_brace()
00256     {
00257       if (_M_ctype.is(_CtypeT::digit, *_M_current))
00258     {
00259       _M_curToken = _S_token_dup_count;
00260       _M_curValue.assign(1, *_M_current);
00261       ++_M_current;
00262       while (_M_current != _M_end
00263          && _M_ctype.is(_CtypeT::digit, *_M_current))
00264         {
00265           _M_curValue += *_M_current;
00266           ++_M_current;
00267         }
00268       return;
00269     }
00270       else if (*_M_current == _M_ctype.widen(','))
00271     {
00272       _M_curToken = _S_token_comma;
00273       ++_M_current;
00274       return;
00275     }
00276       if (_M_flags & (regex_constants::basic | regex_constants::grep))
00277     {
00278       if (*_M_current == _M_ctype.widen('\\'))
00279         _M_eat_escape();
00280     }
00281       else 
00282     {
00283       if (*_M_current == _M_ctype.widen('}'))
00284         {
00285           _M_curToken = _S_token_interval_end;
00286           _M_state &= ~_S_state_in_brace;
00287           ++_M_current;
00288           return;
00289         }
00290     }
00291     }
00292 
00293   template<typename _InputIterator>
00294     void
00295     _Scanner<_InputIterator>::
00296     _M_scan_in_bracket()
00297     {
00298       if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
00299     {
00300       _M_curToken = _S_token_inverse_class;
00301       _M_state &= ~_S_state_at_start;
00302       ++_M_current;
00303       return;
00304     }
00305       else if (*_M_current == _M_ctype.widen('['))
00306     {
00307       ++_M_current;
00308       if (_M_current == _M_end)
00309         {
00310           _M_curToken = _S_token_eof;
00311           return;
00312         }
00313 
00314       if (*_M_current == _M_ctype.widen('.'))
00315         {
00316           _M_curToken = _S_token_collsymbol;
00317           _M_eat_collsymbol();
00318           return;
00319         }
00320       else if (*_M_current == _M_ctype.widen(':'))
00321         {
00322           _M_curToken = _S_token_char_class_name;
00323           _M_eat_charclass();
00324           return;
00325         }
00326       else if (*_M_current == _M_ctype.widen('='))
00327         {
00328           _M_curToken = _S_token_equiv_class_name;
00329           _M_eat_equivclass();
00330           return;
00331         }
00332     }
00333       else if (*_M_current == _M_ctype.widen('-'))
00334     {
00335       _M_curToken = _S_token_dash;
00336       ++_M_current;
00337       return;
00338     }
00339       else if (*_M_current == _M_ctype.widen(']'))
00340     {
00341       if (!(_M_flags & regex_constants::ECMAScript)
00342           || !(_M_state & _S_state_at_start))
00343         {
00344           // special case: only if  _not_ chr first after
00345           // '[' or '[^' and if not ECMAscript
00346           _M_curToken = _S_token_bracket_end;
00347           ++_M_current;
00348           return;
00349         }
00350     }
00351       _M_curToken = _S_token_collelem_single;
00352       _M_curValue.assign(1, *_M_current);
00353       ++_M_current;
00354     }
00355 
00356   template<typename _InputIterator>
00357     void
00358     _Scanner<_InputIterator>::
00359     _M_eat_escape()
00360     {
00361       ++_M_current;
00362       if (_M_current == _M_end)
00363     {
00364       _M_curToken = _S_token_eof;
00365       return;
00366     }
00367       _CharT __c = *_M_current;
00368       ++_M_current;
00369 
00370       if (__c == _M_ctype.widen('('))
00371     {
00372       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00373         {
00374           _M_curToken = _S_token_ord_char;
00375           _M_curValue.assign(1, __c);
00376         }
00377       else
00378         _M_curToken = _S_token_subexpr_begin;
00379     }
00380       else if (__c == _M_ctype.widen(')'))
00381     {
00382       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00383         {
00384           _M_curToken = _S_token_ord_char;
00385           _M_curValue.assign(1, __c);
00386         }
00387       else
00388         _M_curToken = _S_token_subexpr_end;
00389     }
00390       else if (__c == _M_ctype.widen('{'))
00391     {
00392       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00393         {
00394           _M_curToken = _S_token_ord_char;
00395           _M_curValue.assign(1, __c);
00396         }
00397       else
00398         {
00399           _M_curToken = _S_token_interval_begin;
00400           _M_state |= _S_state_in_brace;
00401         }
00402     }
00403       else if (__c == _M_ctype.widen('}'))
00404     {
00405       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00406         {
00407           _M_curToken = _S_token_ord_char;
00408           _M_curValue.assign(1, __c);
00409         }
00410       else
00411         {
00412           if (!(_M_state && _S_state_in_brace))
00413         __throw_regex_error(regex_constants::error_badbrace);
00414           _M_state &= ~_S_state_in_brace;
00415           _M_curToken = _S_token_interval_end;
00416         }
00417     }
00418       else if (__c == _M_ctype.widen('x'))
00419     {
00420       ++_M_current;
00421       if (_M_current == _M_end)
00422         {
00423           _M_curToken = _S_token_eof;
00424           return;
00425         }
00426       if (_M_ctype.is(_CtypeT::digit, *_M_current))
00427         {
00428           _M_curValue.assign(1, *_M_current);
00429           ++_M_current;
00430           if (_M_current == _M_end)
00431         {
00432           _M_curToken = _S_token_eof;
00433           return;
00434         }
00435           if (_M_ctype.is(_CtypeT::digit, *_M_current))
00436         {
00437           _M_curValue += *_M_current;
00438           ++_M_current;
00439           return;
00440         }
00441         }
00442     }
00443       else if (__c == _M_ctype.widen('^')
00444            || __c == _M_ctype.widen('.')
00445            || __c == _M_ctype.widen('*')
00446            || __c == _M_ctype.widen('$')
00447            || __c == _M_ctype.widen('\\'))
00448     {
00449       _M_curToken = _S_token_ord_char;
00450       _M_curValue.assign(1, __c);
00451     }
00452       else if (_M_ctype.is(_CtypeT::digit, __c))
00453     {
00454       _M_curToken = _S_token_backref;
00455       _M_curValue.assign(1, __c);
00456     }
00457       else
00458     __throw_regex_error(regex_constants::error_escape);
00459     }
00460 
00461 
00462   // Eats a character class or throwns an exception.
00463   // current point to ':' delimiter on entry, char after ']' on return
00464   template<typename _InputIterator>
00465     void
00466     _Scanner<_InputIterator>::
00467     _M_eat_charclass()
00468     {
00469       ++_M_current; // skip ':'
00470       if (_M_current == _M_end)
00471     __throw_regex_error(regex_constants::error_ctype);
00472       for (_M_curValue.clear();
00473        _M_current != _M_end && *_M_current != _M_ctype.widen(':');
00474        ++_M_current)
00475     _M_curValue += *_M_current;
00476       if (_M_current == _M_end)
00477     __throw_regex_error(regex_constants::error_ctype);
00478       ++_M_current; // skip ':'
00479       if (*_M_current != _M_ctype.widen(']'))
00480     __throw_regex_error(regex_constants::error_ctype);
00481       ++_M_current; // skip ']'
00482     }
00483 
00484 
00485   template<typename _InputIterator>
00486     void
00487     _Scanner<_InputIterator>::
00488     _M_eat_equivclass()
00489     {
00490       ++_M_current; // skip '='
00491       if (_M_current == _M_end)
00492     __throw_regex_error(regex_constants::error_collate);
00493       for (_M_curValue.clear();
00494        _M_current != _M_end && *_M_current != _M_ctype.widen('=');
00495        ++_M_current)
00496     _M_curValue += *_M_current;
00497       if (_M_current == _M_end)
00498     __throw_regex_error(regex_constants::error_collate);
00499       ++_M_current; // skip '='
00500       if (*_M_current != _M_ctype.widen(']'))
00501     __throw_regex_error(regex_constants::error_collate);
00502       ++_M_current; // skip ']'
00503     }
00504 
00505 
00506   template<typename _InputIterator>
00507     void
00508     _Scanner<_InputIterator>::
00509     _M_eat_collsymbol()
00510     {
00511       ++_M_current; // skip '.'
00512       if (_M_current == _M_end)
00513     __throw_regex_error(regex_constants::error_collate);
00514       for (_M_curValue.clear();
00515        _M_current != _M_end && *_M_current != _M_ctype.widen('.');
00516        ++_M_current)
00517     _M_curValue += *_M_current;
00518       if (_M_current == _M_end)
00519     __throw_regex_error(regex_constants::error_collate);
00520       ++_M_current; // skip '.'
00521       if (*_M_current != _M_ctype.widen(']'))
00522     __throw_regex_error(regex_constants::error_collate);
00523       ++_M_current; // skip ']'
00524     }
00525 
00526 #ifdef _GLIBCXX_DEBUG
00527   template<typename _InputIterator>
00528     std::ostream&
00529     _Scanner<_InputIterator>::
00530     _M_print(std::ostream& ostr)
00531     {
00532       switch (_M_curToken)
00533       {
00534     case _S_token_anychar:
00535       ostr << "any-character\n";
00536       break;
00537     case _S_token_backref:
00538       ostr << "backref\n";
00539       break;
00540     case _S_token_bracket_begin:
00541       ostr << "bracket-begin\n";
00542       break;
00543     case _S_token_bracket_end:
00544       ostr << "bracket-end\n";
00545       break;
00546     case _S_token_char_class_name:
00547       ostr << "char-class-name \"" << _M_curValue << "\"\n";
00548       break;
00549     case _S_token_closure0:
00550       ostr << "closure0\n";
00551       break;
00552     case _S_token_closure1:
00553       ostr << "closure1\n";
00554       break;
00555     case _S_token_collelem_multi:
00556       ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
00557       break;
00558     case _S_token_collelem_single:
00559       ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
00560       break;
00561     case _S_token_collsymbol:
00562       ostr << "collsymbol \"" << _M_curValue << "\"\n";
00563       break;
00564     case _S_token_comma:
00565       ostr << "comma\n";
00566       break;
00567     case _S_token_dash:
00568       ostr << "dash\n";
00569       break;
00570     case _S_token_dup_count:
00571       ostr << "dup count: " << _M_curValue << "\n";
00572       break;
00573     case _S_token_eof:
00574       ostr << "EOF\n";
00575       break;
00576     case _S_token_equiv_class_name:
00577       ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
00578       break;
00579     case _S_token_interval_begin:
00580       ostr << "interval begin\n";
00581       break;
00582     case _S_token_interval_end:
00583       ostr << "interval end\n";
00584       break;
00585     case _S_token_line_begin:
00586       ostr << "line begin\n";
00587       break;
00588     case _S_token_line_end:
00589       ostr << "line end\n";
00590       break;
00591     case _S_token_opt:
00592       ostr << "opt\n";
00593       break;
00594     case _S_token_or:
00595       ostr << "or\n";
00596       break;
00597     case _S_token_ord_char:
00598       ostr << "ordinary character: \"" << _M_value() << "\"\n";
00599       break;
00600     case _S_token_quoted_char:
00601       ostr << "quoted char\n";
00602       break;
00603     case _S_token_subexpr_begin:
00604       ostr << "subexpr begin\n";
00605       break;
00606     case _S_token_subexpr_end:
00607       ostr << "subexpr end\n";
00608       break;
00609     case _S_token_word_begin:
00610       ostr << "word begin\n";
00611       break;
00612     case _S_token_word_end:
00613       ostr << "word end\n";
00614       break;
00615     case _S_token_unknown:
00616       ostr << "-- unknown token --\n";
00617       break;
00618       }
00619       return ostr;
00620     }
00621 #endif
00622 
00623   // Builds an NFA from an input iterator interval.
00624   template<typename _InIter, typename _TraitsT>
00625     class _Compiler
00626     {
00627     public:
00628       typedef _InIter                                            _IterT;
00629       typedef typename std::iterator_traits<_InIter>::value_type _CharT;
00630       typedef std::basic_string<_CharT>                          _StringT;
00631       typedef regex_constants::syntax_option_type                _FlagT;
00632 
00633     public:
00634       _Compiler(const _InIter& __b, const _InIter& __e,
00635         _TraitsT& __traits, _FlagT __flags);
00636 
00637       const _Nfa&
00638       _M_nfa() const
00639       { return _M_state_store; }
00640 
00641     private:
00642       typedef _Scanner<_InIter>                              _ScannerT;
00643       typedef typename _ScannerT::_TokenT                    _TokenT;
00644       typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
00645       typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
00646 
00647       // accepts a specific token or returns false.
00648       bool
00649       _M_match_token(_TokenT __token);
00650 
00651       void
00652       _M_disjunction();
00653 
00654       bool
00655       _M_alternative();
00656 
00657       bool
00658       _M_term();
00659 
00660       bool
00661       _M_assertion();
00662 
00663       bool
00664       _M_quantifier();
00665 
00666       bool
00667       _M_atom();
00668 
00669       bool
00670       _M_bracket_expression();
00671 
00672       bool
00673       _M_bracket_list(_RMatcherT& __matcher);
00674 
00675       bool
00676       _M_follow_list(_RMatcherT& __matcher);
00677 
00678       bool
00679       _M_follow_list2(_RMatcherT& __matcher);
00680 
00681       bool
00682       _M_expression_term(_RMatcherT& __matcher);
00683 
00684       bool
00685       _M_range_expression(_RMatcherT& __matcher);
00686 
00687       bool
00688       _M_start_range(_RMatcherT& __matcher);
00689 
00690       bool
00691       _M_collating_symbol(_RMatcherT& __matcher);
00692 
00693       bool
00694       _M_equivalence_class(_RMatcherT& __matcher);
00695 
00696       bool
00697       _M_character_class(_RMatcherT& __matcher);
00698 
00699       int
00700       _M_cur_int_value(int __radix);
00701 
00702     private:
00703       _TraitsT&      _M_traits;
00704       _ScannerT      _M_scanner;
00705       _StringT       _M_cur_value;
00706       _Nfa           _M_state_store;
00707       _StackT        _M_stack;
00708     };
00709 
00710   template<typename _InIter, typename _TraitsT>
00711     _Compiler<_InIter, _TraitsT>::
00712     _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
00713           _Compiler<_InIter, _TraitsT>::_FlagT __flags)
00714     : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
00715       _M_state_store(__flags)
00716     {
00717       using std::bind;
00718       using std::placeholders::_1;
00719       using std::placeholders::_2;
00720       typedef _StartTagger<_InIter, _TraitsT> _Start;
00721       typedef _EndTagger<_InIter, _TraitsT> _End;
00722 
00723       _StateSeq __r(_M_state_store,
00724                 _M_state_store._M_insert_subexpr_begin(
00725                         bind(_Start(0), _1, _2)));
00726       _M_disjunction();
00727       if (!_M_stack.empty())
00728     {
00729       __r._M_append(_M_stack.top());
00730       _M_stack.pop();
00731     }
00732       __r._M_append(_M_state_store.
00733             _M_insert_subexpr_end(0, bind(_End(0), _1, _2)));
00734       __r._M_append(_M_state_store._M_insert_accept());
00735     }
00736 
00737   template<typename _InIter, typename _TraitsT>
00738     bool
00739     _Compiler<_InIter, _TraitsT>::
00740     _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
00741     { 
00742       if (token == _M_scanner._M_token())
00743     {
00744       _M_cur_value = _M_scanner._M_value();
00745       _M_scanner._M_advance();
00746       return true;
00747     }
00748       return false;
00749     }
00750 
00751   template<typename _InIter, typename _TraitsT>
00752     void
00753     _Compiler<_InIter, _TraitsT>::
00754     _M_disjunction()
00755     {
00756       this->_M_alternative();
00757       if (_M_match_token(_ScannerT::_S_token_or))
00758     {
00759       _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
00760       this->_M_disjunction();
00761       _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
00762       _M_stack.push(_StateSeq(__alt1, __alt2));
00763     }
00764     }
00765 
00766   template<typename _InIter, typename _TraitsT>
00767     bool
00768     _Compiler<_InIter, _TraitsT>::
00769     _M_alternative()
00770     {
00771       if (this->_M_term())
00772     {
00773       _StateSeq __re = _M_stack.top(); _M_stack.pop();
00774       this->_M_alternative();
00775       if (!_M_stack.empty())
00776         {
00777           __re._M_append(_M_stack.top());
00778           _M_stack.pop();
00779         }
00780       _M_stack.push(__re);
00781       return true;
00782     }
00783       return false;
00784     }
00785 
00786   template<typename _InIter, typename _TraitsT>
00787     bool
00788     _Compiler<_InIter, _TraitsT>::
00789     _M_term()
00790     {
00791       if (this->_M_assertion())
00792     return true;
00793       if (this->_M_atom())
00794     {
00795       this->_M_quantifier();
00796       return true;
00797     }
00798       return false;
00799     }
00800 
00801   template<typename _InIter, typename _TraitsT>
00802     bool
00803     _Compiler<_InIter, _TraitsT>::
00804     _M_assertion()
00805     {
00806       if (_M_match_token(_ScannerT::_S_token_line_begin))
00807     {
00808       // __m.push(_Matcher::_S_opcode_line_begin);
00809       return true;
00810     }
00811       if (_M_match_token(_ScannerT::_S_token_line_end))
00812     {
00813       // __m.push(_Matcher::_S_opcode_line_end);
00814       return true;
00815     }
00816       if (_M_match_token(_ScannerT::_S_token_word_begin))
00817     {
00818       // __m.push(_Matcher::_S_opcode_word_begin);
00819       return true;
00820     }
00821       if (_M_match_token(_ScannerT::_S_token_word_end))
00822     {
00823       // __m.push(_Matcher::_S_opcode_word_end);
00824       return true;
00825     }
00826       return false;
00827     }
00828 
00829   template<typename _InIter, typename _TraitsT>
00830     bool
00831     _Compiler<_InIter, _TraitsT>::
00832     _M_quantifier()
00833     {
00834       if (_M_match_token(_ScannerT::_S_token_closure0))
00835     {
00836       if (_M_stack.empty())
00837         __throw_regex_error(regex_constants::error_badrepeat);
00838       _StateSeq __r(_M_stack.top(), -1);
00839       __r._M_append(__r._M_front());
00840       _M_stack.pop();
00841       _M_stack.push(__r);
00842       return true;
00843     }
00844       if (_M_match_token(_ScannerT::_S_token_closure1))
00845     {
00846       if (_M_stack.empty())
00847         __throw_regex_error(regex_constants::error_badrepeat);
00848       _StateSeq __r(_M_state_store,
00849             _M_state_store.
00850             _M_insert_alt(_S_invalid_state_id,
00851                       _M_stack.top()._M_front()));
00852       _M_stack.top()._M_append(__r);
00853       return true;
00854     }
00855       if (_M_match_token(_ScannerT::_S_token_opt))
00856     {
00857       if (_M_stack.empty())
00858       __throw_regex_error(regex_constants::error_badrepeat);
00859       _StateSeq __r(_M_stack.top(), -1);
00860       _M_stack.pop();
00861       _M_stack.push(__r);
00862       return true;
00863     }
00864       if (_M_match_token(_ScannerT::_S_token_interval_begin))
00865     {
00866       if (_M_stack.empty())
00867         __throw_regex_error(regex_constants::error_badrepeat);
00868       if (!_M_match_token(_ScannerT::_S_token_dup_count))
00869         __throw_regex_error(regex_constants::error_badbrace);
00870       _StateSeq __r(_M_stack.top());
00871       int __min_rep = _M_cur_int_value(10);
00872       for (int __i = 1; __i < __min_rep; ++__i)
00873         _M_stack.top()._M_append(__r._M_clone()); 
00874       if (_M_match_token(_ScannerT::_S_token_comma))
00875         if (_M_match_token(_ScannerT::_S_token_dup_count))
00876           {
00877         int __n = _M_cur_int_value(10) - __min_rep;
00878         if (__n < 0)
00879           __throw_regex_error(regex_constants::error_badbrace);
00880         for (int __i = 0; __i < __n; ++__i)
00881           {
00882             _StateSeq __r(_M_state_store,
00883                   _M_state_store.
00884                   _M_insert_alt(_S_invalid_state_id,
00885                         _M_stack.top()._M_front()));
00886             _M_stack.top()._M_append(__r);
00887           }
00888           }
00889         else
00890           {
00891         _StateSeq __r(_M_stack.top(), -1);
00892         __r._M_push_back(__r._M_front());
00893         _M_stack.pop();
00894         _M_stack.push(__r);
00895           }
00896       if (!_M_match_token(_ScannerT::_S_token_interval_end))
00897         __throw_regex_error(regex_constants::error_brace);
00898       return true;
00899     }
00900       return false;
00901     }
00902 
00903   template<typename _InIter, typename _TraitsT>
00904     bool
00905     _Compiler<_InIter, _TraitsT>::
00906     _M_atom()
00907     {
00908       using std::bind;
00909       using std::placeholders::_1;
00910       using std::placeholders::_2;
00911       typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
00912       typedef _StartTagger<_InIter, _TraitsT> _Start;
00913       typedef _EndTagger<_InIter, _TraitsT> _End;
00914 
00915       if (_M_match_token(_ScannerT::_S_token_anychar))
00916     {
00917       _M_stack.push(_StateSeq(_M_state_store,
00918                   _M_state_store.
00919                   _M_insert_matcher(bind(_AnyMatcher, _1))));
00920       return true;
00921     }
00922       if (_M_match_token(_ScannerT::_S_token_ord_char))
00923     {
00924       _M_stack.push(_StateSeq
00925             (_M_state_store, _M_state_store. 
00926              _M_insert_matcher
00927              (bind(_CMatcher(_M_cur_value[0], _M_traits), _1))));
00928       return true;
00929     }
00930       if (_M_match_token(_ScannerT::_S_token_quoted_char))
00931     {
00932       // note that in the ECMA grammar, this case covers backrefs.
00933       _M_stack.push(_StateSeq(_M_state_store,
00934                   _M_state_store.
00935                   _M_insert_matcher
00936                   (bind(_CMatcher(_M_cur_value[0], _M_traits),
00937                     _1))));
00938       return true;
00939     }
00940       if (_M_match_token(_ScannerT::_S_token_backref))
00941     {
00942       // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
00943       return true;
00944     }
00945       if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
00946     {
00947       int __mark = _M_state_store._M_sub_count();
00948       _StateSeq __r(_M_state_store,
00949             _M_state_store.
00950             _M_insert_subexpr_begin(bind(_Start(__mark), _1, _2)));
00951       this->_M_disjunction();
00952       if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
00953         __throw_regex_error(regex_constants::error_paren);
00954       if (!_M_stack.empty())
00955         {
00956           __r._M_append(_M_stack.top());
00957           _M_stack.pop();
00958         }
00959       __r._M_append(_M_state_store._M_insert_subexpr_end
00960             (__mark, bind(_End(__mark), _1, _2)));
00961       _M_stack.push(__r);
00962       return true;
00963     }
00964       return _M_bracket_expression();
00965     }
00966 
00967   template<typename _InIter, typename _TraitsT>
00968     bool
00969     _Compiler<_InIter, _TraitsT>::
00970     _M_bracket_expression()
00971     {
00972       using std::bind;
00973       using std::placeholders::_1;
00974       if (_M_match_token(_ScannerT::_S_token_bracket_begin))
00975     {
00976       _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
00977                    _M_traits);
00978       if (!_M_bracket_list(__matcher)
00979           || !_M_match_token(_ScannerT::_S_token_bracket_end))
00980         __throw_regex_error(regex_constants::error_brack);
00981       _M_stack.push(_StateSeq(_M_state_store,
00982                   _M_state_store._M_insert_matcher
00983                   (bind(__matcher, _1))));
00984       return true;
00985     }
00986       return false;
00987     }
00988 
00989   // If the dash is the last character in the bracket expression, it is not
00990   // special.
00991   template<typename _InIter, typename _TraitsT>
00992     bool
00993     _Compiler<_InIter, _TraitsT>::
00994     _M_bracket_list(_RMatcherT& __matcher)
00995     {
00996       if (_M_follow_list(__matcher))
00997     {
00998       if (_M_match_token(_ScannerT::_S_token_dash))
00999         __matcher._M_add_char(_M_cur_value[0]);
01000       return true;
01001     }
01002       return false;
01003     }
01004 
01005   template<typename _InIter, typename _TraitsT>
01006     bool
01007     _Compiler<_InIter, _TraitsT>::
01008     _M_follow_list(_RMatcherT& __matcher)
01009     { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
01010 
01011   template<typename _InIter, typename _TraitsT>
01012     bool
01013     _Compiler<_InIter, _TraitsT>::
01014     _M_follow_list2(_RMatcherT& __matcher)
01015     {
01016       if (_M_expression_term(__matcher))
01017     return _M_follow_list2(__matcher);
01018       return true;
01019     }
01020 
01021   template<typename _InIter, typename _TraitsT>
01022     bool
01023     _Compiler<_InIter, _TraitsT>::
01024     _M_expression_term(_RMatcherT& __matcher)
01025     {
01026       return (_M_collating_symbol(__matcher)
01027           || _M_character_class(__matcher)
01028           || _M_equivalence_class(__matcher)
01029           || (_M_start_range(__matcher)
01030           && _M_range_expression(__matcher)));
01031     }
01032 
01033   template<typename _InIter, typename _TraitsT>
01034     bool
01035     _Compiler<_InIter, _TraitsT>::
01036     _M_range_expression(_RMatcherT& __matcher)
01037     {
01038       if (!_M_collating_symbol(__matcher))
01039     if (!_M_match_token(_ScannerT::_S_token_dash))
01040       __throw_regex_error(regex_constants::error_range);
01041       __matcher._M_make_range();
01042       return true;
01043     }
01044 
01045   template<typename _InIter, typename _TraitsT>
01046     bool
01047     _Compiler<_InIter, _TraitsT>::
01048     _M_start_range(_RMatcherT& __matcher)
01049     { return _M_match_token(_ScannerT::_S_token_dash); }
01050 
01051   template<typename _InIter, typename _TraitsT>
01052     bool
01053     _Compiler<_InIter, _TraitsT>::
01054     _M_collating_symbol(_RMatcherT& __matcher)
01055     {
01056       if (_M_match_token(_ScannerT::_S_token_collelem_single))
01057     {
01058       __matcher._M_add_char(_M_cur_value[0]);
01059       return true;
01060     }
01061       if (_M_match_token(_ScannerT::_S_token_collsymbol))
01062     {
01063       __matcher._M_add_collating_element(_M_cur_value);
01064       return true;
01065     }
01066       return false;
01067     }
01068 
01069   template<typename _InIter, typename _TraitsT>
01070     bool
01071     _Compiler<_InIter, _TraitsT>::
01072     _M_equivalence_class(_RMatcherT& __matcher)
01073     {
01074       if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
01075     {
01076       __matcher._M_add_equivalence_class(_M_cur_value);
01077       return true;
01078     }
01079       return false;
01080     }
01081 
01082   template<typename _InIter, typename _TraitsT>
01083     bool
01084     _Compiler<_InIter, _TraitsT>::
01085     _M_character_class(_RMatcherT& __matcher)
01086     {
01087       if (_M_match_token(_ScannerT::_S_token_char_class_name))
01088     {
01089       __matcher._M_add_character_class(_M_cur_value);
01090       return true;
01091     }
01092       return false;
01093     }
01094 
01095   template<typename _InIter, typename _TraitsT>
01096     int
01097     _Compiler<_InIter, _TraitsT>::
01098     _M_cur_int_value(int __radix)
01099     {
01100       int __v = 0;
01101       for (typename _StringT::size_type __i = 0;
01102        __i < _M_cur_value.length(); ++__i)
01103     __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
01104       return __v;
01105     }
01106 
01107   template<typename _InIter, typename _TraitsT>
01108     _AutomatonPtr
01109     __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
01110           regex_constants::syntax_option_type __f)
01111     { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
01112                                         __f)._M_nfa())); }
01113 
01114 } // namespace __regex
01115 
01116 _GLIBCXX_END_NAMESPACE_VERSION
01117 } // namespace
01118 
01119 /* vim: set ts=8 sw=2 sts=2: */