libstdc++
locale_conv.h
Go to the documentation of this file.
1 // wstring_convert implementation -*- C++ -*-
2 
3 // Copyright (C) 2015-2019 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /** @file bits/locale_conv.h
26  * This is an internal header file, included by other library headers.
27  * Do not attempt to use it directly. @headername{locale}
28  */
29 
30 #ifndef _LOCALE_CONV_H
31 #define _LOCALE_CONV_H 1
32 
33 #if __cplusplus < 201103L
34 # include <bits/c++0x_warning.h>
35 #else
36 
37 #include <streambuf>
38 #include <bits/stringfwd.h>
39 #include <bits/allocator.h>
40 #include <bits/codecvt.h>
41 #include <bits/unique_ptr.h>
42 
43 namespace std _GLIBCXX_VISIBILITY(default)
44 {
45 _GLIBCXX_BEGIN_NAMESPACE_VERSION
46 
47  /**
48  * @addtogroup locales
49  * @{
50  */
51 
/**
 *  Shared conversion loop used by the __str_codecvt_in / __str_codecvt_out
 *  helpers.
 *
 *  Converts the input range [__first, __last) into @p __outstr by
 *  repeatedly invoking the member function @p __fn on the facet @p __cvt.
 *
 *  @param __first   Start of the input sequence.
 *  @param __last    End of the input sequence.
 *  @param __outstr  String that receives the converted characters.
 *  @param __cvt     Facet performing the conversion.
 *  @param __state   Conversion state, updated by the facet.
 *  @param __count   Set to the number of input elements consumed.
 *  @param __fn      Pointer to the facet member function to call.
 *  @return  false if the facet reported codecvt_base::error, otherwise true.
 */
template<typename _OutStr, typename _InChar, typename _Codecvt,
         typename _State, typename _Fn>
  bool
  __do_str_codecvt(const _InChar* __first, const _InChar* __last,
                   _OutStr& __outstr, const _Codecvt& __cvt, _State& __state,
                   size_t& __count, _Fn __fn)
  {
    // An empty input range yields an empty result without touching the facet.
    if (__first == __last)
      {
        __outstr.clear();
        __count = 0;
        return true;
      }

    const auto __maxlen = __cvt.max_length() + 1;
    auto __next = __first;
    size_t __outchars = 0;
    codecvt_base::result __result;

    for (;;)
      {
        // Make room for __maxlen output characters per remaining input
        // element, then convert into the space after what is already
        // written.
        __outstr.resize(__outstr.size() + (__last - __next) * __maxlen);
        auto __outbegin = &__outstr.front();
        auto __outnext = __outbegin + __outchars;
        auto const __outlast = __outbegin + __outstr.size();
        __result = (__cvt.*__fn)(__state, __next, __last, __next,
                                 __outnext, __outlast, __outnext);
        __outchars = __outnext - &__outstr.front();

        // Go round again only for a partial conversion that stopped with
        // input left over and fewer than __maxlen free output slots.
        if (__result != codecvt_base::partial || __next == __last
            || (__outstr.size() - __outchars) >= __maxlen)
          break;
      }

    switch (__result)
      {
      case codecvt_base::error:
        __count = __next - __first;
        return false;

      case codecvt_base::noconv:
        // No conversion was needed: the output is a copy of the input.
        __outstr.assign(__first, __last);
        __count = __last - __first;
        break;

      default:
        // Drop the unused tail of the over-allocated buffer.
        __outstr.resize(__outchars);
        __count = __next - __first;
        break;
      }

    return true;
  }
102 
// Convert a narrow character sequence to a wide string.
//
// Forwards to __do_str_codecvt using the facet's in() member.  On return
// __count holds the number of input characters consumed; the result is
// false if the facet reported an error.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in(const char* __first, const char* __last,
                   basic_string<_CharT, _Traits, _Alloc>& __outstr,
                   const codecvt<_CharT, char, _State>& __cvt,
                   _State& __state, size_t& __count)
  {
    typedef codecvt<_CharT, char, _State> _Facet;
    typedef codecvt_base::result
      (_Facet::*_InFn)(_State&, const char*, const char*, const char*&,
                       _CharT*, _CharT*, _CharT*&) const;

    // Spelling out the pointer-to-member type fixes which in() is taken.
    const _InFn __convert = &_Facet::in;
    return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                            __count, __convert);
  }
120 
// Convenience overload of __str_codecvt_in that starts from a
// value-initialized conversion state and discards the consumed-count.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_in(const char* __first, const char* __last,
                   basic_string<_CharT, _Traits, _Alloc>& __outstr,
                   const codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __consumed;          // written by the callee, not reported
    _State __initial = {};
    return __str_codecvt_in(__first, __last, __outstr, __cvt,
                            __initial, __consumed);
  }
131 
// Convert a wide character sequence to a narrow (char) string.
//
// Forwards to __do_str_codecvt using the facet's out() member.  On return
// __count holds the number of input characters consumed; the result is
// false if the facet reported an error.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    basic_string<char, _Traits, _Alloc>& __outstr,
                    const codecvt<_CharT, char, _State>& __cvt,
                    _State& __state, size_t& __count)
  {
    typedef codecvt<_CharT, char, _State> _Facet;
    typedef codecvt_base::result
      (_Facet::*_OutFn)(_State&, const _CharT*, const _CharT*,
                        const _CharT*&, char*, char*, char*&) const;

    // Spelling out the pointer-to-member type fixes which out() is taken.
    const _OutFn __convert = &_Facet::out;
    return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
                            __count, __convert);
  }
149 
// Convenience overload of __str_codecvt_out (char output) that starts from
// a value-initialized conversion state and discards the consumed-count.
template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
  inline bool
  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
                    basic_string<char, _Traits, _Alloc>& __outstr,
                    const codecvt<_CharT, char, _State>& __cvt)
  {
    size_t __consumed;          // written by the callee, not reported
    _State __initial = {};
    return __str_codecvt_out(__first, __last, __outstr, __cvt,
                             __initial, __consumed);
  }
160 
161 #ifdef _GLIBCXX_USE_CHAR8_T
162 
163  // Convert wide character string to narrow.
164  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
165  inline bool
166  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
167  basic_string<char8_t, _Traits, _Alloc>& __outstr,
168  const codecvt<_CharT, char8_t, _State>& __cvt,
169  _State& __state, size_t& __count)
170  {
171  using _Codecvt = codecvt<_CharT, char8_t, _State>;
172  using _ConvFn
173  = codecvt_base::result
174  (_Codecvt::*)(_State&, const _CharT*, const _CharT*, const _CharT*&,
175  char8_t*, char8_t*, char8_t*&) const;
177  return __do_str_codecvt(__first, __last, __outstr, __cvt, __state,
178  __count, __fn);
179  }
180 
181  template<typename _CharT, typename _Traits, typename _Alloc, typename _State>
182  inline bool
183  __str_codecvt_out(const _CharT* __first, const _CharT* __last,
184  basic_string<char8_t, _Traits, _Alloc>& __outstr,
185  const codecvt<_CharT, char8_t, _State>& __cvt)
186  {
187  _State __state = {};
188  size_t __n;
189  return __str_codecvt_out(__first, __last, __outstr, __cvt, __state, __n);
190  }
191 
192 #endif // _GLIBCXX_USE_CHAR8_T
193 
194 #ifdef _GLIBCXX_USE_WCHAR_T
195 
196 _GLIBCXX_BEGIN_NAMESPACE_CXX11
197 
/// String conversions
///
/// Converts between byte strings and wide strings using an owned
/// @c _Codecvt facet.
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Wide_alloc = allocator<_Elem>,
         typename _Byte_alloc = allocator<char>>
  class wstring_convert
  {
  public:
    typedef basic_string<char, char_traits<char>, _Byte_alloc>   byte_string;
    typedef basic_string<_Elem, char_traits<_Elem>, _Wide_alloc> wide_string;
    typedef typename _Codecvt::state_type state_type;
    typedef typename wide_string::traits_type::int_type int_type;

    /// Default constructor.
    wstring_convert() : _M_cvt(new _Codecvt()) { }

    /** Constructor.
     *
     * @param  __pcvt The facet to use for conversions.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * A null @p __pcvt is rejected via __throw_logic_error.
     */
    explicit
    wstring_convert(_Codecvt* __pcvt) : _M_cvt(__pcvt)
    {
      if (!_M_cvt)
        __throw_logic_error("wstring_convert");
    }

    /** Construct with an initial conversion state.
     *
     * @param  __pcvt The facet to use for conversions.
     * @param  __state Initial conversion state.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * The object's conversion state will persist between conversions.
     */
    wstring_convert(_Codecvt* __pcvt, state_type __state)
    : _M_cvt(__pcvt), _M_state(__state), _M_with_cvtstate(true)
    {
      if (!_M_cvt)
        __throw_logic_error("wstring_convert");
    }

    /** Construct with error strings.
     *
     * @param  __byte_err A string to return on failed conversions.
     * @param  __wide_err A wide string to return on failed conversions.
     */
    explicit
    wstring_convert(const byte_string& __byte_err,
                    const wide_string& __wide_err = wide_string())
    : _M_cvt(new _Codecvt),
      _M_byte_err_string(__byte_err), _M_wide_err_string(__wide_err),
      _M_with_strings(true)
    {
      if (!_M_cvt)
        __throw_logic_error("wstring_convert");
    }

    ~wstring_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wstring_convert(const wstring_convert&) = delete;
    wstring_convert& operator=(const wstring_convert&) = delete;

    /// @{ Convert from bytes.
    wide_string
    from_bytes(char __byte)
    {
      char __bytes[2] = { __byte };
      return from_bytes(__bytes, __bytes+1);
    }

    wide_string
    from_bytes(const char* __ptr)
    { return from_bytes(__ptr, __ptr+char_traits<char>::length(__ptr)); }

    wide_string
    from_bytes(const byte_string& __str)
    {
      auto __ptr = __str.data();
      return from_bytes(__ptr, __ptr + __str.size());
    }

    wide_string
    from_bytes(const char* __first, const char* __last)
    {
      // Start from a fresh state unless one was supplied at construction.
      if (!_M_with_cvtstate)
        _M_state = state_type();
      wide_string __out{ _M_wide_err_string.get_allocator() };
      if (__str_codecvt_in(__first, __last, __out, *_M_cvt, _M_state,
                           _M_count))
        return __out;
      // Conversion failed: hand back the error string if one was given,
      // otherwise report the failure via __throw_range_error.
      if (_M_with_strings)
        return _M_wide_err_string;
      __throw_range_error("wstring_convert::from_bytes");
    }
    /// @}

    /// @{ Convert to bytes.
    byte_string
    to_bytes(_Elem __wchar)
    {
      _Elem __wchars[2] = { __wchar };
      return to_bytes(__wchars, __wchars+1);
    }

    byte_string
    to_bytes(const _Elem* __ptr)
    {
      return to_bytes(__ptr, __ptr+wide_string::traits_type::length(__ptr));
    }

    byte_string
    to_bytes(const wide_string& __wstr)
    {
      auto __ptr = __wstr.data();
      return to_bytes(__ptr, __ptr + __wstr.size());
    }

    byte_string
    to_bytes(const _Elem* __first, const _Elem* __last)
    {
      // Start from a fresh state unless one was supplied at construction.
      if (!_M_with_cvtstate)
        _M_state = state_type();
      byte_string __out{ _M_byte_err_string.get_allocator() };
      if (__str_codecvt_out(__first, __last, __out, *_M_cvt, _M_state,
                            _M_count))
        return __out;
      // Conversion failed: hand back the error string if one was given,
      // otherwise report the failure via __throw_range_error.
      if (_M_with_strings)
        return _M_byte_err_string;
      __throw_range_error("wstring_convert::to_bytes");
    }
    /// @}

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2174. wstring_convert::converted() should be noexcept
    /// The number of elements successfully converted in the last conversion.
    size_t converted() const noexcept { return _M_count; }

    /// The final conversion state of the last conversion.
    state_type state() const { return _M_state; }

  private:
    unique_ptr<_Codecvt>  _M_cvt;                     // owned facet
    byte_string           _M_byte_err_string;         // to_bytes fallback
    wide_string           _M_wide_err_string;         // from_bytes fallback
    state_type            _M_state = state_type();
    size_t                _M_count = 0;
    bool                  _M_with_cvtstate = false;   // state given to ctor
    bool                  _M_with_strings = false;    // error strings given
  };
351 
352 _GLIBCXX_END_NAMESPACE_CXX11
353 
/// Buffer conversions
///
/// A stream buffer of @c _Elem that reads from and writes to an underlying
/// byte stream buffer, converting with an owned @c _Codecvt facet.
template<typename _Codecvt, typename _Elem = wchar_t,
         typename _Tr = char_traits<_Elem>>
  class wbuffer_convert : public basic_streambuf<_Elem, _Tr>
  {
    typedef basic_streambuf<_Elem, _Tr> _Wide_streambuf;

  public:
    typedef typename _Codecvt::state_type state_type;

    /// Default constructor.
    wbuffer_convert() : wbuffer_convert(nullptr) { }

    /** Constructor.
     *
     * @param  __bytebuf The underlying byte stream buffer.
     * @param  __pcvt    The facet to use for conversions.
     * @param  __state   Initial conversion state.
     *
     * Takes ownership of @p __pcvt and will delete it in the destructor.
     * A null @p __pcvt is rejected via __throw_logic_error.
     */
    explicit
    wbuffer_convert(streambuf* __bytebuf, _Codecvt* __pcvt = new _Codecvt,
                    state_type __state = state_type())
    : _M_buf(__bytebuf), _M_cvt(__pcvt), _M_state(__state)
    {
      if (!_M_cvt)
        __throw_logic_error("wbuffer_convert");

      _M_always_noconv = _M_cvt->always_noconv();

      // The put/get areas are only set up when there is a byte buffer.
      // The get area starts empty, positioned after the putback slots.
      if (_M_buf)
        {
          this->setp(_M_put_area, _M_put_area + _S_buffer_length);
          this->setg(_M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length,
                     _M_get_area + _S_putback_length);
        }
    }

    ~wbuffer_convert() = default;

    // _GLIBCXX_RESOLVE_LIB_DEFECTS
    // 2176. Special members for wstring_convert and wbuffer_convert
    wbuffer_convert(const wbuffer_convert&) = delete;
    wbuffer_convert& operator=(const wbuffer_convert&) = delete;

    /// The underlying byte stream buffer.
    streambuf* rdbuf() const noexcept { return _M_buf; }

    /// Replace the underlying byte stream buffer, returning the old one.
    streambuf*
    rdbuf(streambuf *__bytebuf) noexcept
    {
      auto __prev = _M_buf;
      _M_buf = __bytebuf;
      return __prev;
    }

    /// The conversion state following the last conversion.
    state_type state() const noexcept { return _M_state; }

  protected:
    // Convert and write the put area, then sync the byte buffer.
    // Returns 0 only if every step succeeds, otherwise -1.
    int
    sync()
    { return _M_buf && _M_conv_put() && !_M_buf->pubsync() ? 0 : -1; }

    // Flush the put area through the facet, then store __out if it is
    // not eof.
    typename _Wide_streambuf::int_type
    overflow(typename _Wide_streambuf::int_type __out)
    {
      if (!_M_buf || !_M_conv_put())
        return _Tr::eof();
      else if (!_Tr::eq_int_type(__out, _Tr::eof()))
        return this->sputc(__out);
      return _Tr::not_eof(__out);
    }

    // Return the next available character, refilling the get area from
    // the byte buffer when it is exhausted.
    typename _Wide_streambuf::int_type
    underflow()
    {
      if (!_M_buf)
        return _Tr::eof();

      if (this->gptr() < this->egptr() || (_M_buf && _M_conv_get()))
        return _Tr::to_int_type(*this->gptr());
      else
        return _Tr::eof();
    }

    // Copy characters into the put area in chunks, converting and
    // flushing whenever it fills.  Returns the number of characters taken.
    streamsize
    xsputn(const typename _Wide_streambuf::char_type* __s, streamsize __n)
    {
      if (!_M_buf || __n == 0)
        return 0;
      streamsize __done = 0;
      do
        {
          auto __nn = std::min<streamsize>(this->epptr() - this->pptr(),
                                           __n - __done);
          _Tr::copy(this->pptr(), __s + __done, __nn);
          this->pbump(__nn);
          __done += __nn;
        } while (__done < __n && _M_conv_put());
      return __done;
    }

  private:
    // fill the get area from converted contents of the byte stream buffer
    bool
    _M_conv_get()
    {
      // Preserve up to _S_putback_length already-read characters in
      // front of the new data.
      const streamsize __pb1 = this->gptr() - this->eback();
      const streamsize __pb2 = _S_putback_length;
      const streamsize __npb = std::min(__pb1, __pb2);

      _Tr::move(_M_get_area + _S_putback_length - __npb,
                this->gptr() - __npb, __npb);

      // Read at least one byte, and at most the space left after any
      // bytes still unconverted from the previous call.
      streamsize __nbytes = sizeof(_M_get_buf) - _M_unconv;
      __nbytes = std::min(__nbytes, _M_buf->in_avail());
      if (__nbytes < 1)
        __nbytes = 1;
      __nbytes = _M_buf->sgetn(_M_get_buf + _M_unconv, __nbytes);
      if (__nbytes < 1)
        return false;
      __nbytes += _M_unconv;

      // convert _M_get_buf into _M_get_area

      _Elem* __outbuf = _M_get_area + _S_putback_length;
      _Elem* __outnext = __outbuf;
      const char* __bnext = _M_get_buf;

      codecvt_base::result __result;
      if (_M_always_noconv)
        __result = codecvt_base::noconv;
      else
        {
          _Elem* __outend = _M_get_area + _S_buffer_length;

          __result = _M_cvt->in(_M_state,
                                __bnext, __bnext + __nbytes, __bnext,
                                __outbuf, __outend, __outnext);
        }

      if (__result == codecvt_base::noconv)
        {
          // cast is safe because noconv means _Elem is same type as char
          auto __get_buf = reinterpret_cast<const _Elem*>(_M_get_buf);
          _Tr::copy(__outbuf, __get_buf, __nbytes);
          _M_unconv = 0;
          // NOTE(review): this path returns without calling setg(), unlike
          // the converting path below -- confirm the get area pointers are
          // meant to stay unchanged here.
          return true;
        }

      // Keep any trailing bytes the facet did not consume for next time.
      if ((_M_unconv = _M_get_buf + __nbytes - __bnext))
        char_traits<char>::move(_M_get_buf, __bnext, _M_unconv);

      this->setg(__outbuf, __outbuf, __outnext);

      return __result != codecvt_base::error;
    }

    // Catch-all overload: chosen when the arguments do not match the
    // (const char*, streamsize) overload below; always reports failure.
    bool
    _M_put(...)
    { return false; }

    // Write __n bytes to the byte stream buffer; false on a short write.
    bool
    _M_put(const char* __p, streamsize __n)
    {
      if (_M_buf->sputn(__p, __n) < __n)
        return false;
      return true;
    }

    // convert the put area and write to the byte stream buffer
    bool
    _M_conv_put()
    {
      _Elem* const __first = this->pbase();
      const _Elem* const __last = this->pptr();
      const streamsize __pending = __last - __first;

      if (_M_always_noconv)
        return _M_put(__first, __pending);

      char __outbuf[2 * _S_buffer_length];

      const _Elem* __next = __first;
      const _Elem* __start;
      // Convert and write in rounds until all input is consumed or a
      // round makes no progress.
      do
        {
          __start = __next;
          char* __outnext = __outbuf;
          char* const __outlast = __outbuf + sizeof(__outbuf);
          auto __result = _M_cvt->out(_M_state, __next, __last, __next,
                                      __outnext, __outlast, __outnext);
          if (__result == codecvt_base::error)
            return false;
          else if (__result == codecvt_base::noconv)
            return _M_put(__next, __pending);

          if (!_M_put(__outbuf, __outnext - __outbuf))
            return false;
        }
      while (__next != __last && __next != __start);

      // Slide any unconverted characters to the front of the put area.
      if (__next != __last)
        _Tr::move(__first, __next, __last - __next);

      this->pbump(__first - __next);
      return __next != __first;
    }

    streambuf*            _M_buf;       // underlying byte stream buffer
    unique_ptr<_Codecvt>  _M_cvt;       // owned facet
    state_type            _M_state;

    static const streamsize _S_buffer_length = 32;
    static const streamsize _S_putback_length = 3;
    _Elem                 _M_put_area[_S_buffer_length];
    _Elem                 _M_get_area[_S_buffer_length];
    streamsize            _M_unconv = 0;  // bytes held over in _M_get_buf
    char                  _M_get_buf[_S_buffer_length-_S_putback_length];
    bool                  _M_always_noconv;
  };
578 
579 #endif // _GLIBCXX_USE_WCHAR_T
580 
581  /// @} group locales
582 
583 _GLIBCXX_END_NAMESPACE_VERSION
584 } // namespace
585 
586 #endif // __cplusplus
587 
588 #endif /* _LOCALE_CONV_H */
wide_string from_bytes(const byte_string &__str)
Convert from bytes.
Definition: locale_conv.h:277
wide_string from_bytes(const char *__first, const char *__last)
Convert from bytes.
Definition: locale_conv.h:284
state_type state() const noexcept
The conversion state following the last conversion.
Definition: locale_conv.h:412
void setg(char_type *__gbeg, char_type *__gnext, char_type *__gend)
Setting the three read area pointers.
Definition: streambuf:516
int pubsync()
Calls virtual sync function.
Definition: streambuf:278
wstring_convert(_Codecvt *__pcvt)
Definition: locale_conv.h:220
Basis for explicit traits specializations.
Definition: char_traits.h:284
ISO C++ entities toplevel namespace is std.
char_type * gptr() const
Definition: streambuf:492
result in(state_type &__state, const extern_type *__from, const extern_type *__from_end, const extern_type *&__from_next, intern_type *__to, intern_type *__to_end, intern_type *&__to_next) const
Convert from external to internal character set.
Definition: codecvt.h:196
streamsize sgetn(char_type *__s, streamsize __n)
Entry point for xsgetn.
Definition: streambuf:364
byte_string to_bytes(const _Elem *__ptr)
Convert to bytes.
Definition: locale_conv.h:307
Managing sequences of characters and character-like objects.
char_type * pbase() const
Access to the put area.
Definition: streambuf:536
The actual work of input and output (interface).
Definition: iosfwd:80
wstring_convert(_Codecvt *__pcvt, state_type __state)
Definition: locale_conv.h:234
const _CharT * data() const noexcept
Return const pointer to contents.
streamsize sputn(const char_type *__s, streamsize __n)
Entry point for all single-character output functions.
Definition: streambuf:457
char_type * pptr() const
Definition: streambuf:539
int_type sputc(char_type __c)
Entry point for all single-character output functions.
Definition: streambuf:431
wbuffer_convert(streambuf *__bytebuf, _Codecvt *__pcvt=new _Codecvt, state_type __state=state_type())
Definition: locale_conv.h:376
traits_type::int_type int_type
Definition: streambuf:133
char_type * epptr() const
Definition: streambuf:542
byte_string to_bytes(const wide_string &__wstr)
Convert to bytes.
Definition: locale_conv.h:313
wide_string from_bytes(char __byte)
Convert from bytes.
Definition: locale_conv.h:266
void pbump(int __n)
Moving the write position.
Definition: streambuf:552
Buffer conversions.
Definition: locale_conv.h:357
char_type * egptr() const
Definition: streambuf:495
byte_string to_bytes(const _Elem *__first, const _Elem *__last)
Convert to bytes.
Definition: locale_conv.h:320
result out(state_type &__state, const intern_type *__from, const intern_type *__from_end, const intern_type *&__from_next, extern_type *__to, extern_type *__to_end, extern_type *&__to_next) const
Convert from internal to external character set.
Definition: codecvt.h:116
wide_string from_bytes(const char *__ptr)
Convert from bytes.
Definition: locale_conv.h:273
int sync()
Synchronizes the buffer arrays with the controlled sequences.
Definition: locale_conv.h:416
size_type size() const noexcept
Returns the number of characters in the string, not including any null-termination.
allocator_type get_allocator() const noexcept
Return copy of allocator used to construct this string.
char_type * eback() const
Access to the get area.
Definition: streambuf:489
ptrdiff_t streamsize
Integral type for I/O operation counts and buffer sizes.
Definition: postypes.h:98
wstring_convert(const byte_string &__byte_err, const wide_string &__wide_err=wide_string())
Definition: locale_conv.h:247
basic_streambuf< char > streambuf
Base class for char buffers.
Definition: iosfwd:135
byte_string to_bytes(_Elem __wchar)
Convert to bytes.
Definition: locale_conv.h:300
void setp(char_type *__pbeg, char_type *__pend)
Setting the three write area pointers.
Definition: streambuf:562
String conversions.
Definition: locale_conv.h:202
_Wide_streambuf::int_type underflow()
Fetches more data from the controlled sequence.
Definition: locale_conv.h:430
streamsize in_avail()
Looking ahead into the stream.
Definition: streambuf:291
wstring_convert()
Default constructor.
Definition: locale_conv.h:211
wbuffer_convert()
Default constructor.
Definition: locale_conv.h:365
_GLIBCXX14_CONSTEXPR const _Tp & min(const _Tp &, const _Tp &)
This does what you think it does.
Definition: stl_algobase.h:198
size_t converted() const noexcept
The number of elements successfully converted in the last conversion.
Definition: locale_conv.h:337
state_type state() const
The final conversion state of the last conversion.
Definition: locale_conv.h:340