MyGUI  3.4.1
MyGUI_UString.cpp
Go to the documentation of this file.
1 /*
2  * This source file is part of MyGUI. For the latest info, see http://mygui.info/
3  * Distributed under the MIT License
4  * (See accompanying file COPYING.MIT or copy at http://opensource.org/licenses/MIT)
5  */
6 
7 #include "MyGUI_Precompiled.h"
8 #include "MyGUI_UString.h"
9 
10 namespace MyGUI
11 {
12 
13  //--------------------------------------------------------------------------
15  {
16  mString = nullptr;
17  }
18  //--------------------------------------------------------------------------
20  {
21  mIter += c;
22  }
23  //--------------------------------------------------------------------------
25  {
26  mIter -= c;
27  }
28  //--------------------------------------------------------------------------
30  {
31  mIter = i.mIter;
32  mString = i.mString;
33  }
34  //--------------------------------------------------------------------------
36  {
37  return mIter == mString->mData.begin();
38  }
39  //--------------------------------------------------------------------------
41  {
42  return mIter == mString->mData.end();
43  }
44  //--------------------------------------------------------------------------
46  {
47  return mIter - mString->mData.begin();
48  }
49  //--------------------------------------------------------------------------
51  {
52  mIter = mString->mData.begin() + index;
53  }
54  //--------------------------------------------------------------------------
56  {
57  size_type current_index = _get_index();
58  return mString->getChar( current_index );
59  }
60  //--------------------------------------------------------------------------
62  {
63  size_type current_index = _get_index();
64  int change = mString->setChar( current_index, uc );
65  _jump_to( current_index );
66  return change;
67  }
68  //--------------------------------------------------------------------------
70  {
71  _seekFwd( 1 ); // move 1 code point forward
72  if ( _test_end() ) return; // exit if we hit the end
73  if ( _utf16_surrogate_follow( mIter[0] ) ) {
74  // landing on a follow code point means we might be part of a bigger character
75  // so we test for that
76  code_point lead_half = 0;
77  //NB: we can't possibly be at the beginning here, so no need to test
78  lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
79  if ( _utf16_surrogate_lead( lead_half ) ) {
80  _seekFwd( 1 ); // if so, then advance 1 more code point
81  }
82  }
83  }
84  //--------------------------------------------------------------------------
86  {
87  _seekRev( 1 ); // move 1 code point backwards
88  if ( _test_begin() ) return; // exit if we hit the beginning
89  if ( _utf16_surrogate_follow( mIter[0] ) ) {
90  // landing on a follow code point means we might be part of a bigger character
91  // so we test for that
92  code_point lead_half = 0;
93  lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
94  if ( _utf16_surrogate_lead( lead_half ) ) {
95  _seekRev( 1 ); // if so, then rewind 1 more code point
96  }
97  }
98  }
99  //--------------------------------------------------------------------------
100  //--------------------------------------------------------------------------
101  //--------------------------------------------------------------------------
102  //--------------------------------------------------------------------------
104  //--------------------------------------------------------------------------
106  {
107  _become( i );
108  }
109  //--------------------------------------------------------------------------
111  {
112  _become( i );
113  return *this;
114  }
115  //--------------------------------------------------------------------------
117  {
118  _seekFwd( 1 );
119  return *this;
120  }
121  //--------------------------------------------------------------------------
123  {
124  _fwd_iterator tmp( *this );
125  _seekFwd( 1 );
126  return tmp;
127  }
128  //--------------------------------------------------------------------------
130  {
131  _seekRev( 1 );
132  return *this;
133  }
134  //--------------------------------------------------------------------------
136  {
137  _fwd_iterator tmp( *this );
138  _seekRev( 1 );
139  return tmp;
140  }
141  //--------------------------------------------------------------------------
143  {
144  _fwd_iterator tmp( *this );
145  if ( n < 0 )
146  tmp._seekRev( -n );
147  else
148  tmp._seekFwd( n );
149  return tmp;
150  }
151  //--------------------------------------------------------------------------
153  {
154  _fwd_iterator tmp( *this );
155  if ( n < 0 )
156  tmp._seekFwd( -n );
157  else
158  tmp._seekRev( n );
159  return tmp;
160  }
161  //--------------------------------------------------------------------------
163  {
164  if ( n < 0 )
165  _seekRev( -n );
166  else
167  _seekFwd( n );
168  return *this;
169  }
170  //--------------------------------------------------------------------------
172  {
173  if ( n < 0 )
174  _seekFwd( -n );
175  else
176  _seekRev( n );
177  return *this;
178  }
179  //--------------------------------------------------------------------------
181  {
182  return *mIter;
183  }
184  //--------------------------------------------------------------------------
186  {
187  _fwd_iterator tmp( *this );
188  tmp += n;
189  return *tmp;
190  }
191  //--------------------------------------------------------------------------
193  {
194  _moveNext();
195  return *this;
196  }
197  //--------------------------------------------------------------------------
199  {
200  _movePrev();
201  return *this;
202  }
203  //--------------------------------------------------------------------------
205  {
206  return _getCharacter();
207  }
208  //--------------------------------------------------------------------------
210  {
211  return _setCharacter( uc );
212  }
213  //--------------------------------------------------------------------------
214  //--------------------------------------------------------------------------
215  //--------------------------------------------------------------------------
216  //--------------------------------------------------------------------------
218  //--------------------------------------------------------------------------
220  {
221  _become( i );
222  }
223  //--------------------------------------------------------------------------
225  {
226  _become( i );
227  return *this;
228  }
229  //--------------------------------------------------------------------------
231  {
232  _become( i );
233  }
234  //--------------------------------------------------------------------------
236  {
237  _seekFwd( 1 );
238  return *this;
239  }
240  //--------------------------------------------------------------------------
242  {
243  _const_fwd_iterator tmp( *this );
244  _seekFwd( 1 );
245  return tmp;
246  }
247  //--------------------------------------------------------------------------
249  {
250  _seekRev( 1 );
251  return *this;
252  }
253  //--------------------------------------------------------------------------
255  {
256  _const_fwd_iterator tmp( *this );
257  _seekRev( 1 );
258  return tmp;
259  }
260  //--------------------------------------------------------------------------
262  {
263  _const_fwd_iterator tmp( *this );
264  if ( n < 0 )
265  tmp._seekRev( -n );
266  else
267  tmp._seekFwd( n );
268  return tmp;
269  }
270  //--------------------------------------------------------------------------
272  {
273  _const_fwd_iterator tmp( *this );
274  if ( n < 0 )
275  tmp._seekFwd( -n );
276  else
277  tmp._seekRev( n );
278  return tmp;
279  }
280  //--------------------------------------------------------------------------
282  {
283  if ( n < 0 )
284  _seekRev( -n );
285  else
286  _seekFwd( n );
287  return *this;
288  }
289  //--------------------------------------------------------------------------
291  {
292  if ( n < 0 )
293  _seekFwd( -n );
294  else
295  _seekRev( n );
296  return *this;
297  }
298  //--------------------------------------------------------------------------
300  {
301  return *mIter;
302  }
303  //--------------------------------------------------------------------------
305  {
306  _const_fwd_iterator tmp( *this );
307  tmp += n;
308  return *tmp;
309  }
310  //--------------------------------------------------------------------------
312  {
313  _moveNext();
314  return *this;
315  }
316  //--------------------------------------------------------------------------
318  {
319  _movePrev();
320  return *this;
321  }
322  //--------------------------------------------------------------------------
324  {
325  return _getCharacter();
326  }
327  //--------------------------------------------------------------------------
328  //--------------------------------------------------------------------------
329  //--------------------------------------------------------------------------
330  //--------------------------------------------------------------------------
332  //--------------------------------------------------------------------------
334  {
335  _become( i );
336  }
337  //--------------------------------------------------------------------------
339  {
340  _seekRev( 1 );
341  return *this;
342  }
343  //--------------------------------------------------------------------------
345  {
346  _rev_iterator tmp( *this );
347  _seekRev( 1 );
348  return tmp;
349  }
350  //--------------------------------------------------------------------------
352  {
353  _seekFwd( 1 );
354  return *this;
355  }
356  //--------------------------------------------------------------------------
358  {
359  _rev_iterator tmp( *this );
360  _seekFwd( 1 );
361  return tmp;
362  }
363  //--------------------------------------------------------------------------
365  {
366  _rev_iterator tmp( *this );
367  if ( n < 0 )
368  tmp._seekFwd( -n );
369  else
370  tmp._seekRev( n );
371  return tmp;
372  }
373  //--------------------------------------------------------------------------
375  {
376  _rev_iterator tmp( *this );
377  if ( n < 0 )
378  tmp._seekRev( -n );
379  else
380  tmp._seekFwd( n );
381  return tmp;
382  }
383  //--------------------------------------------------------------------------
385  {
386  if ( n < 0 )
387  _seekFwd( -n );
388  else
389  _seekRev( n );
390  return *this;
391  }
392  //--------------------------------------------------------------------------
394  {
395  if ( n < 0 )
396  _seekRev( -n );
397  else
398  _seekFwd( n );
399  return *this;
400  }
401  //--------------------------------------------------------------------------
403  {
404  return mIter[-1];
405  }
406  //--------------------------------------------------------------------------
408  {
409  _rev_iterator tmp( *this );
410  tmp -= n;
411  return *tmp;
412  }
413  //--------------------------------------------------------------------------
414  //--------------------------------------------------------------------------
415  //--------------------------------------------------------------------------
416  //--------------------------------------------------------------------------
418  //--------------------------------------------------------------------------
420  {
421  _become( i );
422  }
423  //--------------------------------------------------------------------------
425  {
426  _become( i );
427  }
428  //--------------------------------------------------------------------------
430  {
431  _seekRev( 1 );
432  return *this;
433  }
434  //--------------------------------------------------------------------------
436  {
437  _const_rev_iterator tmp( *this );
438  _seekRev( 1 );
439  return tmp;
440  }
441  //--------------------------------------------------------------------------
443  {
444  _seekFwd( 1 );
445  return *this;
446  }
447  //--------------------------------------------------------------------------
449  {
450  _const_rev_iterator tmp( *this );
451  _seekFwd( 1 );
452  return tmp;
453  }
454  //--------------------------------------------------------------------------
456  {
457  _const_rev_iterator tmp( *this );
458  if ( n < 0 )
459  tmp._seekFwd( -n );
460  else
461  tmp._seekRev( n );
462  return tmp;
463  }
464  //--------------------------------------------------------------------------
466  {
467  _const_rev_iterator tmp( *this );
468  if ( n < 0 )
469  tmp._seekRev( -n );
470  else
471  tmp._seekFwd( n );
472  return tmp;
473  }
474  //--------------------------------------------------------------------------
476  {
477  if ( n < 0 )
478  _seekFwd( -n );
479  else
480  _seekRev( n );
481  return *this;
482  }
483  //--------------------------------------------------------------------------
485  {
486  if ( n < 0 )
487  _seekRev( -n );
488  else
489  _seekFwd( n );
490  return *this;
491  }
492  //--------------------------------------------------------------------------
494  {
495  return mIter[-1];
496  }
497  //--------------------------------------------------------------------------
499  {
500  _const_rev_iterator tmp( *this );
501  tmp -= n;
502  return *tmp;
503  }
504  //--------------------------------------------------------------------------
505  //--------------------------------------------------------------------------
506  //--------------------------------------------------------------------------
507  //--------------------------------------------------------------------------
509  {
510  _init();
511  }
512  //--------------------------------------------------------------------------
513  UString::UString( const UString& copy )
514  {
515  _init();
516  mData = copy.mData;
517  }
518  //--------------------------------------------------------------------------
520  {
521  _init();
522  assign( length, ch );
523  }
524  //--------------------------------------------------------------------------
526  {
527  _init();
528  assign( str );
529  }
530  //--------------------------------------------------------------------------
532  {
533  _init();
534  assign( str, length );
535  }
536  //--------------------------------------------------------------------------
538  {
539  _init();
540  assign( str, index, length );
541  }
542  //--------------------------------------------------------------------------
543 #if MYGUI_IS_NATIVE_WCHAR_T
544  UString::UString( const wchar_t* w_str )
545  {
546  _init();
547  assign( w_str );
548  }
549  //--------------------------------------------------------------------------
550  UString::UString( const wchar_t* w_str, size_type length )
551  {
552  _init();
553  assign( w_str, length );
554  }
555 #endif
556  //--------------------------------------------------------------------------
557  UString::UString( const std::wstring& wstr )
558  {
559  _init();
560  assign( wstr );
561  }
562  //--------------------------------------------------------------------------
563  UString::UString( const char* c_str )
564  {
565  _init();
566  assign( c_str );
567  }
568  //--------------------------------------------------------------------------
570  {
571  _init();
572  assign( c_str, length );
573  }
574  //--------------------------------------------------------------------------
575  UString::UString( const std::string& str )
576  {
577  _init();
578  assign( str );
579  }
580  //--------------------------------------------------------------------------
582  {
583  _init();
584  assign( str );
585  }
586  //--------------------------------------------------------------------------
588  {
589  _cleanBuffer();
590  }
591  //--------------------------------------------------------------------------
593  {
594  return mData.size();
595  }
596  //--------------------------------------------------------------------------
598  {
599  return size();
600  }
601  //--------------------------------------------------------------------------
603  {
604  const_iterator i = begin(), ie = end();
605  size_type c = 0;
606  while ( i != ie ) {
607  i.moveNext();
608  ++c;
609  }
610  return c;
611  }
612  //--------------------------------------------------------------------------
614  {
615  return mData.max_size();
616  }
617  //--------------------------------------------------------------------------
619  {
620  mData.reserve( size );
621  }
622  //--------------------------------------------------------------------------
623  void UString::resize( size_type num, const code_point& val /*= 0 */ )
624  {
625  mData.resize( num, val );
626  }
627  //--------------------------------------------------------------------------
628  void UString::swap( UString& from )
629  {
630  mData.swap( from.mData );
631  }
632  //--------------------------------------------------------------------------
633  bool UString::empty() const
634  {
635  return mData.empty();
636  }
637  //--------------------------------------------------------------------------
639  {
640  return mData.c_str();
641  }
642  //--------------------------------------------------------------------------
644  {
645  return c_str();
646  }
647  //--------------------------------------------------------------------------
649  {
650  return mData.capacity();
651  }
652  //--------------------------------------------------------------------------
654  {
655  mData.clear();
656  }
657  //--------------------------------------------------------------------------
658  UString UString::substr( size_type index, size_type num /*= npos */ ) const
659  {
660  // this could avoid the extra copy if we used a private specialty constructor
661  dstring data = mData.substr( index, num );
662  UString tmp;
663  tmp.mData.swap( data );
664  return tmp;
665  }
666  //--------------------------------------------------------------------------
668  {
669  code_point cp[2];
670  size_t c = _utf32_to_utf16( val, cp );
671  if ( c > 0 ) push_back( cp[0] );
672  if ( c > 1 ) push_back( cp[1] );
673  }
674  //--------------------------------------------------------------------------
675 #if MYGUI_IS_NATIVE_WCHAR_T
676  void UString::push_back( wchar_t val )
677  {
678  // we do this because the Unicode method still preserves UTF-16 code points
679  mData.push_back( static_cast<code_point>( val ) );
680  }
681 #endif
682  //--------------------------------------------------------------------------
684  {
685  mData.push_back( val );
686  }
687 
688  void UString::push_back( char val )
689  {
690  mData.push_back( static_cast<code_point>( val ) );
691  }
692 
694  {
695  const_iterator i, ie = end();
696  for ( i = begin(); i != ie; i.moveNext() ) {
697  if ( i.getCharacter() == ch )
698  return true;
699  }
700  return false;
701  }
702 
703  const std::string& UString::asUTF8() const
704  {
705  _load_buffer_UTF8();
706  return *m_buffer.mStrBuffer;
707  }
708 
709  const char* UString::asUTF8_c_str() const
710  {
711  _load_buffer_UTF8();
712  return m_buffer.mStrBuffer->c_str();
713  }
714 
716  {
717  _load_buffer_UTF32();
718  return *m_buffer.mUTF32StrBuffer;
719  }
720 
722  {
723  _load_buffer_UTF32();
724  return m_buffer.mUTF32StrBuffer->c_str();
725  }
726 
727  const std::wstring& UString::asWStr() const
728  {
729  _load_buffer_WStr();
730  return *m_buffer.mWStrBuffer;
731  }
732 
733  const wchar_t* UString::asWStr_c_str() const
734  {
735  _load_buffer_WStr();
736  return m_buffer.mWStrBuffer->c_str();
737  }
738 
740  {
741  return mData.at( loc );
742  }
743 
745  {
746  return mData.at( loc );
747  }
748 
750  {
751  const code_point* ptr = c_str();
752  unicode_char uc;
753  size_t l = _utf16_char_length( ptr[loc] );
754  code_point cp[2] = { /* blame the code beautifier */
755  0, 0
756  };
757  cp[0] = ptr[loc];
758 
759  if ( l == 2 && ( loc + 1 ) < mData.length() ) {
760  cp[1] = ptr[loc+1];
761  }
762  _utf16_to_utf32( cp, uc );
763  return uc;
764  }
765 
767  {
768  code_point cp[2] = { /* blame the code beautifier */
769  0, 0
770  };
771  size_t l = _utf32_to_utf16( ch, cp );
772  unicode_char existingChar = getChar( loc );
773  size_t existingSize = _utf16_char_length( existingChar );
774  size_t newSize = _utf16_char_length( ch );
775 
776  if ( newSize > existingSize ) {
777  at( loc ) = cp[0];
778  insert( loc + 1, 1, cp[1] );
779  return 1;
780  }
781  if ( newSize < existingSize ) {
782  erase( loc, 1 );
783  at( loc ) = cp[0];
784  return -1;
785  }
786 
787  // newSize == existingSize
788  at( loc ) = cp[0];
789  if ( l == 2 ) at( loc + 1 ) = cp[1];
790  return 0;
791  }
792 
794  {
795  iterator i;
796  i.mIter = mData.begin();
797  i.mString = this;
798  return i;
799  }
800 
802  {
803  const_iterator i;
804  i.mIter = const_cast<UString*>( this )->mData.begin();
805  i.mString = const_cast<UString*>( this );
806  return i;
807  }
808 
810  {
811  iterator i;
812  i.mIter = mData.end();
813  i.mString = this;
814  return i;
815  }
816 
818  {
819  const_iterator i;
820  i.mIter = const_cast<UString*>( this )->mData.end();
821  i.mString = const_cast<UString*>( this );
822  return i;
823  }
824 
826  {
828  i.mIter = mData.end();
829  i.mString = this;
830  return i;
831  }
832 
834  {
836  i.mIter = const_cast<UString*>( this )->mData.end();
837  i.mString = const_cast<UString*>( this );
838  return i;
839  }
840 
842  {
844  i.mIter = mData.begin();
845  i.mString = this;
846  return i;
847  }
848 
850  {
852  i.mIter = const_cast<UString*>( this )->mData.begin();
853  i.mString = const_cast<UString*>( this );
854  return i;
855  }
856 
858  {
859  mData.assign( start.mIter, end.mIter );
860  return *this;
861  }
862 
864  {
865  mData.assign( str.mData );
866  return *this;
867  }
868 
870  {
871  mData.assign( str );
872  return *this;
873  }
874 
876  {
877  mData.assign( str, num );
878  return *this;
879  }
880 
882  {
883  mData.assign( str.mData, index, len );
884  return *this;
885  }
886 
888  {
889  mData.assign( num, ch );
890  return *this;
891  }
892 
893  UString& UString::assign( const std::wstring& wstr )
894  {
895  mData.clear();
896  mData.reserve( wstr.length() ); // best guess bulk allocate
897 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
898  code_point tmp;
899  std::wstring::const_iterator i, ie = wstr.end();
900  for ( i = wstr.begin(); i != ie; i++ ) {
901  tmp = static_cast<code_point>( *i );
902  mData.push_back( tmp );
903  }
904 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
905  code_point cp[3] = {0, 0, 0};
906  unicode_char tmp;
907  std::wstring::const_iterator i, ie = wstr.end();
908  for ( i = wstr.begin(); i != ie; i++ ) {
909  tmp = static_cast<unicode_char>( *i );
910  size_t l = _utf32_to_utf16( tmp, cp );
911  if ( l > 0 ) mData.push_back( cp[0] );
912  if ( l > 1 ) mData.push_back( cp[1] );
913  }
914 #endif
915  return *this;
916  }
917 
918 #if MYGUI_IS_NATIVE_WCHAR_T
919  UString& UString::assign( const wchar_t* w_str )
920  {
921  std::wstring tmp;
922  tmp.assign( w_str );
923  return assign( tmp );
924  }
925 
926  UString& UString::assign( const wchar_t* w_str, size_type num )
927  {
928  std::wstring tmp;
929  tmp.assign( w_str, num );
930  return assign( tmp );
931  }
932 #endif
933 
934  UString& UString::assign( const std::string& str )
935  {
936  size_type len = _verifyUTF8( str );
937  clear(); // empty our contents, if there are any
938  reserve( len ); // best guess bulk capacity growth
939 
940  // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
941  // then converting it to UTF-16, then finally appending the data buffer
942 
943  unicode_char uc; // temporary Unicode character buffer
944  unsigned char utf8buf[7]; // temporary UTF-8 buffer
945  utf8buf[6] = 0;
946  size_t utf8len; // UTF-8 length
947  code_point utf16buff[3]; // temporary UTF-16 buffer
948  utf16buff[2] = 0;
949  size_t utf16len; // UTF-16 length
950 
951  std::string::const_iterator i, ie = str.end();
952  for ( i = str.begin(); i != ie; i++ ) {
953  utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load
954  for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes
955  utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
956  }
957  utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
958  utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion
959  i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop
960 
961  utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion
962  append( utf16buff, utf16len ); // append the characters to the string
963  }
964  return *this;
965  }
966 
968  {
969  for (const auto& character : str)
970  {
971  push_back(character);
972  }
973  return *this;
974  }
975 
976  UString& UString::assign( const char* c_str )
977  {
978  std::string tmp( c_str );
979  return assign( tmp );
980  }
981 
982  UString& UString::assign( const char* c_str, size_type num )
983  {
984  std::string tmp;
985  tmp.assign( c_str, num );
986  return assign( tmp );
987  }
988 
990  {
991  mData.append( str.mData );
992  return *this;
993  }
994 
996  {
997  mData.append( str );
998  return *this;
999  }
1000 
1002  {
1003  mData.append( str.mData, index, len );
1004  return *this;
1005  }
1006 
1008  {
1009  mData.append( str, num );
1010  return *this;
1011  }
1012 
1014  {
1015  mData.append( num, ch );
1016  return *this;
1017  }
1018 
1020  {
1021  mData.append( start.mIter, end.mIter );
1022  return *this;
1023  }
1024 
1025 #if MYGUI_IS_NATIVE_WCHAR_T
1026  UString& UString::append( const wchar_t* w_str, size_type num )
1027  {
1028  std::wstring tmp( w_str, num );
1029  return append( tmp );
1030  }
1031 
1032  UString& UString::append( size_type num, wchar_t ch )
1033  {
1034  return append( num, static_cast<unicode_char>( ch ) );
1035  }
1036 #endif
1038  {
1039  UString tmp( c_str, num );
1040  append( tmp );
1041  return *this;
1042  }
1043 
1045  {
1046  append( num, static_cast<code_point>( ch ) );
1047  return *this;
1048  }
1049 
1051  {
1052  code_point cp[2] = {0, 0};
1053  if ( _utf32_to_utf16( ch, cp ) == 2 ) {
1054  for ( size_type i = 0; i < num; i++ ) {
1055  append( 1, cp[0] );
1056  append( 1, cp[1] );
1057  }
1058  } else {
1059  for ( size_type i = 0; i < num; i++ ) {
1060  append( 1, cp[0] );
1061  }
1062  }
1063  return *this;
1064  }
1065 
1067  {
1068  iterator ret;
1069  ret.mIter = mData.insert( i.mIter, ch );
1070  ret.mString = this;
1071  return ret;
1072  }
1073 
1075  {
1076  mData.insert( index, str.mData );
1077  return *this;
1078  }
1079 
1080  UString& UString::insert( size_type index1, const UString& str, size_type index2, size_type num )
1081  {
1082  mData.insert( index1, str.mData, index2, num );
1083  return *this;
1084  }
1085 
1087  {
1088  mData.insert( i.mIter, start.mIter, end.mIter );
1089  }
1090 
1092  {
1093  mData.insert( index, str, num );
1094  return *this;
1095  }
1096 
1097 #if MYGUI_IS_NATIVE_WCHAR_T
1098  UString& UString::insert( size_type index, const wchar_t* w_str, size_type num )
1099  {
1100  UString tmp( w_str, num );
1101  insert( index, tmp );
1102  return *this;
1103  }
1104 #endif
1105 
1106  UString& UString::insert( size_type index, const char* c_str, size_type num )
1107  {
1108  UString tmp( c_str, num );
1109  insert( index, tmp );
1110  return *this;
1111  }
1112 
1114  {
1115  mData.insert( index, num, ch );
1116  return *this;
1117  }
1118 
1119 #if MYGUI_IS_NATIVE_WCHAR_T
1120  UString& UString::insert( size_type index, size_type num, wchar_t ch )
1121  {
1122  insert( index, num, static_cast<unicode_char>( ch ) );
1123  return *this;
1124  }
1125 #endif
1126 
1127  UString& UString::insert( size_type index, size_type num, char ch )
1128  {
1129  insert( index, num, static_cast<code_point>( ch ) );
1130  return *this;
1131  }
1132 
1134  {
1135  code_point cp[3] = {0, 0, 0};
1136  size_t l = _utf32_to_utf16( ch, cp );
1137  if ( l == 1 ) {
1138  return insert( index, num, cp[0] );
1139  }
1140  for ( size_type c = 0; c < num; c++ ) {
1141  // insert in reverse order to preserve ordering after insert
1142  insert( index, 1, cp[1] );
1143  insert( index, 1, cp[0] );
1144  }
1145  return *this;
1146  }
1147 
1148  void UString::insert( iterator i, size_type num, const code_point& ch )
1149  {
1150  mData.insert( i.mIter, num, ch );
1151  }
1152 #if MYGUI_IS_NATIVE_WCHAR_T
1153  void UString::insert( iterator i, size_type num, const wchar_t& ch )
1154  {
1155  insert( i, num, static_cast<unicode_char>( ch ) );
1156  }
1157 #endif
1158 
1159  void UString::insert( iterator i, size_type num, const char& ch )
1160  {
1161  insert( i, num, static_cast<code_point>( ch ) );
1162  }
1163 
1165  {
1166  code_point cp[3] = {0, 0, 0};
1167  size_t l = _utf32_to_utf16( ch, cp );
1168  if ( l == 1 ) {
1169  insert( i, num, cp[0] );
1170  } else {
1171  for ( size_type c = 0; c < num; c++ ) {
1172  // insert in reverse order to preserve ordering after insert
1173  insert( i, 1, cp[1] );
1174  insert( i, 1, cp[0] );
1175  }
1176  }
1177  }
1178 
1180  {
1181  iterator ret;
1182  ret.mIter = mData.erase( loc.mIter );
1183  ret.mString = this;
1184  return ret;
1185  }
1186 
1188  {
1189  iterator ret;
1190  ret.mIter = mData.erase( start.mIter, end.mIter );
1191  ret.mString = this;
1192  return ret;
1193  }
1194 
1195  UString& UString::erase( size_type index /*= 0*/, size_type num /*= npos */ )
1196  {
1197  if ( num == npos )
1198  mData.erase( index );
1199  else
1200  mData.erase( index, num );
1201  return *this;
1202  }
1203 
1204  UString& UString::replace( size_type index1, size_type num1, const UString& str )
1205  {
1206  mData.replace( index1, num1, str.mData, 0, npos );
1207  return *this;
1208  }
1209 
1210  UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type num2 )
1211  {
1212  mData.replace( index1, num1, str.mData, 0, num2 );
1213  return *this;
1214  }
1215 
1216  UString& UString::replace( size_type index1, size_type num1, const UString& str, size_type index2, size_type num2 )
1217  {
1218  mData.replace( index1, num1, str.mData, index2, num2 );
1219  return *this;
1220  }
1221 
1222  UString& UString::replace( iterator start, iterator end, const UString& str, size_type num /*= npos */ )
1223  {
1224  _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1225 
1226  size_type index1 = begin() - st;
1227  size_type num1 = end - st;
1228  return replace( index1, num1, str, 0, num );
1229  }
1230 
1232  {
1233  mData.replace( index, num1, num2, ch );
1234  return *this;
1235  }
1236 
1238  {
1239  _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
1240 
1241  size_type index1 = begin() - st;
1242  size_type num1 = end - st;
1243  return replace( index1, num1, num, ch );
1244  }
1245 
1246  int UString::compare( const UString& str ) const
1247  {
1248  return mData.compare( str.mData );
1249  }
1250 
1251  int UString::compare( const code_point* str ) const
1252  {
1253  return mData.compare( str );
1254  }
1255 
1256  int UString::compare( size_type index, size_type length, const UString& str ) const
1257  {
1258  return mData.compare( index, length, str.mData );
1259  }
1260 
1261  int UString::compare( size_type index, size_type length, const UString& str, size_type index2, size_type length2 ) const
1262  {
1263  return mData.compare( index, length, str.mData, index2, length2 );
1264  }
1265 
1266  int UString::compare( size_type index, size_type length, const code_point* str, size_type length2 ) const
1267  {
1268  return mData.compare( index, length, str, length2 );
1269  }
1270 
1271 #if MYGUI_IS_NATIVE_WCHAR_T
1272  int UString::compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const
1273  {
1274  UString tmp( w_str, length2 );
1275  return compare( index, length, tmp );
1276  }
1277 #endif
1278 
1279  int UString::compare( size_type index, size_type length, const char* c_str, size_type length2 ) const
1280  {
1281  UString tmp( c_str, length2 );
1282  return compare( index, length, tmp );
1283  }
1284 
1285  UString::size_type UString::find( const UString& str, size_type index /*= 0 */ ) const
1286  {
1287  return mData.find( str.c_str(), index );
1288  }
1289 
1291  {
1292  UString tmp( cp_str );
1293  return mData.find( tmp.c_str(), index, length );
1294  }
1295 
1297  {
1298  UString tmp( c_str );
1299  return mData.find( tmp.c_str(), index, length );
1300  }
1301 
1302 #if MYGUI_IS_NATIVE_WCHAR_T
1303  UString::size_type UString::find( const wchar_t* w_str, size_type index, size_type length ) const
1304  {
1305  UString tmp( w_str );
1306  return mData.find( tmp.c_str(), index, length );
1307  }
1308 #endif
1309 
1310  UString::size_type UString::find( char ch, size_type index /*= 0 */ ) const
1311  {
1312  return find( static_cast<code_point>( ch ), index );
1313  }
1314 
1316  {
1317  return mData.find( ch, index );
1318  }
1319 
1320 #if MYGUI_IS_NATIVE_WCHAR_T
1321  UString::size_type UString::find( wchar_t ch, size_type index /*= 0 */ ) const
1322  {
1323  return find( static_cast<unicode_char>( ch ), index );
1324  }
1325 #endif
1326 
1328  {
1329  code_point cp[3] = {0, 0, 0};
1330  size_t l = _utf32_to_utf16( ch, cp );
1331  return find( UString( cp, l ), index );
1332  }
1333 
1334  UString::size_type UString::rfind( const UString& str, size_type index /*= 0 */ ) const
1335  {
1336  return mData.rfind( str.c_str(), index );
1337  }
1338 
1340  {
1341  UString tmp( cp_str );
1342  return mData.rfind( tmp.c_str(), index, num );
1343  }
1344 
1345  UString::size_type UString::rfind( const char* c_str, size_type index, size_type num ) const
1346  {
1347  UString tmp( c_str );
1348  return mData.rfind( tmp.c_str(), index, num );
1349  }
1350 
1351 #if MYGUI_IS_NATIVE_WCHAR_T
1352  UString::size_type UString::rfind( const wchar_t* w_str, size_type index, size_type num ) const
1353  {
1354  UString tmp( w_str );
1355  return mData.rfind( tmp.c_str(), index, num );
1356  }
1357 #endif
1358 
1359  UString::size_type UString::rfind( char ch, size_type index /*= 0 */ ) const
1360  {
1361  return rfind( static_cast<code_point>( ch ), index );
1362  }
1363 
1365  {
1366  return mData.rfind( ch, index );
1367  }
1368 
1369 #if MYGUI_IS_NATIVE_WCHAR_T
1370  UString::size_type UString::rfind( wchar_t ch, size_type index /*= 0 */ ) const
1371  {
1372  return rfind( static_cast<unicode_char>( ch ), index );
1373  }
1374 #endif
1375 
1377  {
1378  code_point cp[3] = {0, 0, 0};
1379  size_t l = _utf32_to_utf16( ch, cp );
1380  return rfind( UString( cp, l ), index );
1381  }
1382 
1383  UString::size_type UString::find_first_of( const UString &str, size_type index /*= 0*/, size_type num /*= npos */ ) const
1384  {
1385  size_type i = 0;
1386  const size_type len = length();
1387  while ( i < num && ( index + i ) < len ) {
1388  unicode_char ch = getChar( index + i );
1389  if ( str.inString( ch ) )
1390  return index + i;
1391  i += _utf16_char_length( ch ); // increment by the Unicode character length
1392  }
1393  return npos;
1394  }
1395 
1397  {
1398  UString tmp;
1399  tmp.assign( 1, ch );
1400  return find_first_of( tmp, index );
1401  }
1402 
1403  UString::size_type UString::find_first_of( char ch, size_type index /*= 0 */ ) const
1404  {
1405  return find_first_of( static_cast<code_point>( ch ), index );
1406  }
1407 
1408 #if MYGUI_IS_NATIVE_WCHAR_T
1409  UString::size_type UString::find_first_of( wchar_t ch, size_type index /*= 0 */ ) const
1410  {
1411  return find_first_of( static_cast<unicode_char>( ch ), index );
1412  }
1413 #endif
1414 
1416  {
1417  code_point cp[3] = {0, 0, 0};
1418  size_t l = _utf32_to_utf16( ch, cp );
1419  return find_first_of( UString( cp, l ), index );
1420  }
1421 
1422  UString::size_type UString::find_first_not_of( const UString& str, size_type index /*= 0*/, size_type num /*= npos */ ) const
1423  {
1424  size_type i = 0;
1425  const size_type len = length();
1426  while ( i < num && ( index + i ) < len ) {
1427  unicode_char ch = getChar( index + i );
1428  if ( !str.inString( ch ) )
1429  return index + i;
1430  i += _utf16_char_length( ch ); // increment by the Unicode character length
1431  }
1432  return npos;
1433  }
1434 
1436  {
1437  UString tmp;
1438  tmp.assign( 1, ch );
1439  return find_first_not_of( tmp, index );
1440  }
1441 
1443  {
1444  return find_first_not_of( static_cast<code_point>( ch ), index );
1445  }
1446 
1447 #if MYGUI_IS_NATIVE_WCHAR_T
1448  UString::size_type UString::find_first_not_of( wchar_t ch, size_type index /*= 0 */ ) const
1449  {
1450  return find_first_not_of( static_cast<unicode_char>( ch ), index );
1451  }
1452 #endif
1453 
1455  {
1456  code_point cp[3] = {0, 0, 0};
1457  size_t l = _utf32_to_utf16( ch, cp );
1458  return find_first_not_of( UString( cp, l ), index );
1459  }
1460 
1461  UString::size_type UString::find_last_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
1462  {
1463  size_type i = 0;
1464  const size_type len = length();
1465  if ( index > len ) index = len - 1;
1466 
1467  while ( i < num && ( index - i ) != npos ) {
1468  size_type j = index - i;
1469  // careful to step full Unicode characters
1470  if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
1471  j = index - ++i;
1472  }
1473  // and back to the usual dull test
1474  unicode_char ch = getChar( j );
1475  if ( str.inString( ch ) )
1476  return j;
1477  i++;
1478  }
1479  return npos;
1480  }
1481 
1483  {
1484  UString tmp;
1485  tmp.assign( 1, ch );
1486  return find_last_of( tmp, index );
1487  }
1488 
1489 #if MYGUI_IS_NATIVE_WCHAR_T
1490  UString::size_type UString::find_last_of( wchar_t ch, size_type index /*= npos */ ) const
1491  {
1492  return find_last_of( static_cast<unicode_char>( ch ), index );
1493  }
1494 #endif
1495 
1497  {
1498  code_point cp[3] = {0, 0, 0};
1499  size_t l = _utf32_to_utf16( ch, cp );
1500  return find_last_of( UString( cp, l ), index );
1501  }
1502 
1503  UString::size_type UString::find_last_not_of( const UString& str, size_type index /*= npos*/, size_type num /*= npos */ ) const
1504  {
1505  size_type i = 0;
1506  const size_type len = length();
1507  if ( index > len ) index = len - 1;
1508 
1509  while ( i < num && ( index - i ) != npos ) {
1510  size_type j = index - i;
1511  // careful to step full Unicode characters
1512  if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
1513  j = index - ++i;
1514  }
1515  // and back to the usual dull test
1516  unicode_char ch = getChar( j );
1517  if ( !str.inString( ch ) )
1518  return j;
1519  i++;
1520  }
1521  return npos;
1522  }
1523 
1525  {
1526  UString tmp;
1527  tmp.assign( 1, ch );
1528  return find_last_not_of( tmp, index );
1529  }
1530 
1531  UString::size_type UString::find_last_not_of( char ch, size_type index /*= npos */ ) const
1532  {
1533  return find_last_not_of( static_cast<code_point>( ch ), index );
1534  }
1535 
1536 #if MYGUI_IS_NATIVE_WCHAR_T
1537  UString::size_type UString::find_last_not_of( wchar_t ch, size_type index /*= npos */ ) const
1538  {
1539  return find_last_not_of( static_cast<unicode_char>( ch ), index );
1540  }
1541 #endif
1542 
1544  {
1545  code_point cp[3] = {0, 0, 0};
1546  size_t l = _utf32_to_utf16( ch, cp );
1547  return find_last_not_of( UString( cp, l ), index );
1548  }
1549 
1550  bool UString::operator<( const UString& right ) const
1551  {
1552  return compare( right ) < 0;
1553  }
1554 
1555  bool UString::operator<=( const UString& right ) const
1556  {
1557  return compare( right ) <= 0;
1558  }
1559 
1561  {
1562  return assign( s );
1563  }
1564 
1566  {
1567  clear();
1568  return append( 1, ch );
1569  }
1570 
1572  {
1573  clear();
1574  return append( 1, ch );
1575  }
1576 
1577 #if MYGUI_IS_NATIVE_WCHAR_T
1578  UString& UString::operator=( wchar_t ch )
1579  {
1580  clear();
1581  return append( 1, ch );
1582  }
1583 #endif
1584 
1586  {
1587  clear();
1588  return append( 1, ch );
1589  }
1590 
1591  bool UString::operator>( const UString& right ) const
1592  {
1593  return compare( right ) > 0;
1594  }
1595 
1596  bool UString::operator>=( const UString& right ) const
1597  {
1598  return compare( right ) >= 0;
1599  }
1600 
1601  bool UString::operator==( const UString& right ) const
1602  {
1603  return compare( right ) == 0;
1604  }
1605 
1606  bool UString::operator!=( const UString& right ) const
1607  {
1608  return !operator==( right );
1609  }
1610 
1612  {
1613  return at( index );
1614  }
1615 
1617  {
1618  return at( index );
1619  }
1620 
1621  UString::operator std::string() const
1622  {
1623  return std::string( asUTF8() );
1624  }
1625 
1627  UString::operator std::wstring() const
1628  {
1629  return std::wstring( asWStr() );
1630  }
1631 
1632 
1634  {
1635  if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range
1636  return false; // it matches a surrogate pair signature
1637  return true; // everything else is a standalone code point
1638  }
1639 
1641  {
1642  if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair
1643  return true; // it is a 1st word
1644  return false; // it isn't
1645  }
1646 
1648  {
1649  if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair
1650  return true; // it is a 2nd word
1651  return false; // everything else isn't
1652  }
1653 
1655  {
1656  if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair
1657  return 2; // if it is, then we are 2 words long
1658  return 1; // otherwise we are only 1 word long
1659  }
1660 
1662  {
1663  if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum
1664  return 2; // if so, we need a surrogate pair
1665  return 1; // otherwise we can stuff it into a single word
1666  }
1667 
1668  size_t UString::_utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc )
1669  {
1670  const code_point& cp1 = in_cp[0];
1671  const code_point& cp2 = in_cp[1];
1672  bool wordPair = false;
1673 
1674  // does it look like a surrogate pair?
1675  if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
1676  // looks like one, but does the other half match the algorithm as well?
1677  if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
1678  wordPair = true; // yep!
1679  }
1680 
1681  if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value
1682  out_uc = cp1;
1683  return 1;
1684  }
1685 
1686  unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers
1687  cU -= 0xD800; // remove the encoding markers
1688  cL -= 0xDC00;
1689 
1690  out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location
1691  out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits
1692  out_uc += 0x10000; // add back in the value offset
1693 
1694  return 2; // this whole operation takes to words, so that's what we'll return
1695  }
1696 
1697  size_t UString::_utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] )
1698  {
1699  if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them
1700  out_cp[0] = static_cast<code_point>(in_uc);
1701  return 1;
1702  }
1703  unicode_char uc = in_uc; // copy to writable buffer
1704  unsigned short tmp; // single code point buffer
1705  uc -= 0x10000; // subtract value offset
1706 
1707  //process upper word
1708  tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF); // grab the upper 10 bits
1709  tmp += 0xD800; // add encoding offset
1710  out_cp[0] = tmp; // write
1711 
1712  // process lower word
1713  tmp = static_cast<unsigned short>(uc & 0x03FF); // grab the lower 10 bits
1714  tmp += 0xDC00; // add encoding offset
1715  out_cp[1] = tmp; // write
1716 
1717  return 2; // return used word count (2 for surrogate pairs)
1718  }
1719 
1720  bool UString::_utf8_start_char( unsigned char cp )
1721  {
1722  return ( cp & ~_cont_mask ) != _cont;
1723  }
1724 
1725  size_t UString::_utf8_char_length( unsigned char cp )
1726  {
1727  if ( !( cp & 0x80 ) ) return 1;
1728  if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
1729  if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
1730  if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
1731  if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
1732  if (( cp & ~_lead5_mask ) == _lead5 ) return 6;
1733 
1734  return 1;
1735  //throw invalid_data( "invalid UTF-8 sequence header value" );
1736  }
1737 
1739  {
1740  /*
1741  7 bit: U-00000000 - U-0000007F: 0xxxxxxx
1742  11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
1743  16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
1744  21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
1745  26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1746  31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
1747  */
1748  if ( !( uc & ~0x0000007F ) ) return 1;
1749  if ( !( uc & ~0x000007FF ) ) return 2;
1750  if ( !( uc & ~0x0000FFFF ) ) return 3;
1751  if ( !( uc & ~0x001FFFFF ) ) return 4;
1752  if ( !( uc & ~0x03FFFFFF ) ) return 5;
1753  if ( !( uc & ~0x7FFFFFFF ) ) return 6;
1754 
1755  return 1;
1756  //throw invalid_data( "invalid UTF-32 value" );
1757  }
1758 
1759  size_t UString::_utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc )
1760  {
1761  size_t len = _utf8_char_length( in_cp[0] );
1762  if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit
1763  out_uc = in_cp[0];
1764  return 1;
1765  }
1766 
1767  unicode_char c = 0; // temporary buffer
1768  size_t i = 0;
1769  switch ( len ) { // load header byte
1770  case 6:
1771  c = in_cp[i] & _lead5_mask;
1772  break;
1773  case 5:
1774  c = in_cp[i] & _lead4_mask;
1775  break;
1776  case 4:
1777  c = in_cp[i] & _lead3_mask;
1778  break;
1779  case 3:
1780  c = in_cp[i] & _lead2_mask;
1781  break;
1782  case 2:
1783  c = in_cp[i] & _lead1_mask;
1784  break;
1785  }
1786 
1787  // load each continuation byte
1788  for ( ++i; i < len; i++ )
1789  {
1790  if (( in_cp[i] & ~_cont_mask ) != _cont )
1791  {
1792  //throw invalid_data( "bad UTF-8 continuation byte" );
1793  out_uc = in_cp[0];
1794  return 1;
1795  }
1796  c <<= 6;
1797  c |= ( in_cp[i] & _cont_mask );
1798  }
1799 
1800  out_uc = c; // write the final value and return the used byte length
1801  return len;
1802  }
1803 
1804  size_t UString::_utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] )
1805  {
1806  size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence
1807  unicode_char c = in_uc; // copy to temp buffer
1808 
1809  //stuff all of the lower bits
1810  for ( size_t i = len - 1; i > 0; i-- ) {
1811  out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
1812  c >>= 6;
1813  }
1814 
1815  //now write the header byte
1816  switch ( len ) {
1817  case 6:
1818  out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
1819  break;
1820  case 5:
1821  out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
1822  break;
1823  case 4:
1824  out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
1825  break;
1826  case 3:
1827  out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
1828  break;
1829  case 2:
1830  out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
1831  break;
1832  case 1:
1833  default:
1834  out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F);
1835  break;
1836  }
1837 
1838  // return the byte length of the sequence
1839  return len;
1840  }
1841 
1843  {
1844  std::string tmp( reinterpret_cast<const char*>( c_str ) );
1845  return _verifyUTF8( tmp );
1846  }
1847 
1848  UString::size_type UString::_verifyUTF8( const std::string& str )
1849  {
1850  std::string::const_iterator i, ie = str.end();
1851  i = str.begin();
1852  size_type length = 0;
1853 
1854  while ( i != ie ) {
1855  // characters pass until we find an extended sequence
1856  if (( *i ) & 0x80 ) {
1857  unsigned char c = ( *i );
1858  size_t contBytes = 0;
1859 
1860  // get continuation byte count and test for overlong sequences
1861  if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte
1862  if ( c == _lead1 )
1863  {
1864  //throw invalid_data( "overlong UTF-8 sequence" );
1865  return str.size();
1866  }
1867  contBytes = 1;
1868 
1869  } else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes
1870  contBytes = 2;
1871  if ( c == _lead2 ) { // possible overlong UTF-8 sequence
1872  c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1873  if (( c & _lead2 ) == _cont )
1874  {
1875  //throw invalid_data( "overlong UTF-8 sequence" );
1876  return str.size();
1877  }
1878  }
1879 
1880  } else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes
1881  contBytes = 3;
1882  if ( c == _lead3 ) { // possible overlong UTF-8 sequence
1883  c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1884  if (( c & _lead3 ) == _cont )
1885  {
1886  //throw invalid_data( "overlong UTF-8 sequence" );
1887  return str.size();
1888  }
1889  }
1890 
1891  } else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes
1892  contBytes = 4;
1893  if ( c == _lead4 ) { // possible overlong UTF-8 sequence
1894  c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1895  if (( c & _lead4 ) == _cont )
1896  {
1897  //throw invalid_data( "overlong UTF-8 sequence" );
1898  return str.size();
1899  }
1900  }
1901 
1902  } else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes
1903  contBytes = 5;
1904  if ( c == _lead5 ) { // possible overlong UTF-8 sequence
1905  c = ( *( i + 1 ) ); // look ahead to next byte in sequence
1906  if (( c & _lead5 ) == _cont )
1907  {
1908  //throw invalid_data( "overlong UTF-8 sequence" );
1909  return str.size();
1910  }
1911  }
1912  }
1913 
1914  // check remaining continuation bytes for
1915  while ( contBytes-- ) {
1916  c = ( *( ++i ) ); // get next byte in sequence
1917  if (( c & ~_cont_mask ) != _cont )
1918  {
1919  //throw invalid_data( "bad UTF-8 continuation byte" );
1920  return str.size();
1921  }
1922  }
1923  }
1924  length++;
1925  i++;
1926  }
1927  return length;
1928  }
1929 
1930  void UString::_init()
1931  {
1932  m_buffer.mVoidBuffer = nullptr;
1933  m_bufferType = bt_none;
1934  m_bufferSize = 0;
1935  }
1936 
1937  void UString::_cleanBuffer() const
1938  {
1939  if ( m_buffer.mVoidBuffer != nullptr ) {
1940  switch ( m_bufferType ) {
1941  case bt_string:
1942  delete m_buffer.mStrBuffer;
1943  break;
1944  case bt_wstring:
1945  delete m_buffer.mWStrBuffer;
1946  break;
1947  case bt_utf32string:
1948  delete m_buffer.mUTF32StrBuffer;
1949  break;
1950  case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
1951  //delete m_buffer.mVoidBuffer;
1952  // delete void* is undefined, don't do that
1953  assert("This should never happen - mVoidBuffer should never contain something if we "
1954  "don't know the type");
1955  break;
1956  }
1957  m_buffer.mVoidBuffer = nullptr;
1958  m_bufferSize = 0;
1959  m_bufferType = bt_none;
1960  }
1961  }
1962 
1963  void UString::_getBufferStr() const
1964  {
1965  if ( m_bufferType != bt_string ) {
1966  _cleanBuffer();
1967  m_buffer.mStrBuffer = new std::string();
1968  m_bufferType = bt_string;
1969  }
1970  m_buffer.mStrBuffer->clear();
1971  }
1972 
1973  void UString::_getBufferWStr() const
1974  {
1975  if ( m_bufferType != bt_wstring ) {
1976  _cleanBuffer();
1977  m_buffer.mWStrBuffer = new std::wstring();
1978  m_bufferType = bt_wstring;
1979  }
1980  m_buffer.mWStrBuffer->clear();
1981  }
1982 
1983  void UString::_getBufferUTF32Str() const
1984  {
1985  if ( m_bufferType != bt_utf32string ) {
1986  _cleanBuffer();
1987  m_buffer.mUTF32StrBuffer = new utf32string();
1988  m_bufferType = bt_utf32string;
1989  }
1990  m_buffer.mUTF32StrBuffer->clear();
1991  }
1992 
1993  void UString::_load_buffer_UTF8() const
1994  {
1995  _getBufferStr();
1996  std::string& buffer = ( *m_buffer.mStrBuffer );
1997  buffer.reserve( length() );
1998 
1999  unsigned char utf8buf[6];
2000  char* charbuf = ( char* )utf8buf;
2001  unicode_char c;
2002  size_t len;
2003 
2004  const_iterator i, ie = end();
2005  for ( i = begin(); i != ie; i.moveNext() ) {
2006  c = i.getCharacter();
2007  len = _utf32_to_utf8( c, utf8buf );
2008  size_t j = 0;
2009  while ( j < len )
2010  buffer.push_back( charbuf[j++] );
2011  }
2012  }
2013 
2014  void UString::_load_buffer_WStr() const
2015  {
2016  _getBufferWStr();
2017  std::wstring& buffer = ( *m_buffer.mWStrBuffer );
2018  buffer.reserve( length() ); // may over reserve, but should be close enough
2019 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
2020  const_iterator i, ie = end();
2021  for ( i = begin(); i != ie; ++i ) {
2022  buffer.push_back(( wchar_t )( *i ) );
2023  }
2024 #else // wchar_t fits UTF-32
2025  unicode_char c;
2026  const_iterator i, ie = end();
2027  for ( i = begin(); i != ie; i.moveNext() ) {
2028  c = i.getCharacter();
2029  buffer.push_back(( wchar_t )c );
2030  }
2031 #endif
2032  }
2033 
2034  void UString::_load_buffer_UTF32() const
2035  {
2036  _getBufferUTF32Str();
2037  utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2038  buffer.reserve( length() ); // may over reserve, but should be close enough
2039 
2040  unicode_char c;
2041 
2042  const_iterator i, ie = end();
2043  for ( i = begin(); i != ie; i.moveNext() ) {
2044  c = i.getCharacter();
2045  buffer.push_back( c );
2046  }
2047  }
2048 
2049 } // namespace MyGUI
base iterator class for UString
int _setCharacter(unicode_char uc)
void _become(const _base_iterator &i)
void _jump_to(size_type index)
unicode_char _getCharacter() const
const forward iterator for UString
_const_fwd_iterator & operator=(const _const_fwd_iterator &i)
_const_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_const_fwd_iterator & operator++()
pre-increment
_const_fwd_iterator operator-(difference_type n)
subtraction operator
_const_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_const_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_fwd_iterator & operator--()
pre-decrement
_const_fwd_iterator operator+(difference_type n)
addition operator
_const_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
const value_type & operator*() const
dereference operator
const reverse iterator for UString
_const_rev_iterator operator-(difference_type n)
subtraction operator
_const_rev_iterator operator+(difference_type n)
addition operator
_const_rev_iterator & operator++()
pre-increment
_const_rev_iterator & operator+=(difference_type n)
addition assignment operator
const value_type & operator[](difference_type n) const
dereference at offset operator
_const_rev_iterator & operator--()
pre-decrement
const value_type & operator*() const
dereference operator
_const_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
forward iterator for UString
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
_fwd_iterator & operator++()
pre-increment
_fwd_iterator operator-(difference_type n)
subtraction operator
_fwd_iterator & operator=(const _fwd_iterator &i)
int setCharacter(unicode_char uc)
Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); ...
_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_fwd_iterator operator+(difference_type n)
addition operator
value_type & operator*() const
dereference operator
_fwd_iterator & operator--()
pre-decrement
value_type & operator[](difference_type n) const
dereference at offset operator
reverse iterator for UString
_rev_iterator & operator+=(difference_type n)
addition assignment operator
_rev_iterator & operator--()
pre-decrement
value_type & operator*() const
dereference operator
_rev_iterator & operator++()
pre-increment
_rev_iterator operator-(difference_type n)
subtraction operator
_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
value_type & operator[](difference_type n) const
dereference at offset operator
_rev_iterator operator+(difference_type n)
addition operator
A UTF-16 string with implicit conversion to/from std::string and std::wstring.
reverse_iterator rend()
returns a reverse iterator just past the beginning of the string
static size_type _verifyUTF8(const unsigned char *c_str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
size_type length() const
Returns the number of code points in the current string.
iterator insert(iterator i, const code_point &ch)
inserts ch before the code point denoted by i
const wchar_t * asWStr_c_str() const
returns the current string in the native form of a nul-terminated wchar_t array
bool operator>(const UString &right) const
greater than operator
size_type size() const
Returns the number of code points in the current string.
static size_t _utf32_to_utf8(const unicode_char &in_uc, unsigned char out_cp[6])
writes the given UTF-32 uc_in to the buffer location out_cp using UTF-8 encoding, returns the number ...
const code_point * data() const
returns a pointer to the first character in the current string
UString()
default constructor, creates an empty string
static size_t _utf8_to_utf32(const unsigned char in_cp[6], unicode_char &out_uc)
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of...
const char * asUTF8_c_str() const
returns the current string in UTF-8 form as a nul-terminated char array
bool operator==(const UString &right) const
equality operator
bool operator!=(const UString &right) const
inequality operator
const unicode_char * asUTF32_c_str() const
returns the current string in UTF-32 form as a nul-terminated unicode_char array
size_type find(const UString &str, size_type index=0) const
returns the index of the first occurrence of str within the current string, starting at index; return...
bool operator>=(const UString &right) const
greater than or equal operator
size_type rfind(const UString &str, size_type index=0) const
returns the location of the first occurrence of str in the current string, doing a reverse search fro...
void reserve(size_type size)
sets the capacity of the string to at least size code points
static size_t _utf32_to_utf16(const unicode_char &in_uc, code_point out_cp[2])
writes the given UTF-32 uc_in to the buffer location out_cp using UTF-16 encoding,...
const utf32string & asUTF32() const
returns the current string in UTF-32 form within a utf32string
static size_t _utf16_to_utf32(const code_point in_cp[2], unicode_char &out_uc)
converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc,...
void clear()
deletes all of the elements in the string
int setChar(size_type loc, unicode_char ch)
sets the value of the character at loc to the Unicode value ch (UTF-32)
~UString()
destructor
UString & assign(iterator start, iterator end)
gives the current string the values from start to end
int compare(const UString &str) const
compare str to the current string
code_point value_type
value type typedef for use in iterators
bool operator<=(const UString &right) const
less than or equal operator
std::basic_string< unicode_char > utf32string
string type used for returning UTF-32 formatted data
static bool _utf16_surrogate_follow(code_point cp)
returns true if cp matches the signature of a surrogate pair following character
size_type find_first_of(const UString &str, size_type index=0, size_type num=npos) const
Returns the index of the first character within the current string that matches any character in str,...
static size_t _utf16_char_length(code_point cp)
estimates the number of UTF-16 code points in the sequence starting with cp
iterator erase(iterator loc)
removes the code point pointed to by loc, returning an iterator to the next character
std::basic_string< code_point > dstring
bool operator<(const UString &right) const
less than operator
static bool _utf8_start_char(unsigned char cp)
returns true if cp is the beginning of a UTF-8 sequence
uint16 code_point
a single UTF-16 code point
size_type find_last_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the first character within the current string that matches any character in str,...
static bool _utf16_surrogate_lead(code_point cp)
returns true if cp matches the signature of a surrogate pair lead character
iterator end()
returns an iterator just past the end of the string
unicode_char getChar(size_type loc) const
returns the data point loc evaluated as a UTF-32 value
static bool _utf16_independent_char(code_point cp)
returns true if cp does not match the signature for the lead or follow code point of a surrogate pair...
static const size_type npos
the usual constant representing: not found, no limit, etc
uint32 unicode_char
a single 32-bit Unicode character
UString & operator=(const UString &s)
assignment operator, implicitly casts all compatible types
_fwd_iterator iterator
iterator
const std::wstring & asWStr() const
returns the current string in the native form of std::wstring
bool inString(unicode_char ch) const
returns true if the given Unicode character ch is in this string
code_point & operator[](size_type index)
code point dereference operator
size_type find_first_not_of(const UString &str, size_type index=0, size_type num=npos) const
returns the index of the first character within the current string that does not match any character ...
UString & append(const UString &str)
appends str on to the end of the current string
const code_point * c_str() const
returns a pointer to the first character in the current string
code_point & at(size_type loc)
returns a reference to the element in the string at index loc
void resize(size_type num, const code_point &val=0)
changes the size of the string to size, filling in any new area with val
_const_fwd_iterator const_iterator
const iterator
reverse_iterator rbegin()
returns a reverse iterator to the last element of the string
size_t size_type
size type used to indicate string size and character positions within the string
UString & replace(size_type index1, size_type num1, const UString &str)
replaces up to num1 code points of the current string (starting at index1) with str
const std::string & asUTF8() const
returns the current string in UTF-8 form within a std::string
static size_t _utf8_char_length(unsigned char cp)
estimates the number of UTF-8 code points in the sequence starting with cp
size_type length_Characters() const
Returns the number of Unicode characters in the string.
void push_back(unicode_char val)
appends val to the end of the string
iterator begin()
returns an iterator to the first element of the string
size_type find_last_not_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that does not match any character i...
size_type max_size() const
returns the maximum number of UTF-16 code points that the string can hold
UString substr(size_type index, size_type num=npos) const
returns a substring of the current string, starting at index, and num characters long.
void swap(UString &from)
exchanges the elements of the current string with those of from
size_type capacity() const
returns the number of elements that the string can hold before it will need to allocate more space
bool empty() const
returns true if the string has no elements, false otherwise
float len(float x, float y)