13 // naming conventions:
14 // offset: raw byte offset (0 based)
15 // position: code point offset (0 based)
16 // index: code point offset (1 based or negative)
18 // function that will count the number of code points (utf-8 characters) from the given beginning to the given end
19 size_t code_point_count(const string& str, size_t start, size_t end) {
20 return utf8::distance(str.begin() + start, str.begin() + end);
23 size_t code_point_count(const string& str) {
24 return utf8::distance(str.begin(), str.end());
27 // function that will return the byte offset at a code point position
28 size_t offset_at_position(const string& str, size_t position) {
29 string::const_iterator it = str.begin();
30 utf8::advance(it, position, str.end());
31 return distance(str.begin(), it);
34 // function that returns number of bytes in a character at offset
35 size_t code_point_size_at_offset(const string& str, size_t offset) {
36 // get iterator from string and forward by offset
37 string::const_iterator stop = str.begin() + offset;
38 // check if beyond boundary
39 if (stop == str.end()) return 0;
40 // advance by one code point
41 utf8::advance(stop, 1, str.end());
42 // calculate offset for code point
43 return stop - str.begin() - offset;
46 // map an index (1-based, or negative) onto a 0-based value for a string of length `len`
// NOTE(review): this excerpt does not show every line of the function (the
// embedded numbering jumps, e.g. 52 -> 55), so the statements executed in
// most branches below are not visible here; only the conditions are documented.
47 size_t normalize_index(int index, size_t len) {
// signed copy of the length, used by all comparisons against the signed `index`
48 long signed_len = static_cast<long>(len);
49 // assuming the index is 1-based
50 // we are returning a 0-based index
51 if (index > 0 && index <= signed_len) {
52 // positive and within string length
55 else if (index > signed_len) {
56 // positive and past string length
// index of exactly zero is handled as its own case
59 else if (index == 0) {
// remaining indexes are negative; compare the magnitude with the length
// (the magnitude is computed on a double)
62 else if (std::abs((double)index) <= signed_len) {
63 // negative and within string length
// adding the length turns the negative index into a value counted from the start (e.g. -1 gives len - 1)
64 return index + signed_len;
67 // negative and past string length
77 // convert from utf16/wide string to utf8 string
// takes the wide string by const reference and returns a `string`
// NOTE(review): the lines between the signature and the reserve() call, and
// the end of the utf16to8() call, are not visible in this excerpt.
78 string convert_from_utf16(const wstring& utf16)
81 // pre-allocate expected memory
// NOTE(review): sizeof(utf16) is the compile-time size of the wstring object
// itself, not the number of characters it holds, so the reserved amount does
// not depend on the input; utf16.size() looks like what was intended - confirm.
82 utf8.reserve(sizeof(utf16)/2);
// hand the range [utf16.begin(), utf16.end()) to utf8::utf16to8; the call continues on a line not shown here
83 utf8::utf16to8(utf16.begin(), utf16.end(),
88 // convert from utf8 string to utf16/wide string
89 wstring convert_to_utf16(const string& utf8)
92 // pre-allocate expected memory
93 utf16.reserve(code_point_count(utf8)*2);
94 utf8::utf8to16(utf8.begin(), utf8.end(),
95 back_inserter(utf16));