Otclient  14/8/2020
string.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010-2020 OTClient <https://github.com/edubart/otclient>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20  * THE SOFTWARE.
21  */
22 
23 #include "string.h"
24 #include "format.h"
25 #include <boost/algorithm/string.hpp>
26 #include <cctype>
27 #include <physfs.h>
28 
29 #ifdef _MSC_VER
30  #pragma warning(disable:4267) // '?' : conversion from 'A' to 'B', possible loss of data
31 #endif
32 
33 namespace stdext {
34 
35 std::string resolve_path(const std::string& filePath, std::string sourcePath)
36 {
37  if(stdext::starts_with(filePath, "/"))
38  return filePath;
39  if(!stdext::ends_with(sourcePath, "/")) {
40  std::size_t slashPos = sourcePath.find_last_of("/");
41  if(slashPos == std::string::npos)
42  throw_exception(format("invalid source path '%s', for file '%s'", sourcePath, filePath));
43  sourcePath = sourcePath.substr(0, slashPos + 1);
44  }
45  return sourcePath + filePath;
46 }
47 
// Get the current local date and time formatted as "%b %d %Y %H:%M:%S"
// (for example "Aug 14 2020 12:34:56" in the C locale).
//
// Returns an empty string when the local time cannot be obtained or when the
// formatted text does not fit into the internal buffer.
std::string date_time_string()
{
    char date[32];
    const std::time_t now = std::time(nullptr);

    // std::localtime may return a null pointer on failure; handing that to
    // std::strftime would be undefined behaviour.
    const std::tm* ts = std::localtime(&now);
    if(!ts)
        return std::string();

    // std::strftime returns 0 and leaves the buffer contents unspecified when
    // the result does not fit, so build the string from the reported length
    // instead of trusting a terminator to be present.
    const std::size_t written = std::strftime(date, sizeof(date), "%b %d %Y %H:%M:%S", ts);
    return std::string(date, written);
}
57 
// Format an unsigned 64-bit number as hexadecimal text using a string stream
// in hex mode (no prefix is requested).
std::string dec_to_hex(uint64_t num)
{
    std::ostringstream hexStream;
    // Same effect as streaming the std::hex manipulator.
    hexStream.setf(std::ios_base::hex, std::ios_base::basefield);
    hexStream << num;
    return hexStream.str();
}
66 
// Parse hexadecimal text into an unsigned 64-bit number using a string stream
// in hex mode.
//
// Returns 0 when nothing can be extracted (for example for an empty string).
uint64_t hex_to_dec(const std::string& str)
{
    // Must be initialised: when the stream is already at end-of-input the
    // extraction never runs and would otherwise leave this value indeterminate.
    uint64_t num = 0;
    std::istringstream i(str);
    i >> std::hex >> num;
    return num;
}
74 
// Check whether src consists solely of well-formed UTF-8 sequences.
//
// Accepted single bytes are TAB, LF, CR and 0x20..0x7E; every other ASCII
// control byte (including NUL and 0x7F) makes the string invalid. Multi-byte
// sequences must not be overlong, must not encode surrogates and must not
// exceed plane 16.
//
// The scan is bounded by src.size() rather than by a NUL terminator, so bytes
// following an embedded NUL are no longer silently ignored, and a sequence cut
// off by the end of the string is rejected by an explicit length check.
bool is_valid_utf8(const std::string& src)
{
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>(src.data());
    const std::size_t len = src.size();

    auto inRange = [](unsigned char b, unsigned char lo, unsigned char hi) {
        return lo <= b && b <= hi;
    };
    auto isContinuation = [](unsigned char b) {
        return 0x80 <= b && b <= 0xBF;
    };

    std::size_t i = 0;
    while(i < len) {
        const unsigned char b0 = bytes[i];
        const std::size_t left = len - i;

        // ASCII
        // use b0 <= 0x7F to allow ASCII control characters
        if(b0 == 0x09 || b0 == 0x0A || b0 == 0x0D || inRange(b0, 0x20, 0x7E)) {
            i += 1;
            continue;
        }

        // non-overlong 2-byte
        if(left >= 2 && inRange(b0, 0xC2, 0xDF) && isContinuation(bytes[i + 1])) {
            i += 2;
            continue;
        }

        if(left >= 3 && isContinuation(bytes[i + 2])) {
            const unsigned char b1 = bytes[i + 1];
            const bool threeByte =
                // excluding overlongs
                (b0 == 0xE0 && inRange(b1, 0xA0, 0xBF)) ||
                // straight 3-byte
                ((inRange(b0, 0xE1, 0xEC) || b0 == 0xEE || b0 == 0xEF) && isContinuation(b1)) ||
                // excluding surrogates
                (b0 == 0xED && inRange(b1, 0x80, 0x9F));
            if(threeByte) {
                i += 3;
                continue;
            }
        }

        if(left >= 4 && isContinuation(bytes[i + 2]) && isContinuation(bytes[i + 3])) {
            const unsigned char b1 = bytes[i + 1];
            const bool fourByte =
                // planes 1-3
                (b0 == 0xF0 && inRange(b1, 0x90, 0xBF)) ||
                // planes 4-15
                (inRange(b0, 0xF1, 0xF3) && isContinuation(b1)) ||
                // plane 16
                (b0 == 0xF4 && inRange(b1, 0x80, 0x8F));
            if(fourByte) {
                i += 4;
                continue;
            }
        }

        return false;
    }
    return true;
}
145 
// Convert UTF-8 text to a single-byte (Latin-1 style) string.
//
// Kept as-is: bytes 0x20..0x7F plus TAB, LF and CR.
// Converted: 0xC2 followed by 0xA2..0xBA (emitted as that second byte) and
//            0xC3 followed by a continuation byte (emitted as 64 + byte).
// Dropped:   everything else, i.e. other control bytes, lead bytes of
//            sequences that have no mapping here, and stray continuation bytes.
//
// A 0xC2/0xC3 lead is only combined with the next byte when that byte really
// is a continuation byte (0x80..0xBF). A lead at the very end of the string,
// or one followed by an ordinary character, is dropped on its own so that no
// bogus output is produced and the following character is not swallowed.
std::string utf8_to_latin1(const std::string& src)
{
    std::string out;
    const std::size_t len = src.length();
    for(std::size_t i = 0; i < len; ++i) {
        const unsigned char c = static_cast<unsigned char>(src[i]);
        if((c >= 32 && c < 128) || c == 0x0d || c == 0x0a || c == 0x09) {
            out += static_cast<char>(c);
        } else if(c == 0xc2 || c == 0xc3) {
            // Truncated sequence: nothing left to combine with.
            if(i + 1 >= len)
                break;

            const unsigned char c2 = static_cast<unsigned char>(src[i + 1]);
            // Not a continuation byte: drop the lead only and let the next
            // iteration handle that byte normally.
            if((c2 & 0xc0) != 0x80)
                continue;

            ++i; // consume the continuation byte
            if(c == 0xc2) {
                if(c2 > 0xa1 && c2 < 0xbb)
                    out += static_cast<char>(c2);
            } else {
                out += static_cast<char>(64 + c2);
            }
        }
        // Any other byte matches no branch and is dropped. This covers the
        // lead and continuation bytes of longer sequences, so they need no
        // explicit skipping and cannot consume unrelated characters.
    }
    return out;
}
169 
// Convert a single-byte (Latin-1 style) string to UTF-8.
//
// Bytes 0x20..0x7F plus TAB, LF and CR are copied unchanged. Bytes 0x80..0xFF
// become the two-byte sequence 0xC2/0xC3 + continuation byte. Remaining
// control bytes (below 0x20) are dropped: they were previously pushed through
// the two-byte encoder, which produced unrelated code points (0x01 became
// 0xC2 0x81). utf8_to_latin1() discards those sequences, so the result of a
// round trip through both functions is unchanged.
std::string latin1_to_utf8(const std::string& src)
{
    std::string out;
    out.reserve(src.size());
    for(const char ch : src) {
        const unsigned char c = static_cast<unsigned char>(ch);
        if((c >= 32 && c < 128) || c == 0x0d || c == 0x0a || c == 0x09) {
            out += ch;
        } else if(c >= 0x80) {
            // 0x80..0xBF -> 0xC2 xx, 0xC0..0xFF -> 0xC3 xx
            out += static_cast<char>(0xc2 + (c > 0xbf));
            out += static_cast<char>(0x80 + (c & 0x3f));
        }
        // other control bytes are intentionally skipped
    }
    return out;
}
183 
184 #ifdef WIN32
185 #include <winsock2.h>
186 #include <windows.h>
187 std::wstring utf8_to_utf16(const std::string& src)
188 {
189  std::wstring res;
190  wchar_t out[4096];
191  if(MultiByteToWideChar(CP_UTF8, 0, src.c_str(), -1, out, 4096))
192  res = out;
193  return res;
194 }
195 
196 std::string utf16_to_utf8(const std::wstring& src)
197 {
198  std::string res;
199  char out[4096];
200  if(WideCharToMultiByte(CP_UTF8, 0, src.c_str(), -1, out, 4096, NULL, NULL))
201  res = out;
202  return res;
203 }
204 
205 std::wstring latin1_to_utf16(const std::string& src)
206 {
207  return utf8_to_utf16(latin1_to_utf8(src));
208 }
209 
210 std::string utf16_to_latin1(const std::wstring& src)
211 {
212  return utf8_to_latin1(utf16_to_utf8(src));
213 }
214 #endif
215 
216 void tolower(std::string& str)
217 {
218  std::transform(str.begin(), str.end(), str.begin(), lochar);
219 }
220 
221 void toupper(std::string& str)
222 {
223  std::transform(str.begin(), str.end(), str.begin(), upchar);
224 }
225 
226 void trim(std::string& str)
227 {
228  boost::trim(str);
229 }
230 
// Return the upper-case form of a single-byte character.
//
// Handles ASCII 'a'..'z' and the accented lower-case letters in 0xE0..0xFE by
// subtracting 32. Two bytes of the upper range are deliberately left alone:
//   0xF7 is the division sign, not a letter (0xF7 - 32 is the multiplication
//        sign), and
//   0xFF (y with diaeresis) has no upper-case form 32 positions below it
//        (0xDF is the sharp s).
// Every other character is returned unchanged.
char upchar(char c)
{
    const unsigned char u = static_cast<unsigned char>(c);
    const bool asciiLower = (u >= 97 && u <= 122);
    const bool accentedLower = (u >= 0xE0 && u <= 0xFE && u != 0xF7);
    if(asciiLower || accentedLower)
        return static_cast<char>(u - 32);
    return c;
}
237 
// Return the lower-case form of a single-byte character.
//
// Handles ASCII 'A'..'Z' and the accented upper-case letters in 0xC0..0xDE by
// adding 32. Two bytes of the upper range are deliberately left alone:
//   0xD7 is the multiplication sign, not a letter (0xD7 + 32 is the division
//        sign), and
//   0xDF (sharp s) is already lower-case (0xFF is y with diaeresis).
// Every other character is returned unchanged.
char lochar(char c)
{
    const unsigned char u = static_cast<unsigned char>(c);
    const bool asciiUpper = (u >= 65 && u <= 90);
    const bool accentedUpper = (u >= 0xC0 && u <= 0xDE && u != 0xD7);
    if(asciiUpper || accentedUpper)
        return static_cast<char>(u + 32);
    return c;
}
244 
245 void ucwords(std::string& str)
246 {
247  uint32 strLen = str.length();
248  if(strLen == 0)
249  return;
250 
251  str[0] = upchar(str[0]);
252  for(uint32 i = 1; i < strLen; ++i) {
253  if(str[i - 1] == ' ')
254  str[i] = upchar(str[i]);
255  }
256 }
257 
// Tell whether str finishes with the character sequence test.
// An empty test matches every string.
bool ends_with(const std::string& str, const std::string& test)
{
    const std::string::size_type testLen = test.size();
    if(testLen > str.size())
        return false;
    return str.compare(str.size() - testLen, testLen, test) == 0;
}
262 
// Tell whether str begins with the character sequence test.
// An empty test matches every string.
bool starts_with(const std::string& str, const std::string& test)
{
    const std::string::size_type testLen = test.size();
    if(testLen > str.size())
        return false;
    return str.compare(0, testLen, test) == 0;
}
267 
268 void replace_all(std::string& str, const std::string& search, const std::string& replacement)
269 {
270  return boost::replace_all(str, search, replacement);
271 }
272 
273 std::vector<std::string> split(const std::string& str, const std::string& separators)
274 {
275  std::vector<std::string> splitted;
276  boost::split(splitted, str, boost::is_any_of(std::string(separators)));
277  return splitted;
278 }
279 
280 }
stdext::latin1_to_utf8
std::string latin1_to_utf8(const std::string &src)
Definition: string.cpp:170
trim
#define trim(x)
Definition: lbitlib.cpp:193
string.h
stdext::ucwords
void ucwords(std::string &str)
Definition: string.cpp:245
uint32
uint32_t uint32
Definition: types.h:35
format.h
stdext::format
std::string format()
Definition: format.h:82
stdext::time
ticks_t time()
Definition: time.cpp:33
stdext::starts_with
bool starts_with(const std::string &str, const std::string &test)
Definition: string.cpp:263
stdext::resolve_path
std::string resolve_path(const std::string &filePath, std::string sourcePath)
Resolve a file path by combining sourcePath with filePath.
Definition: string.cpp:35
uint
unsigned int uint
Definition: types.h:31
stdext::is_valid_utf8
bool is_valid_utf8(const std::string &src)
Definition: string.cpp:75
stdext::trim
void trim(std::string &str)
Definition: string.cpp:226
stdext::hex_to_dec
uint64_t hex_to_dec(const std::string &str)
Definition: string.cpp:67
stdext::utf8_to_latin1
std::string utf8_to_latin1(const std::string &src)
Definition: string.cpp:146
stdext::throw_exception
void throw_exception(const std::string &what)
Throws a generic exception.
Definition: exception.h:43
stdext::ends_with
bool ends_with(const std::string &str, const std::string &test)
Definition: string.cpp:258
stdext::toupper
void toupper(std::string &str)
Definition: string.cpp:221
stdext::lochar
char lochar(char c)
Definition: string.cpp:238
stdext::split
std::vector< std::string > split(const std::string &str, const std::string &separators)
Definition: string.cpp:273
uchar
unsigned char uchar
Definition: types.h:29
stdext::replace_all
void replace_all(std::string &str, const std::string &search, const std::string &replacement)
Definition: string.cpp:268
stdext::date_time_string
std::string date_time_string()
Get current date and time in a std::string.
Definition: string.cpp:48
stdext
Definition: any.h:30
stdext::tolower
void tolower(std::string &str)
Definition: string.cpp:216
stdext::upchar
char upchar(char c)
Definition: string.cpp:231
stdext::dec_to_hex
std::string dec_to_hex(uint64_t num)
Definition: string.cpp:58