Nemanja Trifunovic
5 years ago
14 changed files with 550 additions and 375 deletions
@ -0,0 +1,3 @@ |
|||
[submodule "extern/gtest"] |
|||
path = extern/gtest |
|||
url = git@github.com:google/googletest.git |
@ -1,78 +0,0 @@ |
|||
#include "../../source/utf8.h" |
|||
using namespace utf8; |
|||
using namespace std; |
|||
|
|||
int main() |
|||
{ |
|||
string u; |
|||
#if __cplusplus >= 201103L // C++ 11 or later
|
|||
//append
|
|||
|
|||
append(0x0448, u); |
|||
assert (u[0] == char(0xd1) && u[1] == char(0x88) && u.length() == 2); |
|||
|
|||
u.clear(); |
|||
append(0x65e5, u); |
|||
assert (u[0] == char(0xe6) && u[1] == char(0x97) && u[2] == char(0xa5) && u.length() == 3); |
|||
|
|||
u.clear(); |
|||
append(0x3044, u); |
|||
assert (u[0] == char(0xe3) && u[1] == char(0x81) && u[2] == char(0x84) && u.length() == 3); |
|||
|
|||
u.clear(); |
|||
append(0x10346, u); |
|||
assert (u[0] == char(0xf0) && u[1] == char(0x90) && u[2] == char(0x8d) && u[3] == char(0x86) && u.length() == 4); |
|||
|
|||
//utf16to8
|
|||
u16string utf16string = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e}; |
|||
u.clear(); |
|||
u = utf16to8(utf16string); |
|||
assert (u.size() == 10); |
|||
|
|||
//utf8to16
|
|||
string utf8_with_surrogates = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"; |
|||
u16string utf16result = utf8to16(utf8_with_surrogates); |
|||
assert (utf16result.length() == 4); |
|||
assert (utf16result[2] == 0xd834); |
|||
assert (utf16result[3] == 0xdd1e); |
|||
|
|||
// utf32to8
|
|||
u32string utf32string = {0x448, 0x65E5, 0x10346}; |
|||
string utf8result = utf32to8(utf32string); |
|||
assert (utf8result.size() == 9); |
|||
|
|||
// utf8to32
|
|||
const char* twochars = "\xe6\x97\xa5\xd1\x88"; |
|||
u32string utf32result = utf8to32(twochars); |
|||
assert (utf32result.size() == 2); |
|||
|
|||
//find_invalid
|
|||
string utf_invalid = "\xe6\x97\xa5\xd1\x88\xfa"; |
|||
auto invalid = find_invalid(utf_invalid); |
|||
assert (invalid == 5); |
|||
|
|||
//is_valid
|
|||
bool bvalid = is_valid(utf_invalid); |
|||
assert (bvalid == false); |
|||
bvalid = is_valid(utf8_with_surrogates); |
|||
assert (bvalid == true); |
|||
|
|||
//replace_invalid
|
|||
string invalid_sequence = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z"; |
|||
string replace_invalid_result = replace_invalid(invalid_sequence, '?'); |
|||
bvalid = is_valid(replace_invalid_result); |
|||
assert (bvalid); |
|||
const string fixed_invalid_sequence = "a????z"; |
|||
assert (fixed_invalid_sequence == replace_invalid_result); |
|||
|
|||
//starts_with_bom
|
|||
string byte_order_mark = {char(0xef), char(0xbb), char(0xbf)}; |
|||
bool bbom = starts_with_bom(byte_order_mark); |
|||
assert (bbom == true); |
|||
string threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88"; |
|||
bool no_bbom = starts_with_bom(threechars); |
|||
assert (no_bbom == false); |
|||
|
|||
|
|||
#endif // C++ 11 or later
|
|||
} |
@ -1,273 +0,0 @@ |
|||
#include <cstring> |
|||
#include <cassert> |
|||
#include <vector> |
|||
#include "../../source/utf8.h" |
|||
using namespace utf8; |
|||
using namespace std; |
|||
|
|||
int main() |
|||
{ |
|||
//append
|
|||
unsigned char u[5] = {0,0,0,0,0}; |
|||
|
|||
append(0x0448, u); |
|||
assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0); |
|||
|
|||
append(0x65e5, u); |
|||
assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0); |
|||
|
|||
append(0x3044, u); |
|||
assert (u[0] == 0xe3 && u[1] == 0x81 && u[2] == 0x84 && u[3] == 0 && u[4] == 0); |
|||
|
|||
append(0x10346, u); |
|||
assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0); |
|||
|
|||
|
|||
//next
|
|||
const char* twochars = "\xe6\x97\xa5\xd1\x88"; |
|||
const char* w = twochars; |
|||
int cp = next(w, twochars + 6); |
|||
assert (cp == 0x65e5); |
|||
assert (w == twochars + 3); |
|||
|
|||
const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88"; |
|||
w = threechars; |
|||
cp = next(w, threechars + 9); |
|||
assert (cp == 0x10346); |
|||
assert (w == threechars + 4); |
|||
cp = next(w, threechars + 9); |
|||
assert (cp == 0x65e5); |
|||
assert (w == threechars + 7); |
|||
cp = next(w, threechars + 9); |
|||
assert (cp == 0x0448); |
|||
assert (w == threechars + 9); |
|||
|
|||
//peek_next
|
|||
const char* const cw = twochars; |
|||
cp = peek_next(cw, cw + 6); |
|||
assert (cp == 0x65e5); |
|||
assert (cw == twochars); |
|||
|
|||
//prior
|
|||
w = twochars + 3; |
|||
cp = prior (w, twochars); |
|||
assert (cp == 0x65e5); |
|||
assert (w == twochars); |
|||
|
|||
w = threechars + 9; |
|||
cp = prior(w, threechars); |
|||
assert (cp == 0x0448); |
|||
assert (w == threechars + 7); |
|||
cp = prior(w, threechars); |
|||
assert (cp == 0x65e5); |
|||
assert (w == threechars + 4); |
|||
cp = prior(w, threechars); |
|||
assert (cp == 0x10346); |
|||
assert (w == threechars); |
|||
|
|||
// advance
|
|||
w = threechars; |
|||
advance(w, 2, threechars + 9); |
|||
assert(w == threechars + 7); |
|||
advance(w, -2, threechars); |
|||
assert(w == threechars); |
|||
advance(w, 3, threechars + 9); |
|||
assert(w == threechars + 9); |
|||
advance(w, -2, threechars); |
|||
assert(w == threechars + 4); |
|||
advance(w, -1, threechars); |
|||
assert(w == threechars); |
|||
|
|||
// distance
|
|||
size_t dist = utf8::distance(twochars, twochars + 5); |
|||
assert (dist == 2); |
|||
|
|||
// utf32to8
|
|||
int utf32string[] = {0x448, 0x65E5, 0x10346, 0}; |
|||
vector<char> utf8result; |
|||
utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); |
|||
assert (utf8result.size() == 9); |
|||
// try it with the return value;
|
|||
char* utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]); |
|||
assert (utf8_end == &utf8result[0] + 9); |
|||
|
|||
//utf8to32
|
|||
vector<int> utf32result; |
|||
utf8to32(twochars, twochars + 5, back_inserter(utf32result)); |
|||
assert (utf32result.size() == 2); |
|||
// try it with the return value;
|
|||
int* utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]); |
|||
assert (utf32_end == &utf32result[0] + 2); |
|||
|
|||
//utf16to8
|
|||
unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e}; |
|||
utf8result.clear(); |
|||
utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); |
|||
assert (utf8result.size() == 10); |
|||
// try it with the return value;
|
|||
utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]); |
|||
assert (utf8_end == &utf8result[0] + 10); |
|||
|
|||
//utf8to16
|
|||
char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e"; |
|||
vector <unsigned short> utf16result; |
|||
utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); |
|||
assert (utf16result.size() == 4); |
|||
assert (utf16result[2] == 0xd834); |
|||
assert (utf16result[3] == 0xdd1e); |
|||
// try it with the return value;
|
|||
unsigned short* utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]); |
|||
assert (utf16_end == &utf16result[0] + 4); |
|||
|
|||
//find_invalid
|
|||
char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa"; |
|||
char* invalid = find_invalid(utf_invalid, utf_invalid + 6); |
|||
assert (invalid == utf_invalid + 5); |
|||
|
|||
//is_valid
|
|||
bool bvalid = is_valid(utf_invalid, utf_invalid + 6); |
|||
assert (bvalid == false); |
|||
bvalid = is_valid(utf8_with_surrogates, utf8_with_surrogates + 9); |
|||
assert (bvalid == true); |
|||
|
|||
//starts_with_bom
|
|||
unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf}; |
|||
bool bbom = starts_with_bom(byte_order_mark, byte_order_mark + sizeof(byte_order_mark)); |
|||
assert (bbom == true); |
|||
bool no_bbom = starts_with_bom(threechars, threechars + sizeof(threechars)); |
|||
assert (no_bbom == false); |
|||
|
|||
//replace_invalid
|
|||
char invalid_sequence[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z"; |
|||
vector<char> replace_invalid_result; |
|||
replace_invalid (invalid_sequence, invalid_sequence + sizeof(invalid_sequence), std::back_inserter(replace_invalid_result), '?'); |
|||
bvalid = is_valid(replace_invalid_result.begin(), replace_invalid_result.end()); |
|||
assert (bvalid); |
|||
const char fixed_invalid_sequence[] = "a????z"; |
|||
assert (sizeof(fixed_invalid_sequence) == replace_invalid_result.size()); |
|||
assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.begin() + sizeof(fixed_invalid_sequence), fixed_invalid_sequence)); |
|||
|
|||
// iterator
|
|||
utf8::iterator<const char*> it(threechars, threechars, threechars + 9); |
|||
utf8::iterator<const char*> it2 = it; |
|||
assert (it2 == it); |
|||
assert (*it == 0x10346); |
|||
assert (*(++it) == 0x65e5); |
|||
assert ((*it++) == 0x65e5); |
|||
assert (*it == 0x0448); |
|||
assert (it != it2); |
|||
utf8::iterator<const char*> endit (threechars + 9, threechars, threechars + 9); |
|||
assert (++it == endit); |
|||
assert (*(--it) == 0x0448); |
|||
assert ((*it--) == 0x0448); |
|||
assert (*it == 0x65e5); |
|||
assert (--it == utf8::iterator<const char*>(threechars, threechars, threechars + 9)); |
|||
assert (*it == 0x10346); |
|||
|
|||
//////////////////////////////////////////////////////////
|
|||
//// Unchecked variants
|
|||
//////////////////////////////////////////////////////////
|
|||
|
|||
//append
|
|||
memset(u, 0, 5); |
|||
append(0x0448, u); |
|||
assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0); |
|||
|
|||
append(0x65e5, u); |
|||
assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0); |
|||
|
|||
append(0x10346, u); |
|||
assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0); |
|||
|
|||
//next
|
|||
w = twochars; |
|||
cp = unchecked::next(w); |
|||
assert (cp == 0x65e5); |
|||
assert (w == twochars + 3); |
|||
|
|||
w = threechars; |
|||
cp = unchecked::next(w); |
|||
assert (cp == 0x10346); |
|||
assert (w == threechars + 4); |
|||
cp = unchecked::next(w); |
|||
assert (cp == 0x65e5); |
|||
assert (w == threechars + 7); |
|||
cp = unchecked::next(w); |
|||
assert (cp == 0x0448); |
|||
assert (w == threechars + 9); |
|||
|
|||
//peek_next
|
|||
cp = unchecked::peek_next(cw); |
|||
assert (cp == 0x65e5); |
|||
assert (cw == twochars); |
|||
|
|||
// advance
|
|||
w = threechars; |
|||
unchecked::advance(w, 2); |
|||
assert(w == threechars + 7); |
|||
unchecked::advance(w, -2); |
|||
assert(w == threechars); |
|||
unchecked::advance(w, 3); |
|||
assert(w == threechars + 9); |
|||
unchecked::advance(w, -2); |
|||
assert(w == threechars + 4); |
|||
unchecked::advance(w, -1); |
|||
assert(w == threechars); |
|||
|
|||
// distance
|
|||
dist = unchecked::distance(twochars, twochars + 5); |
|||
assert (dist == 2); |
|||
|
|||
// utf32to8
|
|||
utf8result.clear(); |
|||
unchecked::utf32to8(utf32string, utf32string + 3, back_inserter(utf8result)); |
|||
assert (utf8result.size() == 9); |
|||
// try it with the return value;
|
|||
utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]); |
|||
assert(utf8_end == &utf8result[0] + 9); |
|||
|
|||
//utf8to32
|
|||
utf32result.clear(); |
|||
unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result)); |
|||
assert (utf32result.size() == 2); |
|||
// try it with the return value;
|
|||
utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]); |
|||
assert (utf32_end == &utf32result[0] + 2); |
|||
|
|||
//utf16to8
|
|||
utf8result.clear(); |
|||
unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result)); |
|||
assert (utf8result.size() == 10); |
|||
// try it with the return value;
|
|||
utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]); |
|||
assert (utf8_end == &utf8result[0] + 10); |
|||
|
|||
//utf8to16
|
|||
utf16result.clear(); |
|||
unchecked::utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result)); |
|||
assert (utf16result.size() == 4); |
|||
assert (utf16result[2] == 0xd834); |
|||
assert (utf16result[3] == 0xdd1e); |
|||
// try it with the return value;
|
|||
utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]); |
|||
assert (utf16_end == &utf16result[0] + 4); |
|||
|
|||
// iterator
|
|||
utf8::unchecked::iterator<const char*> un_it(threechars); |
|||
utf8::unchecked::iterator<const char*> un_it2 = un_it; |
|||
assert (un_it2 == un_it); |
|||
assert (*un_it == 0x10346); |
|||
assert (*(++un_it) == 0x65e5); |
|||
assert ((*un_it++) == 0x65e5); |
|||
assert (un_it != un_it2); |
|||
assert (*un_it == 0x0448); |
|||
utf8::unchecked::iterator<const char*> un_endit (threechars + 9); |
|||
assert (++un_it == un_endit); |
|||
assert (*(--un_it) == 0x0448); |
|||
assert ((*un_it--) == 0x0448); |
|||
assert (*un_it == 0x65e5); |
|||
assert (--un_it == utf8::unchecked::iterator<const char*>(threechars)); |
|||
assert (*un_it == 0x10346); |
|||
} |
|||
|
|||
|
@ -0,0 +1,22 @@ |
|||
# Test executables: "negative" and "cpp11" are built from one source each,
# "apitests" bundles the checked/unchecked API and iterator test sources.
add_executable(negative ${PROJECT_SOURCE_DIR}/tests/negative.cpp)
add_executable(cpp11 ${PROJECT_SOURCE_DIR}/tests/test_cpp11.cpp)
add_executable(apitests
    ${PROJECT_SOURCE_DIR}/tests/test_checked_api.cpp
    ${PROJECT_SOURCE_DIR}/tests/test_unchecked_api.cpp
    ${PROJECT_SOURCE_DIR}/tests/test_checked_iterator.cpp
    ${PROJECT_SOURCE_DIR}/tests/test_unchecked_iterator.cpp
)

# Only cpp11 and apitests link gtest_main; negative links the library alone.
target_link_libraries(negative PRIVATE utf8::cpp)
target_link_libraries(cpp11 PRIVATE utf8::cpp gtest_main)
target_link_libraries(apitests PRIVATE utf8::cpp gtest_main)

# negative_test is run with the utf8_invalid.txt data file as its only argument.
add_test(negative_test negative ${PROJECT_SOURCE_DIR}/tests/test_data/utf8_invalid.txt)
add_test(cpp11_test cpp11)
add_test(api_test apitests)
@ -0,0 +1,188 @@ |
|||
#include "gtest/gtest.h" |
|||
#include "utf8/checked.h" |
|||
|
|||
#include <string> |
|||
#include <vector> |
|||
using namespace utf8; |
|||
using namespace std; |
|||
|
|||
|
|||
// append() into a raw byte buffer: encodes U+0448 (2 bytes), U+65E5 and U+3044
// (3 bytes each) and U+10346 (4 bytes), checking all five buffer bytes each time.
// The buffer is not cleared between calls; each encoding is at least as long as
// the previous one, so the expected tail stays zero.
TEST(CheckedAPITests, test_append)
{
    unsigned char buf[5] = {0, 0, 0, 0, 0};

    append(0x0448, buf);
    const unsigned char want_0448[5] = {0xd1, 0x88, 0, 0, 0};
    for (int i = 0; i < 5; ++i) {
        EXPECT_EQ (buf[i], want_0448[i]);
    }

    append(0x65e5, buf);
    const unsigned char want_65e5[5] = {0xe6, 0x97, 0xa5, 0, 0};
    for (int i = 0; i < 5; ++i) {
        EXPECT_EQ (buf[i], want_65e5[i]);
    }

    append(0x3044, buf);
    const unsigned char want_3044[5] = {0xe3, 0x81, 0x84, 0, 0};
    for (int i = 0; i < 5; ++i) {
        EXPECT_EQ (buf[i], want_3044[i]);
    }

    append(0x10346, buf);
    const unsigned char want_10346[5] = {0xf0, 0x90, 0x8d, 0x86, 0};
    for (int i = 0; i < 5; ++i) {
        EXPECT_EQ (buf[i], want_10346[i]);
    }
}
|||
|
|||
// next(): each call returns one decoded code point and leaves the cursor on
// the first byte after the sequence it consumed.
TEST(CheckedAPITests, test_next)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    const char* cursor = twochars;
    int code_point = next(cursor, twochars + 6);
    EXPECT_EQ (code_point, 0x65e5);
    EXPECT_EQ (cursor, twochars + 3);

    // 4-byte, 3-byte and 2-byte sequences back to back (9 bytes in total).
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* const stop = threechars + 9;
    cursor = threechars;

    code_point = next(cursor, stop);
    EXPECT_EQ (code_point, 0x10346);
    EXPECT_EQ (cursor, threechars + 4);

    code_point = next(cursor, stop);
    EXPECT_EQ (code_point, 0x65e5);
    EXPECT_EQ (cursor, threechars + 7);

    code_point = next(cursor, stop);
    EXPECT_EQ (code_point, 0x0448);
    EXPECT_EQ (cursor, stop);
}
|||
|
|||
// peek_next(): decodes the first code point of the text; the pointer handed in
// is const, so the call cannot move it.
TEST(CheckedAPITests, test_peek_next)
{
    const char* const text = "\xe6\x97\xa5\xd1\x88";
    const int first = peek_next(text, text + 6);
    EXPECT_EQ (first, 0x65e5);
}
|||
|
|||
// prior(): called with the cursor and the start of the buffer; after each call
// the cursor sits on the first byte of the code point that was returned.
TEST(CheckedAPITests, test_prior)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    const char* cursor = twochars + 3;
    int code_point = prior (cursor, twochars);
    EXPECT_EQ (code_point, 0x65e5);
    EXPECT_EQ (cursor, twochars);

    // Walk the 9-byte string backwards: 2-byte, then 3-byte, then 4-byte sequence.
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    cursor = threechars + 9;

    code_point = prior(cursor, threechars);
    EXPECT_EQ (code_point, 0x0448);
    EXPECT_EQ (cursor, threechars + 7);

    code_point = prior(cursor, threechars);
    EXPECT_EQ (code_point, 0x65e5);
    EXPECT_EQ (cursor, threechars + 4);

    code_point = prior(cursor, threechars);
    EXPECT_EQ (code_point, 0x10346);
    EXPECT_EQ (cursor, threechars);
}
|||
|
|||
// advance(): positive counts are called with the end of the buffer, negative
// counts with its start; the cursor moves by whole code points either way.
TEST(CheckedAPITests, test_advance)
{
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* const first = threechars;
    const char* const last = threechars + 9;
    const char* cursor = first;

    advance(cursor, 2, last);
    EXPECT_EQ(cursor, threechars + 7);

    advance(cursor, -2, first);
    EXPECT_EQ(cursor, first);

    advance(cursor, 3, last);
    EXPECT_EQ(cursor, last);

    advance(cursor, -2, first);
    EXPECT_EQ(cursor, threechars + 4);

    advance(cursor, -1, first);
    EXPECT_EQ(cursor, first);
}
|||
|
|||
// distance(): the five bytes of the input hold two code points.
TEST(CheckedAPITests, test_distance)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    size_t dist = utf8::distance(twochars, twochars + 5);
    // Unsigned literal: dist is a size_t, so comparing it with a plain int
    // literal would be a signed/unsigned comparison inside EXPECT_EQ.
    EXPECT_EQ (dist, 2u);
}
|||
|
|||
// utf32to8(): converts the first three elements of the array (the trailing 0 is
// outside the range passed in) and expects 2 + 3 + 4 = 9 output bytes.
TEST(CheckedAPITests, test_utf32to8)
{
    int utf32string[] = {0x448, 0x65E5, 0x10346, 0};
    string utf8result;
    utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
    // Unsigned literal avoids a signed/unsigned comparison with size().
    EXPECT_EQ (utf8result.size(), 9u);
}
|||
|
|||
// utf8to32(): five input bytes are expected to produce two output elements.
TEST(CheckedAPITests, test_utf8to32)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    vector<int> utf32result;
    utf8to32(twochars, twochars + 5, back_inserter(utf32result));
    // Unsigned literal avoids a signed/unsigned comparison with size().
    EXPECT_EQ (utf32result.size(), 2u);
}
|||
|
|||
// utf16to8(): five UTF-16 code units, the last two being the pair
// 0xd834 0xdd1e, are expected to produce ten UTF-8 bytes.
TEST(CheckedAPITests, test_utf16to8)
{
    unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
    string utf8result;
    utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
    // Unsigned literal avoids a signed/unsigned comparison with size().
    EXPECT_EQ (utf8result.size(), 10u);
}
|||
|
|||
// utf8to16(): nine input bytes are expected to produce four UTF-16 code units,
// the last two being 0xd834 0xdd1e.
TEST(CheckedAPITests, test_utf8to16)
{
    char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
    vector <unsigned short> utf16result;
    utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result));
    // Fatal assertion: elements [2] and [3] are read below, which would be
    // out of bounds if the conversion produced fewer than four units.
    ASSERT_EQ (utf16result.size(), 4u);
    EXPECT_EQ (utf16result[2], 0xd834);
    EXPECT_EQ (utf16result[3], 0xdd1e);
}
|||
|
|||
// replace_invalid(): the repaired output must be valid UTF-8 and equal to
// "a????z". Both the input range and the expected text are measured with
// sizeof, so the terminating NUL is part of the input and of the expected output.
TEST(CheckedAPITests, test_replace_invalid)
{
    char invalid_sequence[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";
    vector<char> replace_invalid_result;
    replace_invalid (invalid_sequence, invalid_sequence + sizeof(invalid_sequence), std::back_inserter(replace_invalid_result), '?');
    bool bvalid = is_valid(replace_invalid_result.begin(), replace_invalid_result.end());
    EXPECT_TRUE (bvalid);
    const char fixed_invalid_sequence[] = "a????z";
    // Fatal assertion: the element-wise comparison below walks the whole result
    // and reads the same number of bytes from the expected array, so the two
    // sizes must match before it runs.
    ASSERT_EQ (sizeof(fixed_invalid_sequence), replace_invalid_result.size());
    EXPECT_TRUE (std::equal(replace_invalid_result.begin(), replace_invalid_result.end(), fixed_invalid_sequence));
}
|||
|
|||
// find_invalid(): the input is five bytes followed by a lone 0xfa; the returned
// position is expected to be that sixth byte (offset 5).
TEST(CheckedAPITests, test_find_invalid)
{
    char text[] = "\xe6\x97\xa5\xd1\x88\xfa";
    char* const text_end = text + 6;
    char* first_bad = find_invalid(text, text_end);
    EXPECT_EQ (first_bad, text + 5);
}
|||
|
|||
// is_valid(): rejects the six-byte input ending in 0xfa and accepts the
// nine-byte input.
TEST(CheckedAPITests, test_is_valid)
{
    char rejected[] = "\xe6\x97\xa5\xd1\x88\xfa";
    EXPECT_FALSE (is_valid(rejected, rejected + 6));

    char accepted[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
    EXPECT_TRUE (is_valid(accepted, accepted + 9));
}
|||
|
|||
// starts_with_bom(): true for the three bytes EF BB BF, false for text that
// begins with an ordinary 4-byte sequence.
TEST(CheckedAPITests, test_starts_with_bom)
{
    unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf};
    bool bbom = starts_with_bom(byte_order_mark, byte_order_mark + sizeof(byte_order_mark));
    EXPECT_TRUE (bbom);

    // Declared as an array, not a pointer: sizeof on a `const char*` yields the
    // size of the pointer rather than the length of the text. The -1 drops the
    // terminating NUL so the range covers exactly the 9 text bytes.
    const char threechars[] = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    bool no_bbom = starts_with_bom(threechars, threechars + sizeof(threechars) - 1);
    EXPECT_FALSE (no_bbom);
}
@ -0,0 +1,31 @@ |
|||
#include "gtest/gtest.h" |
|||
#include "utf8/checked.h" |
|||
|
|||
using namespace utf8; |
|||
|
|||
|
|||
// Forward iteration with utf8::iterator: copy/equality, dereference, pre- and
// post-increment, and reaching an iterator constructed at the end of the buffer.
TEST(CheckedIteratrTests, test_increment)
{
    typedef utf8::iterator<const char*> checked_iter;

    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* const range_end = threechars + 9;

    checked_iter it(threechars, threechars, range_end);
    checked_iter start_copy = it;
    EXPECT_EQ (start_copy, it);
    EXPECT_EQ (*it, 0x10346);
    EXPECT_EQ (*(++it), 0x65e5);
    // Post-increment yields the old position's code point, then moves on.
    EXPECT_EQ ((*it++), 0x65e5);
    EXPECT_EQ (*it, 0x0448);
    EXPECT_NE (it, start_copy);

    checked_iter endit (range_end, threechars, range_end);
    EXPECT_EQ (++it, endit);
}
|||
|
|||
// Backward iteration with utf8::iterator, starting from the end of the buffer
// and stepping back to an iterator constructed at its start.
TEST(CheckedIteratrTests, test_decrement)
{
    typedef utf8::iterator<const char*> checked_iter;

    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* const range_end = threechars + 9;

    checked_iter it(range_end, threechars, range_end);
    EXPECT_EQ (*(--it), 0x0448);
    // Post-decrement yields the old position's code point, then steps back.
    EXPECT_EQ ((*it--), 0x0448);
    EXPECT_EQ (*it, 0x65e5);
    EXPECT_EQ (--it, checked_iter(threechars, threechars, range_end));
    EXPECT_EQ (*it, 0x10346);
}
@ -0,0 +1,106 @@ |
|||
#include "gtest/gtest.h" |
|||
#include "utf8.h" |
|||
#include <string> |
|||
using namespace utf8; |
|||
using namespace std; |
|||
|
|||
#if __cplusplus >= 201103L // C++ 11 or later
|
|||
|
|||
// append() into a std::string: encodes U+0448 (2 bytes), U+65E5 and U+3044
// (3 bytes each) and U+10346 (4 bytes), clearing the string between cases.
// The length is checked first with a fatal assertion so the byte checks never
// index past the end of a string that came out shorter than expected; unsigned
// literals avoid a signed/unsigned comparison with length().
TEST(CPP11APITests, test_append)
{
    string u;
    append(0x0448, u);
    ASSERT_EQ (u.length(), 2u);
    EXPECT_EQ (u[0], char(0xd1));
    EXPECT_EQ (u[1], char(0x88));

    u.clear();
    append(0x65e5, u);
    ASSERT_EQ (u.length(), 3u);
    EXPECT_EQ (u[0], char(0xe6));
    EXPECT_EQ (u[1], char(0x97));
    EXPECT_EQ (u[2], char(0xa5));

    u.clear();
    append(0x3044, u);
    ASSERT_EQ (u.length(), 3u);
    EXPECT_EQ (u[0], char(0xe3));
    EXPECT_EQ (u[1], char(0x81));
    EXPECT_EQ (u[2], char(0x84));

    u.clear();
    append(0x10346, u);
    ASSERT_EQ (u.length(), 4u);
    EXPECT_EQ (u[0], char(0xf0));
    EXPECT_EQ (u[1], char(0x90));
    EXPECT_EQ (u[2], char(0x8d));
    EXPECT_EQ (u[3], char(0x86));
}
|||
|
|||
// utf16to8() on a std::u16string: five code units are expected to produce
// ten UTF-8 bytes.
TEST(CPP11APITests, test_utf16to8)
{
    u16string utf16string = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
    string u = utf16to8(utf16string);
    // Unsigned literal avoids a signed/unsigned comparison with size().
    EXPECT_EQ (u.size(), 10u);
}
|||
|
|||
// utf8to16() on a std::string: nine bytes are expected to produce four UTF-16
// code units, the last two being 0xd834 0xdd1e.
TEST(CPP11APITests, test_utf8to16)
{
    string utf8_with_surrogates = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
    u16string utf16result = utf8to16(utf8_with_surrogates);
    // Fatal assertion: elements [2] and [3] are read below, which would be
    // out of bounds if fewer than four units came back.
    ASSERT_EQ (utf16result.size(), 4u);
    EXPECT_EQ (utf16result[2], 0xd834);
    EXPECT_EQ (utf16result[3], 0xdd1e);
}
|||
|
|||
// utf32to8() on a std::u32string: three code points are expected to produce
// 2 + 3 + 4 = 9 UTF-8 bytes.
TEST(CPP11APITests, test_utf32to8)
{
    u32string utf32string = {0x448, 0x65E5, 0x10346};
    string utf8result = utf32to8(utf32string);
    // Unsigned literal avoids a signed/unsigned comparison with size().
    EXPECT_EQ (utf8result.size(), 9u);
}
|||
|
|||
// utf8to32() called with a C string: five bytes are expected to produce two
// UTF-32 elements.
TEST(CPP11APITests, test_utf8to32)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    u32string utf32result = utf8to32(twochars);
    // Unsigned literal avoids a signed/unsigned comparison with size().
    EXPECT_EQ (utf32result.size(), 2u);
}
|||
|
|||
// find_invalid() on a std::string: the input is five bytes followed by a lone
// 0xfa, and the result is expected to compare equal to 5.
TEST(CPP11APITests, test_find_invalid)
{
    string text = "\xe6\x97\xa5\xd1\x88\xfa";
    auto first_bad = find_invalid(text);
    EXPECT_EQ (first_bad, 5);
}
|||
|
|||
// is_valid() on std::string: rejects the text ending in a lone 0xfa and
// accepts the nine-byte text.
TEST(CPP11APITests, test_is_valid)
{
    string rejected = "\xe6\x97\xa5\xd1\x88\xfa";
    EXPECT_FALSE (is_valid(rejected));

    string accepted = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
    EXPECT_TRUE (is_valid(accepted));
}
|||
|
|||
// replace_invalid() on std::string with '?' as the replacement: the output
// must be valid UTF-8 and equal to "a????z".
TEST(CPP11APITests, test_replace_invalid)
{
    string damaged = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";
    string repaired = replace_invalid(damaged, '?');
    EXPECT_TRUE (is_valid(repaired));

    const string expected = "a????z";
    EXPECT_EQ(expected, repaired);
}
|||
|
|||
// starts_with_bom() on std::string: true for the bytes EF BB BF, false for
// text that begins with an ordinary 4-byte sequence.
TEST(CPP11APITests, test_starts_with_bom)
{
    string bom_only = {char(0xef), char(0xbb), char(0xbf)};
    EXPECT_TRUE (starts_with_bom(bom_only));

    string plain_text = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    EXPECT_FALSE (starts_with_bom(plain_text));
}
|||
#endif // C++ 11 or later
|
@ -0,0 +1,148 @@ |
|||
#include "gtest/gtest.h" |
|||
#include "utf8/unchecked.h" |
|||
|
|||
#include <string> |
|||
#include <vector> |
|||
using namespace utf8::unchecked; |
|||
using namespace std; |
|||
|
|||
// append() into a raw byte buffer: encodes U+0448 (2 bytes), U+65E5 and U+3044
// (3 bytes each) and U+10346 (4 bytes), checking all five buffer bytes each time.
// The buffer is not cleared between calls; each encoding is at least as long as
// the previous one, so the expected tail stays zero.
TEST(UnCheckedAPITests, test_append)
{
    unsigned char buf[5] = {0, 0, 0, 0, 0};

    append(0x0448, buf);
    const unsigned char want_0448[5] = {0xd1, 0x88, 0, 0, 0};
    for (int i = 0; i < 5; ++i) {
        EXPECT_EQ (buf[i], want_0448[i]);
    }

    append(0x65e5, buf);
    const unsigned char want_65e5[5] = {0xe6, 0x97, 0xa5, 0, 0};
    for (int i = 0; i < 5; ++i) {
        EXPECT_EQ (buf[i], want_65e5[i]);
    }

    append(0x3044, buf);
    const unsigned char want_3044[5] = {0xe3, 0x81, 0x84, 0, 0};
    for (int i = 0; i < 5; ++i) {
        EXPECT_EQ (buf[i], want_3044[i]);
    }

    append(0x10346, buf);
    const unsigned char want_10346[5] = {0xf0, 0x90, 0x8d, 0x86, 0};
    for (int i = 0; i < 5; ++i) {
        EXPECT_EQ (buf[i], want_10346[i]);
    }
}
|||
|
|||
// utf8::unchecked::next(): takes only the cursor; each call returns one decoded
// code point and leaves the cursor on the first byte after that sequence.
TEST(UnCheckedAPITests, test_next)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    const char* cursor = twochars;
    int code_point = utf8::unchecked::next(cursor);
    EXPECT_EQ (code_point, 0x65e5);
    EXPECT_EQ (cursor, twochars + 3);

    // 4-byte, 3-byte and 2-byte sequences back to back (9 bytes in total).
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    cursor = threechars;

    code_point = utf8::unchecked::next(cursor);
    EXPECT_EQ (code_point, 0x10346);
    EXPECT_EQ (cursor, threechars + 4);

    code_point = utf8::unchecked::next(cursor);
    EXPECT_EQ (code_point, 0x65e5);
    EXPECT_EQ (cursor, threechars + 7);

    code_point = utf8::unchecked::next(cursor);
    EXPECT_EQ (code_point, 0x0448);
    EXPECT_EQ (cursor, threechars + 9);
}
|||
|
|||
// peek_next(): decodes the first code point of the text; the pointer handed in
// is const, so the call cannot move it.
TEST(UnCheckedAPITests, test_peek_next)
{
    const char* const text = "\xe6\x97\xa5\xd1\x88";
    const int first = peek_next(text);
    EXPECT_EQ (first, 0x65e5);
}
|||
|
|||
// prior(): takes only the cursor; after each call the cursor sits on the first
// byte of the code point that was returned.
TEST(UnCheckedAPITests, test_prior)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    const char* cursor = twochars + 3;
    int code_point = prior (cursor);
    EXPECT_EQ (code_point, 0x65e5);
    EXPECT_EQ (cursor, twochars);

    // Walk the 9-byte string backwards: 2-byte, then 3-byte, then 4-byte sequence.
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    cursor = threechars + 9;

    code_point = prior(cursor);
    EXPECT_EQ (code_point, 0x0448);
    EXPECT_EQ (cursor, threechars + 7);

    code_point = prior(cursor);
    EXPECT_EQ (code_point, 0x65e5);
    EXPECT_EQ (cursor, threechars + 4);

    code_point = prior(cursor);
    EXPECT_EQ (code_point, 0x10346);
    EXPECT_EQ (cursor, threechars);
}
|||
|
|||
// utf8::unchecked::advance(): moves the cursor forward (positive count) or
// backward (negative count) by whole code points.
TEST(UnCheckedAPITests, test_advance)
{
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* const first = threechars;
    const char* const last = threechars + 9;
    const char* cursor = first;

    utf8::unchecked::advance(cursor, 2);
    EXPECT_EQ(cursor, threechars + 7);

    utf8::unchecked::advance(cursor, -2);
    EXPECT_EQ(cursor, first);

    utf8::unchecked::advance(cursor, 3);
    EXPECT_EQ(cursor, last);

    utf8::unchecked::advance(cursor, -2);
    EXPECT_EQ(cursor, threechars + 4);

    utf8::unchecked::advance(cursor, -1);
    EXPECT_EQ(cursor, first);
}
|||
|
|||
// utf8::unchecked::distance(): the five bytes of the input hold two code points.
TEST(UnCheckedAPITests, test_distance)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    size_t dist = utf8::unchecked::distance(twochars, twochars + 5);
    // Unsigned literal: dist is a size_t, so comparing it with a plain int
    // literal would be a signed/unsigned comparison inside EXPECT_EQ.
    EXPECT_EQ (dist, 2u);
}
|||
|
|||
// utf32to8(): converts the first three elements of the array (the trailing 0 is
// outside the range passed in) and expects 2 + 3 + 4 = 9 output bytes.
TEST(UnCheckedAPITests, test_utf32to8)
{
    int utf32string[] = {0x448, 0x65E5, 0x10346, 0};
    string utf8result;
    utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
    // Unsigned literal avoids a signed/unsigned comparison with size().
    EXPECT_EQ (utf8result.size(), 9u);
}
|||
|
|||
// utf8to32(): five input bytes are expected to produce two output elements.
TEST(UnCheckedAPITests, test_utf8to32)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    vector<int> utf32result;
    utf8to32(twochars, twochars + 5, back_inserter(utf32result));
    // Unsigned literal avoids a signed/unsigned comparison with size().
    EXPECT_EQ (utf32result.size(), 2u);
}
|||
|
|||
// utf16to8(): five UTF-16 code units, the last two being the pair
// 0xd834 0xdd1e, are expected to produce ten UTF-8 bytes.
TEST(UnCheckedAPITests, test_utf16to8)
{
    unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
    string utf8result;
    utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
    // Unsigned literal avoids a signed/unsigned comparison with size().
    EXPECT_EQ (utf8result.size(), 10u);
}
|||
|
|||
// utf8to16(): nine input bytes are expected to produce four UTF-16 code units,
// the last two being 0xd834 0xdd1e.
TEST(UnCheckedAPITests, test_utf8to16)
{
    char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
    vector <unsigned short> utf16result;
    utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result));
    // Fatal assertion: elements [2] and [3] are read below, which would be
    // out of bounds if the conversion produced fewer than four units.
    ASSERT_EQ (utf16result.size(), 4u);
    EXPECT_EQ (utf16result[2], 0xd834);
    EXPECT_EQ (utf16result[3], 0xdd1e);
}
@ -0,0 +1,32 @@ |
|||
#include "gtest/gtest.h" |
|||
#include "utf8/unchecked.h" |
|||
|
|||
using namespace utf8::unchecked; |
|||
|
|||
|
|||
// Forward iteration with utf8::unchecked::iterator: copy/equality, dereference,
// pre- and post-increment, and reaching an iterator constructed at the end.
TEST(UnCheckedIteratrTests, test_increment)
{
    typedef utf8::unchecked::iterator<const char*> unchecked_iter;

    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";

    unchecked_iter it(threechars);
    unchecked_iter start_copy = it;
    EXPECT_EQ (start_copy, it);
    EXPECT_EQ (*it, 0x10346);
    EXPECT_EQ (*(++it), 0x65e5);
    // Post-increment yields the old position's code point, then moves on.
    EXPECT_EQ ((*it++), 0x65e5);
    EXPECT_EQ (*it, 0x0448);
    EXPECT_NE (it, start_copy);

    unchecked_iter endit (threechars + 9);
    EXPECT_EQ (++it, endit);
}
|||
|
|||
// Backward iteration with utf8::unchecked::iterator, starting from the end of
// the buffer and stepping back to an iterator constructed at its start.
TEST(UnCheckedIteratrTests, test_decrement)
{
    typedef utf8::unchecked::iterator<const char*> unchecked_iter;

    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";

    unchecked_iter it(threechars + 9);
    EXPECT_EQ (*(--it), 0x0448);
    // Post-decrement yields the old position's code point, then steps back.
    EXPECT_EQ ((*it--), 0x0448);
    EXPECT_EQ (*it, 0x65e5);
    EXPECT_EQ (--it, unchecked_iter(threechars));
    EXPECT_EQ (*it, 0x10346);
}
Loading…
Reference in new issue