Browse Source

Introduce GTest

Restructure tests to use Google Test framework
master
Nemanja Trifunovic 5 years ago
parent
commit
66804081d4
  1. 3
      .gitmodules
  2. 24
      CMakeLists.txt
  3. 1
      extern/gtest
  4. 1
      source/utf8/cpp11.h
  5. 78
      test_drivers/smoke_test/cpp11.cpp
  6. 273
      test_drivers/smoke_test/test.cpp
  7. 22
      tests/CMakeLists.txt
  8. 18
      tests/negative.cpp
  9. 188
      tests/test_checked_api.cpp
  10. 31
      tests/test_checked_iterator.cpp
  11. 106
      tests/test_cpp11.cpp
  12. 0
      tests/test_data/utf8_invalid.txt
  13. 148
      tests/test_unchecked_api.cpp
  14. 32
      tests/test_unchecked_iterator.cpp

3
.gitmodules

@ -0,0 +1,3 @@
# Google Test is pulled in as a git submodule at extern/gtest.
# The HTTPS URL lets the submodule be fetched anonymously; an SSH-style URL
# (git@github.com:...) only works for users with a GitHub SSH key configured.
[submodule "extern/gtest"]
	path = extern/gtest
	url = https://github.com/google/googletest.git

24
CMakeLists.txt

@ -6,8 +6,8 @@ option(UTF8_SAMPLES "Enable building samples for UTF8-CPP" On)
add_library(utf8cpp INTERFACE)
target_include_directories(utf8cpp INTERFACE
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/source>"
$<INSTALL_INTERFACE:include/utf8cpp>
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/source>"
$<INSTALL_INTERFACE:include/utf8cpp>
)
add_library(utf8::cpp ALIAS utf8cpp)
@ -23,22 +23,12 @@ install(TARGETS utf8cpp EXPORT utf8cppConfig)
install(EXPORT utf8cppConfig DESTINATION ${DEF_INSTALL_CMAKE_DIR})
if(UTF8_SAMPLES)
add_executable(docsample ${PROJECT_SOURCE_DIR}/samples/docsample.cpp)
target_link_libraries(docsample PRIVATE utf8::cpp)
add_executable(docsample ${PROJECT_SOURCE_DIR}/samples/docsample.cpp)
target_link_libraries(docsample PRIVATE utf8::cpp)
endif()
if(UTF8_TESTS)
add_executable(smoke ${PROJECT_SOURCE_DIR}/test_drivers/smoke_test/test.cpp)
add_executable(cpp11 ${PROJECT_SOURCE_DIR}/test_drivers/smoke_test/cpp11.cpp)
add_executable(negative ${PROJECT_SOURCE_DIR}/test_drivers/negative/negative.cpp)
target_link_libraries(smoke PRIVATE utf8::cpp)
target_link_libraries(cpp11 PRIVATE utf8::cpp)
target_link_libraries(negative PRIVATE utf8::cpp)
enable_testing()
add_test(smoke_test smoke)
add_test(cpp11_test cpp11)
add_test(negative_test negative ${PROJECT_SOURCE_DIR}/test_data/negative/utf8_invalid.txt)
enable_testing()
add_subdirectory(extern/gtest)
add_subdirectory(tests)
endif()

1
extern/gtest

@ -0,0 +1 @@
Subproject commit 2fe3bd994b3189899d93f1d5a881e725e046fdc2

1
source/utf8/cpp11.h

@ -30,7 +30,6 @@ DEALINGS IN THE SOFTWARE.
#include "checked.h"
#include <string>
#include <cassert>
namespace utf8
{

78
test_drivers/smoke_test/cpp11.cpp

@ -1,78 +0,0 @@
#include "../../source/utf8.h"
using namespace utf8;
using namespace std;
int main()
{
string u;
#if __cplusplus >= 201103L // C++ 11 or later
//append
append(0x0448, u);
assert (u[0] == char(0xd1) && u[1] == char(0x88) && u.length() == 2);
u.clear();
append(0x65e5, u);
assert (u[0] == char(0xe6) && u[1] == char(0x97) && u[2] == char(0xa5) && u.length() == 3);
u.clear();
append(0x3044, u);
assert (u[0] == char(0xe3) && u[1] == char(0x81) && u[2] == char(0x84) && u.length() == 3);
u.clear();
append(0x10346, u);
assert (u[0] == char(0xf0) && u[1] == char(0x90) && u[2] == char(0x8d) && u[3] == char(0x86) && u.length() == 4);
//utf16to8
u16string utf16string = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
u.clear();
u = utf16to8(utf16string);
assert (u.size() == 10);
//utf8to16
string utf8_with_surrogates = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
u16string utf16result = utf8to16(utf8_with_surrogates);
assert (utf16result.length() == 4);
assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
// utf32to8
u32string utf32string = {0x448, 0x65E5, 0x10346};
string utf8result = utf32to8(utf32string);
assert (utf8result.size() == 9);
// utf8to32
const char* twochars = "\xe6\x97\xa5\xd1\x88";
u32string utf32result = utf8to32(twochars);
assert (utf32result.size() == 2);
//find_invalid
string utf_invalid = "\xe6\x97\xa5\xd1\x88\xfa";
auto invalid = find_invalid(utf_invalid);
assert (invalid == 5);
//is_valid
bool bvalid = is_valid(utf_invalid);
assert (bvalid == false);
bvalid = is_valid(utf8_with_surrogates);
assert (bvalid == true);
//replace_invalid
string invalid_sequence = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";
string replace_invalid_result = replace_invalid(invalid_sequence, '?');
bvalid = is_valid(replace_invalid_result);
assert (bvalid);
const string fixed_invalid_sequence = "a????z";
assert (fixed_invalid_sequence == replace_invalid_result);
//starts_with_bom
string byte_order_mark = {char(0xef), char(0xbb), char(0xbf)};
bool bbom = starts_with_bom(byte_order_mark);
assert (bbom == true);
string threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
bool no_bbom = starts_with_bom(threechars);
assert (no_bbom == false);
#endif // C++ 11 or later
}

273
test_drivers/smoke_test/test.cpp

@ -1,273 +0,0 @@
#include <cstring>
#include <cassert>
#include <vector>
#include "../../source/utf8.h"
using namespace utf8;
using namespace std;
int main()
{
//append
unsigned char u[5] = {0,0,0,0,0};
append(0x0448, u);
assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0);
append(0x65e5, u);
assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0);
append(0x3044, u);
assert (u[0] == 0xe3 && u[1] == 0x81 && u[2] == 0x84 && u[3] == 0 && u[4] == 0);
append(0x10346, u);
assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0);
//next
const char* twochars = "\xe6\x97\xa5\xd1\x88";
const char* w = twochars;
int cp = next(w, twochars + 6);
assert (cp == 0x65e5);
assert (w == twochars + 3);
const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
w = threechars;
cp = next(w, threechars + 9);
assert (cp == 0x10346);
assert (w == threechars + 4);
cp = next(w, threechars + 9);
assert (cp == 0x65e5);
assert (w == threechars + 7);
cp = next(w, threechars + 9);
assert (cp == 0x0448);
assert (w == threechars + 9);
//peek_next
const char* const cw = twochars;
cp = peek_next(cw, cw + 6);
assert (cp == 0x65e5);
assert (cw == twochars);
//prior
w = twochars + 3;
cp = prior (w, twochars);
assert (cp == 0x65e5);
assert (w == twochars);
w = threechars + 9;
cp = prior(w, threechars);
assert (cp == 0x0448);
assert (w == threechars + 7);
cp = prior(w, threechars);
assert (cp == 0x65e5);
assert (w == threechars + 4);
cp = prior(w, threechars);
assert (cp == 0x10346);
assert (w == threechars);
// advance
w = threechars;
advance(w, 2, threechars + 9);
assert(w == threechars + 7);
advance(w, -2, threechars);
assert(w == threechars);
advance(w, 3, threechars + 9);
assert(w == threechars + 9);
advance(w, -2, threechars);
assert(w == threechars + 4);
advance(w, -1, threechars);
assert(w == threechars);
// distance
size_t dist = utf8::distance(twochars, twochars + 5);
assert (dist == 2);
// utf32to8
int utf32string[] = {0x448, 0x65E5, 0x10346, 0};
vector<char> utf8result;
utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
assert (utf8result.size() == 9);
// try it with the return value;
char* utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]);
assert (utf8_end == &utf8result[0] + 9);
//utf8to32
vector<int> utf32result;
utf8to32(twochars, twochars + 5, back_inserter(utf32result));
assert (utf32result.size() == 2);
// try it with the return value;
int* utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]);
assert (utf32_end == &utf32result[0] + 2);
//utf16to8
unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
utf8result.clear();
utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
assert (utf8result.size() == 10);
// try it with the return value;
utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]);
assert (utf8_end == &utf8result[0] + 10);
//utf8to16
char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
vector <unsigned short> utf16result;
utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result));
assert (utf16result.size() == 4);
assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
// try it with the return value;
unsigned short* utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]);
assert (utf16_end == &utf16result[0] + 4);
//find_invalid
char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa";
char* invalid = find_invalid(utf_invalid, utf_invalid + 6);
assert (invalid == utf_invalid + 5);
//is_valid
bool bvalid = is_valid(utf_invalid, utf_invalid + 6);
assert (bvalid == false);
bvalid = is_valid(utf8_with_surrogates, utf8_with_surrogates + 9);
assert (bvalid == true);
//starts_with_bom
unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf};
bool bbom = starts_with_bom(byte_order_mark, byte_order_mark + sizeof(byte_order_mark));
assert (bbom == true);
bool no_bbom = starts_with_bom(threechars, threechars + sizeof(threechars));
assert (no_bbom == false);
//replace_invalid
char invalid_sequence[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";
vector<char> replace_invalid_result;
replace_invalid (invalid_sequence, invalid_sequence + sizeof(invalid_sequence), std::back_inserter(replace_invalid_result), '?');
bvalid = is_valid(replace_invalid_result.begin(), replace_invalid_result.end());
assert (bvalid);
const char fixed_invalid_sequence[] = "a????z";
assert (sizeof(fixed_invalid_sequence) == replace_invalid_result.size());
assert (std::equal(replace_invalid_result.begin(), replace_invalid_result.begin() + sizeof(fixed_invalid_sequence), fixed_invalid_sequence));
// iterator
utf8::iterator<const char*> it(threechars, threechars, threechars + 9);
utf8::iterator<const char*> it2 = it;
assert (it2 == it);
assert (*it == 0x10346);
assert (*(++it) == 0x65e5);
assert ((*it++) == 0x65e5);
assert (*it == 0x0448);
assert (it != it2);
utf8::iterator<const char*> endit (threechars + 9, threechars, threechars + 9);
assert (++it == endit);
assert (*(--it) == 0x0448);
assert ((*it--) == 0x0448);
assert (*it == 0x65e5);
assert (--it == utf8::iterator<const char*>(threechars, threechars, threechars + 9));
assert (*it == 0x10346);
//////////////////////////////////////////////////////////
//// Unchecked variants
//////////////////////////////////////////////////////////
//append
memset(u, 0, 5);
append(0x0448, u);
assert (u[0] == 0xd1 && u[1] == 0x88 && u[2] == 0 && u[3] == 0 && u[4] == 0);
append(0x65e5, u);
assert (u[0] == 0xe6 && u[1] == 0x97 && u[2] == 0xa5 && u[3] == 0 && u[4] == 0);
append(0x10346, u);
assert (u[0] == 0xf0 && u[1] == 0x90 && u[2] == 0x8d && u[3] == 0x86 && u[4] == 0);
//next
w = twochars;
cp = unchecked::next(w);
assert (cp == 0x65e5);
assert (w == twochars + 3);
w = threechars;
cp = unchecked::next(w);
assert (cp == 0x10346);
assert (w == threechars + 4);
cp = unchecked::next(w);
assert (cp == 0x65e5);
assert (w == threechars + 7);
cp = unchecked::next(w);
assert (cp == 0x0448);
assert (w == threechars + 9);
//peek_next
cp = unchecked::peek_next(cw);
assert (cp == 0x65e5);
assert (cw == twochars);
// advance
w = threechars;
unchecked::advance(w, 2);
assert(w == threechars + 7);
unchecked::advance(w, -2);
assert(w == threechars);
unchecked::advance(w, 3);
assert(w == threechars + 9);
unchecked::advance(w, -2);
assert(w == threechars + 4);
unchecked::advance(w, -1);
assert(w == threechars);
// distance
dist = unchecked::distance(twochars, twochars + 5);
assert (dist == 2);
// utf32to8
utf8result.clear();
unchecked::utf32to8(utf32string, utf32string + 3, back_inserter(utf8result));
assert (utf8result.size() == 9);
// try it with the return value;
utf8_end = utf32to8(utf32string, utf32string + 3, &utf8result[0]);
assert(utf8_end == &utf8result[0] + 9);
//utf8to32
utf32result.clear();
unchecked::utf8to32(twochars, twochars + 5, back_inserter(utf32result));
assert (utf32result.size() == 2);
// try it with the return value;
utf32_end = utf8to32(twochars, twochars + 5, &utf32result[0]);
assert (utf32_end == &utf32result[0] + 2);
//utf16to8
utf8result.clear();
unchecked::utf16to8(utf16string, utf16string + 5, back_inserter(utf8result));
assert (utf8result.size() == 10);
// try it with the return value;
utf8_end = utf16to8 (utf16string, utf16string + 5, &utf8result[0]);
assert (utf8_end == &utf8result[0] + 10);
//utf8to16
utf16result.clear();
unchecked::utf8to16(utf8_with_surrogates, utf8_with_surrogates + 9, back_inserter(utf16result));
assert (utf16result.size() == 4);
assert (utf16result[2] == 0xd834);
assert (utf16result[3] == 0xdd1e);
// try it with the return value;
utf16_end = utf8to16 (utf8_with_surrogates, utf8_with_surrogates + 9, &utf16result[0]);
assert (utf16_end == &utf16result[0] + 4);
// iterator
utf8::unchecked::iterator<const char*> un_it(threechars);
utf8::unchecked::iterator<const char*> un_it2 = un_it;
assert (un_it2 == un_it);
assert (*un_it == 0x10346);
assert (*(++un_it) == 0x65e5);
assert ((*un_it++) == 0x65e5);
assert (un_it != un_it2);
assert (*un_it == 0x0448);
utf8::unchecked::iterator<const char*> un_endit (threechars + 9);
assert (++un_it == un_endit);
assert (*(--un_it) == 0x0448);
assert ((*un_it--) == 0x0448);
assert (*un_it == 0x65e5);
assert (--un_it == utf8::unchecked::iterator<const char*>(threechars));
assert (*un_it == 0x10346);
}

22
tests/CMakeLists.txt

@ -0,0 +1,22 @@
# Test executables for UTF8-CPP.
#
#   negative - stand-alone program; receives the path of the invalid-UTF-8
#              data file as its single command-line argument.
#   cpp11    - Google Test binary built from test_cpp11.cpp.
#   apitests - Google Test binary built from the checked/unchecked API and
#              iterator test sources.
add_executable(negative ${PROJECT_SOURCE_DIR}/tests/negative.cpp)
add_executable(cpp11 ${PROJECT_SOURCE_DIR}/tests/test_cpp11.cpp)
add_executable(apitests
  ${PROJECT_SOURCE_DIR}/tests/test_checked_api.cpp
  ${PROJECT_SOURCE_DIR}/tests/test_unchecked_api.cpp
  ${PROJECT_SOURCE_DIR}/tests/test_checked_iterator.cpp
  ${PROJECT_SOURCE_DIR}/tests/test_unchecked_iterator.cpp
)

# The Google Test binaries link gtest_main, which supplies main().
target_link_libraries(negative PRIVATE utf8::cpp)
target_link_libraries(cpp11 PRIVATE
  utf8::cpp
  gtest_main
)
target_link_libraries(apitests PRIVATE
  utf8::cpp
  gtest_main
)

# Use the NAME/COMMAND signature: it replaces an executable target name in
# COMMAND with the location of the built binary, which the legacy
# add_test(<name> <command>) form does not do.
add_test(NAME negative_test
  COMMAND negative "${PROJECT_SOURCE_DIR}/tests/test_data/utf8_invalid.txt"
)
add_test(NAME cpp11_test COMMAND cpp11)
add_test(NAME api_test COMMAND apitests)

18
test_drivers/negative/negative.cpp → tests/negative.cpp

@ -1,4 +1,4 @@
#include "../../source/utf8.h"
#include "utf8.h"
using namespace utf8;
#include <string>
@ -17,13 +17,13 @@ int main(int argc, char** argv)
test_file_path = argv[1];
else {
cout << "Wrong number of arguments" << endl;
exit(0);
return 1;
}
// Open the test file
ifstream fs8(test_file_path.c_str());
if (!fs8.is_open()) {
cout << "Could not open " << test_file_path << endl;
return 0;
return 1;
}
// Read it line by line
@ -38,16 +38,22 @@ int main(int argc, char** argv)
bool expected_valid = (find(INVALID_LINES, INVALID_LINES_END, line_count) == INVALID_LINES_END);
// Print out lines that contain unexpected invalid UTF-8
if (!is_valid(line.begin(), line.end())) {
if (expected_valid)
if (expected_valid) {
cout << "Unexpected invalid utf-8 at line " << line_count << '\n';
return 1;
}
// try fixing it:
string fixed_line;
replace_invalid(line.begin(), line.end(), back_inserter(fixed_line));
if (!is_valid(fixed_line.begin(), fixed_line.end()))
if (!is_valid(fixed_line.begin(), fixed_line.end())) {
cout << "replace_invalid() resulted in an invalid utf-8 at line " << line_count << '\n';
return 1;
}
}
else if (!expected_valid)
else if (!expected_valid) {
cout << "Invalid utf-8 NOT detected at line " << line_count << '\n';
return 1;
}
}
}

188
tests/test_checked_api.cpp

@ -0,0 +1,188 @@
#include "gtest/gtest.h"
#include "utf8/checked.h"
#include <string>
#include <vector>
using namespace utf8;
using namespace std;
// append(): encodes four code points in turn into one reused buffer and checks
// every byte of the buffer after each call.
TEST(CheckedAPITests, test_append)
{
    // Zero-filled so that the bytes behind the encoded sequence can be verified.
    // The buffer is never cleared: each sequence is at least as long as the
    // previous one (2, 3, 3 and 4 bytes), so stale bytes are always overwritten.
    unsigned char encoded[5] = {0, 0, 0, 0, 0};

    // U+0448 -> D1 88
    append(0x0448, encoded);
    EXPECT_EQ(encoded[0], 0xd1);
    EXPECT_EQ(encoded[1], 0x88);
    EXPECT_EQ(encoded[2], 0);
    EXPECT_EQ(encoded[3], 0);
    EXPECT_EQ(encoded[4], 0);

    // U+65E5 -> E6 97 A5
    append(0x65e5, encoded);
    EXPECT_EQ(encoded[0], 0xe6);
    EXPECT_EQ(encoded[1], 0x97);
    EXPECT_EQ(encoded[2], 0xa5);
    EXPECT_EQ(encoded[3], 0);
    EXPECT_EQ(encoded[4], 0);

    // U+3044 -> E3 81 84
    append(0x3044, encoded);
    EXPECT_EQ(encoded[0], 0xe3);
    EXPECT_EQ(encoded[1], 0x81);
    EXPECT_EQ(encoded[2], 0x84);
    EXPECT_EQ(encoded[3], 0);
    EXPECT_EQ(encoded[4], 0);

    // U+10346 -> F0 90 8D 86
    append(0x10346, encoded);
    EXPECT_EQ(encoded[0], 0xf0);
    EXPECT_EQ(encoded[1], 0x90);
    EXPECT_EQ(encoded[2], 0x8d);
    EXPECT_EQ(encoded[3], 0x86);
    EXPECT_EQ(encoded[4], 0);
}
// next(): decodes consecutive code points and verifies both the returned value
// and how far the iterator was moved.
TEST(CheckedAPITests, test_next)
{
    // Two code points, 3 + 2 bytes.
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    // End of the 5 encoded bytes; the bound deliberately stops before the
    // terminating NUL instead of reaching past it.
    const char* twochars_end = twochars + 5;
    const char* w = twochars;
    int cp = next(w, twochars_end);
    EXPECT_EQ (cp, 0x65e5);
    EXPECT_EQ (w, twochars + 3);

    // Three code points, 4 + 3 + 2 bytes.
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* threechars_end = threechars + 9;
    w = threechars;
    cp = next(w, threechars_end);
    EXPECT_EQ (cp, 0x10346);
    EXPECT_EQ (w, threechars + 4);
    cp = next(w, threechars_end);
    EXPECT_EQ (cp, 0x65e5);
    EXPECT_EQ (w, threechars + 7);
    cp = next(w, threechars_end);
    EXPECT_EQ (cp, 0x0448);
    EXPECT_EQ (w, threechars_end);
}
// peek_next(): returns the first code point of the sequence; the iterator is
// passed as a const pointer, so it cannot be moved by the call.
TEST(CheckedAPITests, test_peek_next)
{
    const char* const cw = "\xe6\x97\xa5\xd1\x88";
    // The string holds 5 encoded bytes; bound the range there rather than
    // past the terminating NUL.
    int cp = peek_next(cw, cw + 5);
    EXPECT_EQ (cp, 0x65e5);
}
// prior(): walks backwards through the sequences, checking the decoded code
// point and the new iterator position after every step.
TEST(CheckedAPITests, test_prior)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    // Start just behind the first (3-byte) code point.
    const char* cursor = twochars + 3;
    int code_point = prior(cursor, twochars);
    EXPECT_EQ(code_point, 0x65e5);
    EXPECT_EQ(cursor, twochars);

    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    // Start at the end of the 9 encoded bytes and step back three times.
    cursor = threechars + 9;

    code_point = prior(cursor, threechars);
    EXPECT_EQ(code_point, 0x0448);
    EXPECT_EQ(cursor, threechars + 7);

    code_point = prior(cursor, threechars);
    EXPECT_EQ(code_point, 0x65e5);
    EXPECT_EQ(cursor, threechars + 4);

    code_point = prior(cursor, threechars);
    EXPECT_EQ(code_point, 0x10346);
    EXPECT_EQ(cursor, threechars);
}
// advance(): moves forwards and backwards by whole code points over a
// 4 + 3 + 2 byte sequence and checks the resulting byte offsets.
TEST(CheckedAPITests, test_advance)
{
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* const first = threechars;
    const char* const last = threechars + 9;
    const char* cursor = first;

    // Forward moves are bounded by the end, backward moves by the start.
    advance(cursor, 2, last);
    EXPECT_EQ(cursor, threechars + 7);

    advance(cursor, -2, first);
    EXPECT_EQ(cursor, threechars);

    advance(cursor, 3, last);
    EXPECT_EQ(cursor, threechars + 9);

    advance(cursor, -2, first);
    EXPECT_EQ(cursor, threechars + 4);

    advance(cursor, -1, first);
    EXPECT_EQ(cursor, threechars);
}
// distance(): a 5-byte range holding a 3-byte and a 2-byte sequence counts as
// two code points.
TEST(CheckedAPITests, test_distance)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    const char* const last = twochars + 5;
    // Qualified explicitly, as in the rest of this file.
    size_t count = utf8::distance(twochars, last);
    EXPECT_EQ(count, 2);
}
// utf32to8(): three code points needing 2 + 3 + 4 bytes yield 9 UTF-8 bytes.
TEST(CheckedAPITests, test_utf32to8)
{
    int utf32string[] = {0x448, 0x65E5, 0x10346, 0};
    // Only the first three elements are converted; the trailing 0 is left out.
    int* const last = utf32string + 3;

    string encoded;
    utf32to8(utf32string, last, back_inserter(encoded));
    EXPECT_EQ(encoded.size(), 9);
}
// utf8to32(): a 3-byte plus a 2-byte sequence decode to two code points.
TEST(CheckedAPITests, test_utf8to32)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    const char* const last = twochars + 5;

    vector<int> decoded;
    utf8to32(twochars, last, back_inserter(decoded));
    EXPECT_EQ(decoded.size(), 2);
}
// utf16to8(): five UTF-16 units (the last two form a surrogate pair) produce
// 1 + 2 + 3 + 4 = 10 UTF-8 bytes.
TEST(CheckedAPITests, test_utf16to8)
{
    unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
    unsigned short* const last = utf16string + 5;

    string encoded;
    utf16to8(utf16string, last, back_inserter(encoded));
    EXPECT_EQ(encoded.size(), 10);
}
// utf8to16(): the trailing 4-byte sequence has to come out as the surrogate
// pair D834 DD1E, giving four UTF-16 units in total.
TEST(CheckedAPITests, test_utf8to16)
{
    char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
    // 9 encoded bytes; the terminating NUL is not part of the range.
    char* const last = utf8_with_surrogates + 9;

    vector<unsigned short> units;
    utf8to16(utf8_with_surrogates, last, back_inserter(units));
    EXPECT_EQ(units.size(), 4);
    EXPECT_EQ(units[2], 0xd834);
    EXPECT_EQ(units[3], 0xdd1e);
}
// replace_invalid(): every invalid sequence between 'a' and 'z' is replaced by
// the '?' marker and the result is valid UTF-8.
TEST(CheckedAPITests, test_replace_invalid)
{
    // sizeof() on these arrays counts the terminating NUL, so the NUL is part
    // of both the converted input range and the expected output.
    char broken[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";
    const char expected[] = "a????z";

    vector<char> repaired;
    replace_invalid(broken, broken + sizeof(broken), std::back_inserter(repaired), '?');

    bool repaired_is_valid = is_valid(repaired.begin(), repaired.end());
    EXPECT_TRUE(repaired_is_valid);

    EXPECT_EQ(sizeof(expected), repaired.size());
    EXPECT_TRUE(std::equal(repaired.begin(), repaired.begin() + sizeof(expected), expected));
}
// find_invalid(): the first five bytes are two well-formed sequences; the
// result is expected to point at the trailing 0xfa byte at offset 5.
TEST(CheckedAPITests, test_find_invalid)
{
    char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa";
    char* const first = utf_invalid;
    char* const last = utf_invalid + 6;

    char* offender = find_invalid(first, last);
    EXPECT_EQ(offender, utf_invalid + 5);
}
// is_valid(): rejects a range ending in a stray 0xfa byte and accepts a range
// that contains a well-formed 4-byte sequence.
TEST(CheckedAPITests, test_is_valid)
{
    char utf_invalid[] = "\xe6\x97\xa5\xd1\x88\xfa";
    char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";

    bool verdict = is_valid(utf_invalid, utf_invalid + 6);
    EXPECT_FALSE(verdict);

    verdict = is_valid(utf8_with_surrogates, utf8_with_surrogates + 9);
    EXPECT_TRUE(verdict);
}
// starts_with_bom(): true for a range that is exactly the UTF-8 byte order
// mark (EF BB BF), false for a range beginning with an ordinary code point.
TEST(CheckedAPITests, test_starts_with_bom)
{
    // byte_order_mark is a real array, so sizeof() yields its 3 bytes.
    unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf};
    bool bbom = starts_with_bom(byte_order_mark, byte_order_mark + sizeof(byte_order_mark));
    EXPECT_TRUE (bbom);

    // threechars is a pointer: sizeof(threechars) would be the size of the
    // pointer, not of the string. Use the length of the 9 encoded bytes.
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    bool no_bbom = starts_with_bom(threechars, threechars + 9);
    EXPECT_FALSE (no_bbom);
}

31
tests/test_checked_iterator.cpp

@ -0,0 +1,31 @@
#include "gtest/gtest.h"
#include "utf8/checked.h"
using namespace utf8;
// Checked iterator, forward direction: copy/equality, dereference, pre- and
// post-increment, and reaching an iterator positioned at the end of the data.
TEST(CheckedIteratrTests, test_increment)
{
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* const first = threechars;
    const char* const last = threechars + 9;

    utf8::iterator<const char*> cursor(first, first, last);
    utf8::iterator<const char*> origin = cursor;
    EXPECT_EQ(origin, cursor);

    EXPECT_EQ(*cursor, 0x10346);
    EXPECT_EQ(*(++cursor), 0x65e5);
    // Post-increment yields the old position's code point, then moves on.
    EXPECT_EQ((*cursor++), 0x65e5);
    EXPECT_EQ(*cursor, 0x0448);
    EXPECT_NE(cursor, origin);

    utf8::iterator<const char*> finish(last, first, last);
    EXPECT_EQ(++cursor, finish);
}
// Checked iterator, backward direction: starts at the end of the data and
// steps back to the first code point with pre- and post-decrement.
TEST(CheckedIteratrTests, test_decrement)
{
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* const first = threechars;
    const char* const last = threechars + 9;

    utf8::iterator<const char*> cursor(last, first, last);
    EXPECT_EQ(*(--cursor), 0x0448);
    // Post-decrement yields the old position's code point, then moves back.
    EXPECT_EQ((*cursor--), 0x0448);
    EXPECT_EQ(*cursor, 0x65e5);
    EXPECT_EQ(--cursor, utf8::iterator<const char*>(first, first, last));
    EXPECT_EQ(*cursor, 0x10346);
}

106
tests/test_cpp11.cpp

@ -0,0 +1,106 @@
#include "gtest/gtest.h"
#include "utf8.h"
#include <string>
using namespace utf8;
using namespace std;
#if __cplusplus >= 201103L // C++ 11 or later
// append() on std::string: each code point is appended to an emptied string,
// then every byte and the resulting length are checked.
TEST(CPP11APITests, test_append)
{
    string encoded;

    // U+0448 -> 2 bytes
    append(0x0448, encoded);
    EXPECT_EQ(encoded[0], char(0xd1));
    EXPECT_EQ(encoded[1], char(0x88));
    EXPECT_EQ(encoded.length(), 2);

    // U+65E5 -> 3 bytes
    encoded.clear();
    append(0x65e5, encoded);
    EXPECT_EQ(encoded[0], char(0xe6));
    EXPECT_EQ(encoded[1], char(0x97));
    EXPECT_EQ(encoded[2], char(0xa5));
    EXPECT_EQ(encoded.length(), 3);

    // U+3044 -> 3 bytes
    encoded.clear();
    append(0x3044, encoded);
    EXPECT_EQ(encoded[0], char(0xe3));
    EXPECT_EQ(encoded[1], char(0x81));
    EXPECT_EQ(encoded[2], char(0x84));
    EXPECT_EQ(encoded.length(), 3);

    // U+10346 -> 4 bytes
    encoded.clear();
    append(0x10346, encoded);
    EXPECT_EQ(encoded[0], char(0xf0));
    EXPECT_EQ(encoded[1], char(0x90));
    EXPECT_EQ(encoded[2], char(0x8d));
    EXPECT_EQ(encoded[3], char(0x86));
    EXPECT_EQ(encoded.length(), 4);
}
// utf16to8() on u16string: five UTF-16 units (the last two are a surrogate
// pair) become 1 + 2 + 3 + 4 = 10 UTF-8 bytes.
TEST(CPP11APITests, test_utf16to8)
{
    u16string utf16string = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
    string encoded = utf16to8(utf16string);
    EXPECT_EQ(encoded.size(), 10);
}
// utf8to16() on std::string: the trailing 4-byte sequence must turn into the
// surrogate pair D834 DD1E, for four UTF-16 units in total.
TEST(CPP11APITests, test_utf8to16)
{
    string utf8_with_surrogates = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
    u16string units = utf8to16(utf8_with_surrogates);
    EXPECT_EQ(units.size(), 4);
    EXPECT_EQ(units[2], 0xd834);
    EXPECT_EQ(units[3], 0xdd1e);
}
// utf32to8() on u32string: code points needing 2 + 3 + 4 bytes give a 9-byte
// UTF-8 string.
TEST(CPP11APITests, test_utf32to8)
{
    u32string utf32string = {0x448, 0x65E5, 0x10346};
    string encoded = utf32to8(utf32string);
    EXPECT_EQ(encoded.size(), 9);
}
// utf8to32(): a 3-byte and a 2-byte sequence decode to two code points. The
// argument is a C string, exactly as in the original call.
TEST(CPP11APITests, test_utf8to32)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    u32string decoded = utf8to32(twochars);
    EXPECT_EQ(decoded.size(), 2);
}
// find_invalid() on std::string: expects 5, the position of the trailing 0xfa
// byte that follows two well-formed sequences.
TEST(CPP11APITests, test_find_invalid)
{
    string utf_invalid = "\xe6\x97\xa5\xd1\x88\xfa";
    auto offender = find_invalid(utf_invalid);
    EXPECT_EQ(offender, 5);
}
// is_valid() on std::string: rejects a string ending in a stray 0xfa byte and
// accepts one that contains a well-formed 4-byte sequence.
TEST(CPP11APITests, test_is_valid)
{
    string utf_invalid = "\xe6\x97\xa5\xd1\x88\xfa";
    string utf8_with_surrogates = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";

    bool verdict = is_valid(utf_invalid);
    EXPECT_FALSE(verdict);

    verdict = is_valid(utf8_with_surrogates);
    EXPECT_TRUE(verdict);
}
// replace_invalid() on std::string: the invalid sequences between 'a' and 'z'
// collapse into four '?' markers and the result is valid UTF-8.
TEST(CPP11APITests, test_replace_invalid)
{
    const string expected = "a????z";
    string broken = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z";

    string repaired = replace_invalid(broken, '?');
    bool repaired_is_valid = is_valid(repaired);
    EXPECT_TRUE(repaired_is_valid);
    EXPECT_EQ(expected, repaired);
}
// starts_with_bom() on std::string: true for a string consisting of the bytes
// EF BB BF, false for a string that begins with an ordinary code point.
TEST(CPP11APITests, test_starts_with_bom)
{
    string byte_order_mark = {char(0xef), char(0xbb), char(0xbf)};
    string threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";

    bool has_bom = starts_with_bom(byte_order_mark);
    EXPECT_TRUE(has_bom);

    bool lacks_bom = starts_with_bom(threechars);
    EXPECT_FALSE(lacks_bom);
}
#endif // C++ 11 or later

0
test_data/negative/utf8_invalid.txt → tests/test_data/utf8_invalid.txt

148
tests/test_unchecked_api.cpp

@ -0,0 +1,148 @@
#include "gtest/gtest.h"
#include "utf8/unchecked.h"
#include <string>
#include <vector>
using namespace utf8::unchecked;
using namespace std;
// unchecked append(): encodes four code points in turn into one reused buffer
// and checks every byte of the buffer after each call.
TEST(UnCheckedAPITests, test_append)
{
    // Zero-filled so that the bytes behind the encoded sequence can be verified.
    // The buffer is never cleared: each sequence is at least as long as the
    // previous one (2, 3, 3 and 4 bytes), so stale bytes are always overwritten.
    unsigned char encoded[5] = {0, 0, 0, 0, 0};

    // U+0448 -> D1 88
    append(0x0448, encoded);
    EXPECT_EQ(encoded[0], 0xd1);
    EXPECT_EQ(encoded[1], 0x88);
    EXPECT_EQ(encoded[2], 0);
    EXPECT_EQ(encoded[3], 0);
    EXPECT_EQ(encoded[4], 0);

    // U+65E5 -> E6 97 A5
    append(0x65e5, encoded);
    EXPECT_EQ(encoded[0], 0xe6);
    EXPECT_EQ(encoded[1], 0x97);
    EXPECT_EQ(encoded[2], 0xa5);
    EXPECT_EQ(encoded[3], 0);
    EXPECT_EQ(encoded[4], 0);

    // U+3044 -> E3 81 84
    append(0x3044, encoded);
    EXPECT_EQ(encoded[0], 0xe3);
    EXPECT_EQ(encoded[1], 0x81);
    EXPECT_EQ(encoded[2], 0x84);
    EXPECT_EQ(encoded[3], 0);
    EXPECT_EQ(encoded[4], 0);

    // U+10346 -> F0 90 8D 86
    append(0x10346, encoded);
    EXPECT_EQ(encoded[0], 0xf0);
    EXPECT_EQ(encoded[1], 0x90);
    EXPECT_EQ(encoded[2], 0x8d);
    EXPECT_EQ(encoded[3], 0x86);
    EXPECT_EQ(encoded[4], 0);
}
// unchecked next(): decodes consecutive code points without an end bound and
// verifies the returned value and the new iterator position.
TEST(UnCheckedAPITests, test_next)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    const char* cursor = twochars;
    // The call stays fully qualified, as in every use within this test.
    int code_point = utf8::unchecked::next(cursor);
    EXPECT_EQ(code_point, 0x65e5);
    EXPECT_EQ(cursor, twochars + 3);

    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    cursor = threechars;

    code_point = utf8::unchecked::next(cursor);
    EXPECT_EQ(code_point, 0x10346);
    EXPECT_EQ(cursor, threechars + 4);

    code_point = utf8::unchecked::next(cursor);
    EXPECT_EQ(code_point, 0x65e5);
    EXPECT_EQ(cursor, threechars + 7);

    code_point = utf8::unchecked::next(cursor);
    EXPECT_EQ(code_point, 0x0448);
    EXPECT_EQ(cursor, threechars + 9);
}
// unchecked peek_next(): returns the first code point of the sequence; the
// iterator is a const pointer, so the call cannot move it.
TEST(UnCheckedAPITests, test_peek_next)
{
    const char* const cw = "\xe6\x97\xa5\xd1\x88";
    int code_point = peek_next(cw);
    EXPECT_EQ(code_point, 0x65e5);
}
// unchecked prior(): walks backwards without a start bound, checking the
// decoded code point and the new iterator position after every step.
TEST(UnCheckedAPITests, test_prior)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    // Start just behind the first (3-byte) code point.
    const char* cursor = twochars + 3;
    int code_point = prior(cursor);
    EXPECT_EQ(code_point, 0x65e5);
    EXPECT_EQ(cursor, twochars);

    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    // Start at the end of the 9 encoded bytes and step back three times.
    cursor = threechars + 9;

    code_point = prior(cursor);
    EXPECT_EQ(code_point, 0x0448);
    EXPECT_EQ(cursor, threechars + 7);

    code_point = prior(cursor);
    EXPECT_EQ(code_point, 0x65e5);
    EXPECT_EQ(cursor, threechars + 4);

    code_point = prior(cursor);
    EXPECT_EQ(code_point, 0x10346);
    EXPECT_EQ(cursor, threechars);
}
// unchecked advance(): moves forwards and backwards by whole code points over
// a 4 + 3 + 2 byte sequence and checks the resulting byte offsets.
TEST(UnCheckedAPITests, test_advance)
{
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* cursor = threechars;

    // Every call stays fully qualified, as in the original test.
    utf8::unchecked::advance(cursor, 2);
    EXPECT_EQ(cursor, threechars + 7);

    utf8::unchecked::advance(cursor, -2);
    EXPECT_EQ(cursor, threechars);

    utf8::unchecked::advance(cursor, 3);
    EXPECT_EQ(cursor, threechars + 9);

    utf8::unchecked::advance(cursor, -2);
    EXPECT_EQ(cursor, threechars + 4);

    utf8::unchecked::advance(cursor, -1);
    EXPECT_EQ(cursor, threechars);
}
// unchecked distance(): a 5-byte range holding a 3-byte and a 2-byte sequence
// counts as two code points.
TEST(UnCheckedAPITests, test_distance)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    const char* const last = twochars + 5;
    size_t count = utf8::unchecked::distance(twochars, last);
    EXPECT_EQ(count, 2);
}
// unchecked utf32to8(): three code points needing 2 + 3 + 4 bytes yield
// 9 UTF-8 bytes.
TEST(UnCheckedAPITests, test_utf32to8)
{
    int utf32string[] = {0x448, 0x65E5, 0x10346, 0};
    // Only the first three elements are converted; the trailing 0 is left out.
    int* const last = utf32string + 3;

    string encoded;
    utf32to8(utf32string, last, back_inserter(encoded));
    EXPECT_EQ(encoded.size(), 9);
}
// unchecked utf8to32(): a 3-byte plus a 2-byte sequence decode to two code
// points.
TEST(UnCheckedAPITests, test_utf8to32)
{
    const char* twochars = "\xe6\x97\xa5\xd1\x88";
    const char* const last = twochars + 5;

    vector<int> decoded;
    utf8to32(twochars, last, back_inserter(decoded));
    EXPECT_EQ(decoded.size(), 2);
}
// unchecked utf16to8(): five UTF-16 units (the last two form a surrogate pair)
// produce 1 + 2 + 3 + 4 = 10 UTF-8 bytes.
TEST(UnCheckedAPITests, test_utf16to8)
{
    unsigned short utf16string[] = {0x41, 0x0448, 0x65e5, 0xd834, 0xdd1e};
    unsigned short* const last = utf16string + 5;

    string encoded;
    utf16to8(utf16string, last, back_inserter(encoded));
    EXPECT_EQ(encoded.size(), 10);
}
// unchecked utf8to16(): the trailing 4-byte sequence has to come out as the
// surrogate pair D834 DD1E, giving four UTF-16 units in total.
TEST(UnCheckedAPITests, test_utf8to16)
{
    char utf8_with_surrogates[] = "\xe6\x97\xa5\xd1\x88\xf0\x9d\x84\x9e";
    // 9 encoded bytes; the terminating NUL is not part of the range.
    char* const last = utf8_with_surrogates + 9;

    vector<unsigned short> units;
    utf8to16(utf8_with_surrogates, last, back_inserter(units));
    EXPECT_EQ(units.size(), 4);
    EXPECT_EQ(units[2], 0xd834);
    EXPECT_EQ(units[3], 0xdd1e);
}

32
tests/test_unchecked_iterator.cpp

@ -0,0 +1,32 @@
#include "gtest/gtest.h"
#include "utf8/unchecked.h"
using namespace utf8::unchecked;
// Unchecked iterator, forward direction: copy/equality, dereference, pre- and
// post-increment, and reaching an iterator positioned at the end of the data.
TEST(UnCheckedIteratrTests, test_increment)
{
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* const last = threechars + 9;

    utf8::unchecked::iterator<const char*> cursor(threechars);
    utf8::unchecked::iterator<const char*> origin = cursor;
    EXPECT_EQ(origin, cursor);

    EXPECT_EQ(*cursor, 0x10346);
    EXPECT_EQ(*(++cursor), 0x65e5);
    // Post-increment yields the old position's code point, then moves on.
    EXPECT_EQ((*cursor++), 0x65e5);
    EXPECT_EQ(*cursor, 0x0448);
    EXPECT_NE(cursor, origin);

    utf8::unchecked::iterator<const char*> finish(last);
    EXPECT_EQ(++cursor, finish);
}
// Unchecked iterator, backward direction: starts at the end of the data and
// steps back to the first code point with pre- and post-decrement.
TEST(UnCheckedIteratrTests, test_decrement)
{
    const char* threechars = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88";
    const char* const last = threechars + 9;

    utf8::unchecked::iterator<const char*> cursor(last);
    EXPECT_EQ(*(--cursor), 0x0448);
    // Post-decrement yields the old position's code point, then moves back.
    EXPECT_EQ((*cursor--), 0x0448);
    EXPECT_EQ(*cursor, 0x65e5);
    EXPECT_EQ(--cursor, utf8::unchecked::iterator<const char*>(threechars));
    EXPECT_EQ(*cursor, 0x10346);
}
Loading…
Cancel
Save