diff --git a/README.md b/README.md index 1a1effa..c1bd92f 100644 --- a/README.md +++ b/README.md @@ -308,39 +308,6 @@ In case `start` is reached before a UTF-8 lead octet is hit, or if an invalid UT In case `start` equals `it`, a `not_enough_room` exception is thrown. -#### utf8::previous - -Deprecated in version 1.02 and later. - -Given a reference to an iterator pointing to an octet in a UTF-8 seqence, it decreases the iterator until it hits the beginning of the previous UTF-8 encoded code point and returns the 32 bits representation of the code point. - -```cpp -template <typename octet_iterator> -uint32_t previous(octet_iterator& it, octet_iterator pass_start); -``` - -`octet_iterator`: a random access iterator. -`it`: a reference pointing to an octet within a UTF-8 encoded string. After the function returns, it is decremented to point to the beginning of the previous code point. -`pass_start`: an iterator to the point in the sequence where the search for the beginning of a code point is aborted if no result was reached. It is a safety measure to prevent passing the beginning of the string in the search for a UTF-8 lead octet. -Return value: the 32 bit representation of the previous code point. - -Example of use: - -```cpp -char* twochars = "\xe6\x97\xa5\xd1\x88"; -unsigned char* w = twochars + 3; -int cp = previous (w, twochars - 1); -assert (cp == 0x65e5); -assert (w == twochars); -``` - - -`utf8::previous` is deprecated, and `utf8::prior` should be used instead, although the existing code can continue using this function. The problem is the parameter `pass_start` that points to the position just before the beginning of the sequence. Standard containers don't have the concept of "pass start" and the function can not be used with their iterators. - -`it` will typically point to the beginning of a code point, and `pass_start` will point to the octet just before the beginning of the string to ensure we don't go backwards too far. 
`it` is decreased until it points to a lead UTF-8 octet, and then the UTF-8 sequence beginning with that octet is decoded to a 32 bit representation and returned. - -In case `pass_start` is reached before a UTF-8 lead octet is hit, or if an invalid UTF-8 sequence is started by the lead octet, an `invalid_utf8` exception is thrown - #### utf8::advance Available in version 1.0 and later. @@ -801,7 +768,7 @@ The typical use of this function is to check the first three bytes of a file. If #### utf8::starts_with_bom -Available in version 2.3 and later. Replaces deprecated `is_bom()` function. +Available in version 2.3 and later. Checks whether an octet sequence starts with a UTF-8 byte order mark (BOM) @@ -825,33 +792,6 @@ assert (bbom == true); The typical use of this function is to check the first three bytes of a file. If they form the UTF-8 BOM, we want to skip them before processing the actual UTF-8 encoded text. -#### utf8::is_bom - -Available in version 1.0 and later. Deprecated in version 2.3\. `starts_with_bom()` should be used instead. - -Checks whether a sequence of three octets is a UTF-8 byte order mark (BOM) - -```cpp -template <typename octet_iterator> -bool is_bom (octet_iterator it); // Deprecated -``` - -`octet_iterator`: an input iterator. -`it`: beginning of the 3-octet sequence to check -Return value: `true` if the sequence is UTF-8 byte order mark; `false` if not. - -Example of use: - -```cpp -unsigned char byte_order_mark[] = {0xef, 0xbb, 0xbf}; -bool bbom = is_bom(byte_order_mark); -assert (bbom == true); -``` - -The typical use of this function is to check the first three bytes of a file. If they form the UTF-8 BOM, we want to skip them before processing the actual UTF-8 encoded text. - -If a sequence is shorter than three bytes, an invalid iterator will be dereferenced. Therefore, this function is deprecated in favor of `starts_with_bom()`that takes the end of sequence as an argument. 
- ### Types From utf8 Namespace #### utf8::exception @@ -1098,34 +1038,6 @@ assert (w == twochars); This is a faster but less safe version of `utf8::prior`. It does not check for validity of the supplied UTF-8 sequence and offers no boundary checking. -#### utf8::unchecked::previous (deprecated, see utf8::unchecked::prior) - -Deprecated in version 1.02 and later. - -Given a reference to an iterator pointing to an octet in a UTF-8 seqence, it decreases the iterator until it hits the beginning of the previous UTF-8 encoded code point and returns the 32 bits representation of the code point. - -```cpp -template <typename octet_iterator> -uint32_t previous(octet_iterator& it); -``` - -`it`: a reference pointing to an octet within a UTF-8 encoded string. After the function returns, it is decremented to point to the beginning of the previous code point. -Return value: the 32 bit representation of the previous code point. - -Example of use: - -```cpp -char* twochars = "\xe6\x97\xa5\xd1\x88"; -char* w = twochars + 3; -int cp = unchecked::previous (w); -assert (cp == 0x65e5); -assert (w == twochars); -``` - -The reason this function is deprecated is just the consistency with the "checked" versions, where `prior` should be used instead of `previous`. In fact, `unchecked::previous` behaves exactly the same as `unchecked::prior` - -This is a faster but less safe version of `utf8::previous`. It does not check for validity of the supplied UTF-8 sequence and offers no boundary checking. - #### utf8::unchecked::advance Available in version 1.0 and later. 
diff --git a/source/utf8/checked.h b/source/utf8/checked.h index 2aef583..50d4812 100644 --- a/source/utf8/checked.h +++ b/source/utf8/checked.h @@ -174,18 +174,6 @@ namespace utf8 return utf8::peek_next(it, end); } - /// Deprecated in versions that include "prior" - template <typename octet_iterator> - uint32_t previous(octet_iterator& it, octet_iterator pass_start) - { - octet_iterator end = it; - while (utf8::internal::is_trail(*(--it))) - if (it == pass_start) - throw invalid_utf8(*it); // error - no lead byte in the sequence - octet_iterator temp = it; - return utf8::next(temp, end); - } - template <typename octet_iterator, typename distance_type> void advance (octet_iterator& it, distance_type n, octet_iterator end) { diff --git a/source/utf8/core.h b/source/utf8/core.h index 6fdb3ec..1ebd099 100644 --- a/source/utf8/core.h +++ b/source/utf8/core.h @@ -313,18 +313,7 @@ namespace internal ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) && ((it != end) && (utf8::internal::mask8(*it)) == bom[2]) ); - } - - //Deprecated in release 2.3 - template <typename octet_iterator> - inline bool is_bom (octet_iterator it) - { - return ( - (utf8::internal::mask8(*it++)) == bom[0] && - (utf8::internal::mask8(*it++)) == bom[1] && - (utf8::internal::mask8(*it)) == bom[2] - ); - } + } } // namespace utf8 #endif // header guard diff --git a/source/utf8/unchecked.h b/source/utf8/unchecked.h index cb24271..3d36183 100644 --- a/source/utf8/unchecked.h +++ b/source/utf8/unchecked.h @@ -102,13 +102,6 @@ namespace utf8 return utf8::unchecked::next(temp); } - // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) - template <typename octet_iterator> - inline uint32_t previous(octet_iterator& it) - { - return utf8::unchecked::prior(it); - } - template <typename octet_iterator, typename distance_type> void advance (octet_iterator& it, distance_type n) { diff --git a/test_drivers/smoke_test/test.cpp b/test_drivers/smoke_test/test.cpp index 4f9fb04..ce0480b 100644 --- a/test_drivers/smoke_test/test.cpp +++ b/test_drivers/smoke_test/test.cpp @@ -65,23 +65,6 @@ int main() assert (cp == 0x10346); assert (w == 
threechars); - //previous (deprecated) - w = twochars + 3; - cp = previous (w, twochars - 1); - assert (cp == 0x65e5); - assert (w == twochars); - - w = threechars + 9; - cp = previous(w, threechars - 1); - assert (cp == 0x0448); - assert (w == threechars + 7); - cp = previous(w, threechars -1); - assert (cp == 0x65e5); - assert (w == threechars + 4); - cp = previous(w, threechars - 1); - assert (cp == 0x10346); - assert (w == threechars); - // advance w = twochars; advance (w, 2, twochars + 6); @@ -145,11 +128,6 @@ int main() assert (bbom == true); bool no_bbom = starts_with_bom(threechars, threechars + sizeof(threechars)); assert (no_bbom == false); - - //is_bom - bool unsafe_bbom = is_bom(byte_order_mark); - assert (unsafe_bbom == true); - //replace_invalid char invalid_sequence[] = "a\x80\xe0\xa0\xc0\xaf\xed\xa0\x80z"; @@ -215,25 +193,6 @@ int main() assert (cp == 0x65e5); assert (cw == twochars); - - //previous (calls prior internally) - - w = twochars + 3; - cp = unchecked::previous (w); - assert (cp == 0x65e5); - assert (w == twochars); - - w = threechars + 9; - cp = unchecked::previous(w); - assert (cp == 0x0448); - assert (w == threechars + 7); - cp = unchecked::previous(w); - assert (cp == 0x65e5); - assert (w == threechars + 4); - cp = unchecked::previous(w); - assert (cp == 0x10346); - assert (w == threechars); - // advance w = twochars; unchecked::advance (w, 2); diff --git a/test_drivers/utf8reader/utf8reader.cpp b/test_drivers/utf8reader/utf8reader.cpp index c88a5ee..c104763 100644 --- a/test_drivers/utf8reader/utf8reader.cpp +++ b/test_drivers/utf8reader/utf8reader.cpp @@ -76,11 +76,11 @@ int main(int argc, char** argv) cout << "Line " << line_count << ": Error in distance function" << '\n'; while (it != line_start) { - previous(it, line.rend().base()); + prior(it, line.rend().base()); char_count--; } if (char_count != 0) - cout << "Line " << line_count << ": Error in iterating with previous - wrong number of characters" << '\n'; + cout << "Line " 
<< line_count << ": Error in iterating with prior - wrong number of characters" << '\n'; // Try utf8::iterator utf8::iterator u8it(line_start, line_start, line_end); @@ -140,11 +140,11 @@ int main(int argc, char** argv) cout << "Line " << line_count << ": Error in unchecked::distance function" << '\n'; while (it != line_start) { - unchecked::previous(it); + unchecked::prior(it); char_count--; } if (char_count != 0) - cout << "Line " << line_count << ": Error in iterating with unchecked::previous - wrong number of characters" << '\n'; + cout << "Line " << line_count << ": Error in iterating with unchecked::prior - wrong number of characters" << '\n'; // Try utf8::unchecked::iterator utf8::unchecked::iterator un_u8it(line_start);