diff options
Diffstat (limited to 'src/utf8/checked.h')
-rw-r--r-- | src/utf8/checked.h | 86 |
1 files changed, 39 insertions, 47 deletions
diff --git a/src/utf8/checked.h b/src/utf8/checked.h index 1331155..512dcc2 100644 --- a/src/utf8/checked.h +++ b/src/utf8/checked.h @@ -1,4 +1,4 @@ -// Copyright 2006 Nemanja Trifunovic +// Copyright 2006-2016 Nemanja Trifunovic /* Permission is hereby granted, free of charge, to any person or organization @@ -41,8 +41,8 @@ namespace utf8 class invalid_code_point : public exception { uint32_t cp; public: - invalid_code_point(uint32_t cp) : cp(cp) {} - virtual const char* what() const throw() { return "Invalid code point"; } + invalid_code_point(uint32_t codepoint) : cp(codepoint) {} + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid code point"; } uint32_t code_point() const {return cp;} }; @@ -50,7 +50,8 @@ namespace utf8 uint8_t u8; public: invalid_utf8 (uint8_t u) : u8(u) {} - virtual const char* what() const throw() { return "Invalid UTF-8"; } + invalid_utf8 (char c) : u8(static_cast<uint8_t>(c)) {} + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-8"; } uint8_t utf8_octet() const {return u8;} }; @@ -58,13 +59,13 @@ namespace utf8 uint16_t u16; public: invalid_utf16 (uint16_t u) : u16(u) {} - virtual const char* what() const throw() { return "Invalid UTF-16"; } + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Invalid UTF-16"; } uint16_t utf16_word() const {return u16;} }; class not_enough_room : public exception { public: - virtual const char* what() const throw() { return "Not enough space"; } + virtual const char* what() const UTF_CPP_NOEXCEPT UTF_CPP_OVERRIDE { return "Not enough space"; } }; /// The library API - functions intended to be called by the users @@ -75,24 +76,7 @@ namespace utf8 if (!utf8::internal::is_code_point_valid(cp)) throw invalid_code_point(cp); - if (cp < 0x80) // one octet - *(result++) = static_cast<uint8_t>(cp); - else if (cp < 0x800) { // two octets - *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0); - *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80); - } - else if (cp < 0x10000) { // three octets - *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0); - *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80); - *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80); - } - else { // four octets - *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0); - *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80); - *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80); - *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80); - } - return result; + return internal::append(cp, result); } template <typename octet_iterator, typename output_iterator> @@ -107,7 +91,9 @@ namespace utf8 *out++ = *it; break; case internal::NOT_ENOUGH_ROOM: - throw not_enough_room(); + out = utf8::append (replacement, out); + start = end; + break; case internal::INVALID_LEAD: out = utf8::append (replacement, out); ++start; @@ -146,7 +132,7 @@ namespace utf8 case internal::INVALID_LEAD : case internal::INCOMPLETE_SEQUENCE : case internal::OVERLONG_SEQUENCE : - throw invalid_utf8(*it); + throw invalid_utf8(static_cast<uint8_t>(*it)); case internal::INVALID_CODE_POINT : throw invalid_code_point(cp); } @@ -174,23 +160,19 @@ namespace utf8 return utf8::peek_next(it, end); } - /// Deprecated in versions that include "prior" - template <typename octet_iterator> - uint32_t previous(octet_iterator& it, octet_iterator pass_start) - { - octet_iterator end = it; - while (utf8::internal::is_trail(*(--it))) - if (it == pass_start) - throw invalid_utf8(*it); // error - no lead byte in the sequence - octet_iterator temp = it; - return utf8::next(temp, end); - } - template <typename octet_iterator, typename distance_type> void advance (octet_iterator& it, distance_type n, octet_iterator end) { - for (distance_type i = 0; i < n; ++i) - utf8::next(it, end); + const distance_type zero(0); + if (n < zero) { + // backward + for (distance_type i = n; i < zero; ++i) + utf8::prior(it, end); + } else { + // forward + for (distance_type i = zero; i < n; ++i) + utf8::next(it, end); + } } template <typename octet_iterator> @@ -233,7 +215,7 @@ namespace utf8 template <typename u16bit_iterator, typename octet_iterator> u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) { - while (start != end) { + while (start < end) { uint32_t cp = utf8::next(start, end); if (cp > 0xffff) { //make a surrogate pair *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET); @@ -257,7 +239,7 @@ namespace utf8 template <typename octet_iterator, typename u32bit_iterator> u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) { - while (start != end) + while (start < end) (*result++) = utf8::next(start, end); return result; @@ -265,16 +247,21 @@ namespace utf8 // The iterator class template <typename octet_iterator> - class iterator : public std::iterator <std::bidirectional_iterator_tag, uint32_t> { + class iterator { octet_iterator it; octet_iterator range_start; octet_iterator range_end; public: + typedef uint32_t value_type; + typedef uint32_t* pointer; + typedef uint32_t& reference; + typedef std::ptrdiff_t difference_type; + typedef std::bidirectional_iterator_tag iterator_category; iterator () {} explicit iterator (const octet_iterator& octet_it, - const octet_iterator& range_start, - const octet_iterator& range_end) : - it(octet_it), range_start(range_start), range_end(range_end) + const octet_iterator& rangestart, + const octet_iterator& rangeend) : + it(octet_it), range_start(rangestart), range_end(rangeend) { if (it < range_start || it > range_end) throw std::out_of_range("Invalid utf-8 iterator position"); @@ -322,6 +309,11 @@ namespace utf8 } // namespace utf8 -#endif //header guard +#if UTF_CPP_CPLUSPLUS >= 201703L // C++ 17 or later +#include "cpp17.h" +#elif UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later +#include "cpp11.h" +#endif // C++ 11 or later +#endif //header guard |