Updated utf8 dependency to upstream v3.2.3

author: Your Name <you@example.com> 2023-01-28 08:32:54 -0500
committer: Your Name <you@example.com> 2023-01-28 08:32:54 -0500
commit: f14824a8d3a18c7759d28885752b7bccc5b6679f (patch)
tree: 7ac7dad4bb23be14bf928299766be48a9be0f09f /src/utf8/core.h
parent: 538b480d12231fb32565949a755f342518d7f0c1 (diff)
download: libbible-f14824a8d3a18c7759d28885752b7bccc5b6679f.tar.gz
libbible-f14824a8d3a18c7759d28885752b7bccc5b6679f.tar.bz2
libbible-f14824a8d3a18c7759d28885752b7bccc5b6679f.zip
1 files changed, 75 insertions, 17 deletions
diff --git a/src/utf8/core.h b/src/utf8/core.h
index 693d388..34371ee 100644
--- a/src/utf8/core.h
+++ b/src/utf8/core.h
@@ -30,6 +30,23 @@ DEALINGS IN THE SOFTWARE.
 
 #include <iterator>
 
+// Determine the C++ standard version.
+// If the user defines UTF_CPP_CPLUSPLUS, use that.
+// Otherwise, fall back to the predefined macro __cplusplus, which some compilers misreport (e.g. MSVC without /Zc:__cplusplus)
+
+#if !defined UTF_CPP_CPLUSPLUS
+    #define UTF_CPP_CPLUSPLUS __cplusplus
+#endif
+
+#if UTF_CPP_CPLUSPLUS >= 201103L // C++ 11 or later
+    #define UTF_CPP_OVERRIDE override
+    #define UTF_CPP_NOEXCEPT noexcept
+#else // C++ 98/03
+    #define UTF_CPP_OVERRIDE
+    #define UTF_CPP_NOEXCEPT throw()
+#endif // C++ 11 or later
+
+
 namespace utf8
 {
     // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
@@ -49,8 +66,8 @@ namespace internal
     const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
     const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
     const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
-    const uint16_t LEAD_OFFSET         = LEAD_SURROGATE_MIN - (0x10000 >> 10);
-    const uint32_t SURROGATE_OFFSET    = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
+    const uint16_t LEAD_OFFSET         = 0xd7c0u;       // LEAD_SURROGATE_MIN - (0x10000 >> 10)
+    const uint32_t SURROGATE_OFFSET    = 0xfca02400u;   // 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN
 
     // Maximum valid value for a Unicode code point
     const uint32_t CODE_POINT_MAX      = 0x0010ffffu;
@@ -142,7 +159,7 @@ namespace internal
 
         if (!utf8::internal::is_trail(*it))
             return INCOMPLETE_SEQUENCE;
-        
+
         return UTF8_OK;
     }
 
@@ -165,7 +182,7 @@ namespace internal
     {
         if (it == end) 
             return NOT_ENOUGH_ROOM;
-        
+
         code_point = utf8::internal::mask8(*it);
 
         UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
@@ -222,6 +239,9 @@ namespace internal
     template <typename octet_iterator>
     utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
     {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
         // Save the original value of it so we can go back in case of failure
         // Of course, it does not make much sense with i.e. stream iterators
         octet_iterator original_it = it;
@@ -234,7 +254,7 @@ namespace internal
         // Get trail octets and calculate the code point
         utf_error err = UTF8_OK;
         switch (length) {
-            case 0: 
+            case 0:
                 return INVALID_LEAD;
             case 1:
                 err = utf8::internal::get_sequence_1(it, end, cp);
@@ -277,6 +297,55 @@ namespace internal
         return utf8::internal::validate_next(it, end, ignored);
     }
 
+    // Internal implementation of both the checked and unchecked append() functions
+    // This function will be invoked by the overloads below, as they will know
+    // the octet_type.
+    template <typename octet_iterator, typename octet_type>
+    octet_iterator append(uint32_t cp, octet_iterator result) {
+        if (cp < 0x80)                        // one octet
+            *(result++) = static_cast<octet_type>(cp);
+        else if (cp < 0x800) {                // two octets
+            *(result++) = static_cast<octet_type>((cp >> 6)          | 0xc0);
+            *(result++) = static_cast<octet_type>((cp & 0x3f)        | 0x80);
+        }
+        else if (cp < 0x10000) {              // three octets
+            *(result++) = static_cast<octet_type>((cp >> 12)         | 0xe0);
+            *(result++) = static_cast<octet_type>(((cp >> 6) & 0x3f) | 0x80);
+            *(result++) = static_cast<octet_type>((cp & 0x3f)        | 0x80);
+        }
+        else {                                // four octets
+            *(result++) = static_cast<octet_type>((cp >> 18)         | 0xf0);
+            *(result++) = static_cast<octet_type>(((cp >> 12) & 0x3f)| 0x80);
+            *(result++) = static_cast<octet_type>(((cp >> 6) & 0x3f) | 0x80);
+            *(result++) = static_cast<octet_type>((cp & 0x3f)        | 0x80);
+        }
+        return result;
+    }
+    
+    // One of the following overloads will be invoked from the API calls
+
+    // A simple (but dangerous) case: the caller appends byte(s) to a char array
+    inline char* append(uint32_t cp, char* result) {
+        return append<char*, char>(cp, result);
+    }
+
+    // Hopefully, most common case: the caller uses back_inserter
+    // e.g. append(cp, std::back_inserter(str));
+    template<typename container_type>
+    std::back_insert_iterator<container_type> append
+            (uint32_t cp, std::back_insert_iterator<container_type> result) {
+        return append<std::back_insert_iterator<container_type>,
+            typename container_type::value_type>(cp, result);
+    }
+
+    // The caller uses some other kind of output iterator - not covered above
+    // Note that in this case we are not able to determine octet_type
+    // so we assume it's uint8_t; that can cause a conversion warning if we are wrong.
+    template <typename octet_iterator>
+    octet_iterator append(uint32_t cp, octet_iterator result) {
+        return append<octet_iterator, uint8_t>(cp, result);
+    }
+
 } // namespace internal
 
     /// The library API - functions intended to be called by the users
@@ -310,18 +379,7 @@ namespace internal
             ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
             ((it != end) && (utf8::internal::mask8(*it))   == bom[2])
            );
-    }
-	
-    //Deprecated in release 2.3 
-    template <typename octet_iterator>
-    inline bool is_bom (octet_iterator it)
-    {
-        return (
-            (utf8::internal::mask8(*it++)) == bom[0] &&
-            (utf8::internal::mask8(*it++)) == bom[1] &&
-            (utf8::internal::mask8(*it))   == bom[2]
-           );
-    }
+    }	
 } // namespace utf8
 
 #endif // header guard
author	Your Name <you@example.com>	2023-01-28 08:32:54 -0500
committer	Your Name <you@example.com>	2023-01-28 08:32:54 -0500
commit	f14824a8d3a18c7759d28885752b7bccc5b6679f (patch)
tree	7ac7dad4bb23be14bf928299766be48a9be0f09f /src/utf8/core.h
parent	538b480d12231fb32565949a755f342518d7f0c1 (diff)
download	libbible-f14824a8d3a18c7759d28885752b7bccc5b6679f.tar.gz libbible-f14824a8d3a18c7759d28885752b7bccc5b6679f.tar.bz2 libbible-f14824a8d3a18c7759d28885752b7bccc5b6679f.zip