From 55d58a16e2511741cc625e203205dec86144faf3 Mon Sep 17 00:00:00 2001
From: Your Name <you@example.com>
Date: Fri, 18 Feb 2022 20:35:38 -0500
Subject: Reorganized repository layout

---
 src/bible.cc             | 329 +++++++++++++++++++++++++++++++++++++++++++++++
 src/lib/libbible.cc      | 262 +++++++++++++++++++++++++++++++++++++
 src/lib/libbible.h       | 115 +++++++++++++++++
 src/lib/mods.cc          | 233 +++++++++++++++++++++++++++++++++
 src/lib/settings.cc      |  23 ++++
 src/test/Makefile        |  20 +++
 src/test/modules/JPS.zip | Bin 0 -> 1170889 bytes
 src/test/modules/KJV.zip | Bin 0 -> 4061008 bytes
 src/test/testLibbible.cc | 265 ++++++++++++++++++++++++++++++++++++++
 src/utf8.h               |  34 +++++
 src/utf8/checked.h       | 327 ++++++++++++++++++++++++++++++++++++++++++++++
 src/utf8/core.h          | 329 +++++++++++++++++++++++++++++++++++++++++++++++
 src/utf8/unchecked.h     | 228 ++++++++++++++++++++++++++++++++
 13 files changed, 2165 insertions(+)
 create mode 100644 src/bible.cc
 create mode 100644 src/lib/libbible.cc
 create mode 100644 src/lib/libbible.h
 create mode 100644 src/lib/mods.cc
 create mode 100644 src/lib/settings.cc
 create mode 100644 src/test/Makefile
 create mode 100644 src/test/modules/JPS.zip
 create mode 100644 src/test/modules/KJV.zip
 create mode 100644 src/test/testLibbible.cc
 create mode 100644 src/utf8.h
 create mode 100644 src/utf8/checked.h
 create mode 100644 src/utf8/core.h
 create mode 100644 src/utf8/unchecked.h

(limited to 'src')
diff --git a/src/bible.cc b/src/bible.cc
new file mode 100644
index 0000000..a09c0c0
--- /dev/null
+++ b/src/bible.cc
@@ -0,0 +1,329 @@
+#include "lib/libbible.h"
+#include <string>
+#include <sstream>
+#include <algorithm>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <iostream>
+#include "utf8.h"
+
+using namespace std;
+
+void usage() { // Write the complete help text (synopsis, options, examples) to stdout.
+    fputs("\nUsage:\n bible [options] [reference]\n\n"
+          "Print bible passages.\n\n"
+          "Options:\n"
+          " -h, --help                         display this help message\n"
+          "     --list-modules                 list all installed modules\n"
+          " -m, --module <mod>                 use specified module\n"
+          "     --set-default-module <mod>     use specified module by default in future runs\n"
+          "     --list-books                   list books available in the current module\n"
+          "     --list-chapters <book>         list chapters available in book in the current module\n"
+          " -o, --omit-verse-numbers           when printing verse text, skip printing verse and chapter numbers\n"
+          "     --list-installable=<lang>      list bible versions available for download and install. Default lists for all languages.\n"
+          "     --install-network <mod>        install module from the network where <mod> is LANG:NAME as provided by --list-installable\n"
+          "     --install-zip <path>           install module from a zip file\n"
+          "     --remove-module <mod>          delete a module from the system\n"
+          "\n\nExamples:\n bible Gal 5:22-23\n"
+          " bible John 3:16\n bible Romans 12\n bible Matt 5:3-7:27\n"
+          " bible Genesis 1-3\n", stdout);
+}
+
+string getDefaultModule() { // Returns whatever libbible::settingsRead yields for the "module" key.
+    return libbible::settingsRead("module");
+}
+
+void listModules() {
+    // Print one line per installed module; the module whose name equals the
+    // stored default gets a " (default)" suffix.
+    const map<string, vector<string>> installed = libbible::getModules();
+    const string preferred = getDefaultModule();
+    printf("Modules Installed:\n");
+    for(const auto& entry : installed) {
+        // Only the key (module name) is printed; the book list is not used here.
+        const char* marker = (entry.first == preferred) ? " (default)" : "";
+        printf(" %s%s\n", entry.first.c_str(), marker);
+    }
+}
+
+void setDefaultModule(string modname) { // Stores modname under the "module" settings key via libbible::settingsWrite.
+    libbible::settingsWrite("module", modname);
+}
+
+void listBooks(string modname) { // Print the books of an installed module, or an error line if it is not installed.
+    const map<string, vector<string>> mods = libbible::getModules();
+    if(const auto found = mods.find(modname); found == mods.end()) { // single lookup, reused in the else branch
+        printf("ERROR: Module \"%s\" not installed!\n", modname.c_str());
+    } else {
+        printf("Books in Module %s:\n", modname.c_str());
+        for(const string& book : found->second) { // by reference: no per-book string copy
+            printf(" %s\n", book.c_str());
+        }
+    }
+}
+
+void listChapters(string modname, string book) { // One output line per chapter, giving its verse range.
+    printf("Valid chapters for book %s in module %s:\n", book.c_str(), modname.c_str());
+    const vector<libbible::passage> chapters = libbible::getPassages(modname, book);
+    for(const libbible::passage& ch : chapters)
+        printf(" Chapter %d, Verses %d-%d\n", ch.chapterStart, ch.verseStart, ch.verseEnd);
+}
+
+void listInstallable(string language) { // Print downloadable modules per language; an empty `language` lists them all.
+    const map<string, vector<string>> installable = libbible::downloadModsAvailable();
+    const map<string, string> languages = libbible::getLanguageNames();
+    for(const auto& entry : installable) { // by reference: no copy of each module list
+        if(!language.empty() && language != entry.first) continue;
+        printf("For language %s:", entry.first.c_str());
+        // find() rather than operator[]: unknown codes no longer insert empty entries.
+        const auto fullName = languages.find(entry.first);
+        if(fullName != languages.end() && !fullName->second.empty()) {
+            printf(" (%s)", fullName->second.c_str());
+        }
+        printf("\n");
+        for(const string& name : entry.second) {
+            printf(" %s\n", name.c_str());
+        }
+    }
+}
+
+void installNetwork(string mod) { // mod must look like LANG:NAME; it is split at the first colon.
+    const size_t colon = mod.find(':');
+    if(colon == string::npos) {
+        printf("Unable to process module \"%s\": Must contain colon separated language:name\n", mod.c_str());
+        return;
+    }
+    const string lang = mod.substr(0, colon);
+    const string name = mod.substr(colon + 1);
+    if(!libbible::installModFromInternet(lang, name)) {
+        printf("Error installing module!\n");
+        return;
+    }
+    printf("Module installed.\n");
+}
+
+void installZip(string path) { // Install from a local zip file and report the outcome with the same messages installNetwork prints.
+    fputs(libbible::installModFromZip(path) ? "Module installed.\n" : "Error installing module!\n", stdout);
+}
+
+void removeMod(string mod) { // Hands the module name to libbible::uninstallMod; no result is reported.
+    libbible::uninstallMod(mod);
+}
+
+void textWrap(istream& in, ostream& out, size_t width) {
+    // Copy `in` to `out`, breaking a line at its last space once its visible
+    // length (UTF-8 code points minus \033...m escape bytes) exceeds `width`.
+    string word;
+    string line;
+    char cur = '\0';
+    while(in.get(cur)) {
+        // isspace() is undefined for negative values, and UTF-8 bytes are negative where char is signed.
+        if(isspace(static_cast<unsigned char>(cur))) {
+            word.clear();
+        }
+        if(cur == '\n') {
+            out << line << '\n';
+            line.clear();
+            word.clear();
+            continue;
+        }
+        word += cur;
+        line += cur;
+        // Anything matching \033.*?m doesn't count
+        size_t credits = 0;
+        size_t found = -1;
+        while((found = line.find("\033", found+1)) != string::npos) {
+            size_t first = line.find_first_of("m", found);
+            if(first != string::npos) {
+                credits += first - found + 1;
+            } else {
+                credits += line.size() - found;
+            }
+        }
+        string::iterator end_it = utf8::find_invalid(line.begin(), line.end());
+        const size_t i = utf8::distance(line.begin(), end_it) - credits; // visible length so far
+        if(i > width) {
+            word.erase(0, word.find_first_not_of(" "));
+            if(line.find_last_of(" ") != string::npos) {
+                line.erase(line.find_last_of(" "));
+                out << line << '\n';
+            }
+            line = word;
+        }
+    }
+    out << line;
+}
+
+int main(int argc, char* argv[]) {
+    // Parse the options, carry out any requested module-management actions,
+    // then print the referenced passage (wrapped when stdout is a terminal).
+    static struct option long_options[] = {
+        {"help", no_argument, 0, 'h'},
+        {"list-modules", no_argument, 0, 0},
+        {"module", required_argument, 0, 'm'},
+        {"set-default-module", required_argument, 0, 0},
+        {"list-books", no_argument, 0, 0},
+        {"list-chapters", required_argument, 0, 0},
+        {"omit-verse-numbers", no_argument, 0, 'o'},
+        {"list-installable", optional_argument, 0, 0},
+        {"install-network", required_argument, 0, 0},
+        {"install-zip", required_argument, 0, 0},
+        {"remove-module", required_argument, 0, 0},
+        {0, 0, 0, 0} // getopt_long() needs an all-zero entry to find the end of the table
+    };
+    int opt, option_index = 0;
+    string modname;
+    bool omitVerseNums = false;
+    bool doListBooks = false;
+    string listChaptersBook;
+    string option;
+    while ((opt = getopt_long(argc, argv, "hm:o", long_options, &option_index)) != -1) {
+        switch(opt) {
+            case 'h':
+                usage();
+                return 0;
+            case 'm':
+                modname = string(optarg);
+                break;
+            case 'o':
+                omitVerseNums = true;
+                break;
+            case 0:
+                option = string(long_options[option_index].name);
+                if(option == "list-modules") {
+                    listModules();
+                    return 0;
+                } else if(option == "set-default-module") {
+                    setDefaultModule(string(optarg));
+                } else if(option == "list-books") {
+                    doListBooks = true;
+                } else if(option == "list-chapters") {
+                    listChaptersBook = string(optarg);
+                } else if(option == "list-installable") {
+                    if(optarg == nullptr) {
+                        listInstallable(string());
+                    } else {
+                        listInstallable(string(optarg));
+                    }
+                } else if(option == "install-network") {
+                    installNetwork(string(optarg));
+                } else if(option == "install-zip") {
+                    installZip(string(optarg));
+                } else if(option == "remove-module") {
+                    removeMod(string(optarg));
+                }
+                break;
+            default:
+                usage();
+                return 1;
+        }
+    }
+    if(modname.empty()) {
+        modname = getDefaultModule();
+    }
+    if(doListBooks) {
+        listBooks(modname);
+    }
+    if(! listChaptersBook.empty()) {
+        listChapters(modname, listChaptersBook);
+    }
+    string reference;
+    while(optind < argc) {
+        reference += argv[optind++];
+        reference += " ";
+    }
+    if(reference.empty()) {
+        // That's all.
+        return 0;
+    }
+
+    auto text = libbible::getText(libbible::getPassage(modname, reference));
+    int chapter = 0;
+    int verse = 0;
+    const char* indent = "    ";
+    bool isNewline = true;
+    stringstream out;
+    for(auto tex : text) { // by value on purpose: tex.text is edited below
+        if(!omitVerseNums && tex.chapter != chapter) {
+            out << tex.book << " Chapter " << tex.chapter << ":\n";
+        }
+        bool isParagraph = false;
+        bool isIndent = false;
+        bool isDivineName = false;
+        bool isJesus = false;
+        bool isTitle = false;
+        bool isParallel = false;
+        bool isPreverse = false;
+        for(string modifier : tex.modifiers) {
+            if(modifier == "paragraph") {
+                isParagraph = true;
+            } else if (modifier == "line indent0") {
+                isIndent = true;
+            } else if (modifier == "divineName") {
+                isDivineName = true;
+            } else if (modifier == "wordsOfJesus") {
+                isJesus = true;
+            } else if (modifier == "title") {
+                isTitle = true;
+            } else if (modifier == "parallel") {
+                isParallel = true;
+            } else if (modifier == "preverse") {
+                isPreverse = true;
+            }
+        }
+        if(isPreverse or isTitle or isParallel) {
+            // Someday maybe we add this, but for now, omit
+            tex.text = "";
+        }
+        if(isIndent) {
+            isParagraph = false;
+            if(isNewline) {
+                out << indent;
+            }
+        }
+        if(isParagraph) {
+            out << indent;
+        }
+        if(isDivineName) {
+            transform(tex.text.begin(), tex.text.end(), tex.text.begin(), [](unsigned char c) { return static_cast<char>(::toupper(c)); }); // toupper is undefined for negative char values
+        }
+        if(isJesus) {
+            out << "\033[;31m";
+        }
+        if(omitVerseNums && tex.verse != verse) {
+            out << " ";
+        } else if(!omitVerseNums && tex.verse != verse) {
+            out << " (" << tex.verse << ") ";
+        }
+        chapter = tex.chapter;
+        verse = tex.verse;
+        out << tex.text;
+        // back() on an empty string is undefined; an omitted segment keeps the previous newline state.
+        if(!tex.text.empty()) isNewline = (tex.text.back() == '\n');
+        if(isJesus) {
+            out << "\033[0m";
+        }
+    }
+    out << "\n";
+
+    // Get window size
+    struct winsize size = {}; // zeroed so a failed ioctl (stdout is not a terminal) reads as 0 columns
+    ioctl(STDOUT_FILENO, TIOCGWINSZ, &size);
+    // size.ws_col is number of columns; it stays 0 when the ioctl fails (e.g. a pipe)
+    int cols = size.ws_col;
+    // If terminal is too small, treat it like a pipe
+    if(cols < 10) {
+        cols = 0;
+    }
+
+    // Now print
+    if(cols == 0) {
+        cout << out.str();
+    } else {
+        stringstream out2;
+        textWrap(out, out2, cols);
+        cout << out2.str();
+    }
+    return 0;
+}
diff --git a/src/lib/libbible.cc b/src/lib/libbible.cc
new file mode 100644
index 0000000..c9acb7d
--- /dev/null
+++ b/src/lib/libbible.cc
@@ -0,0 +1,262 @@
+#include "libbible.h"
+#include <sword/versekey.h>
+#include <sword/markupfiltmgr.h>
+#include <sword/swmodule.h>
+#include <sword/swmgr.h>
+#include <sword/osisfootnotes.h>
+#include <algorithm>
+
+using namespace sword;
+using namespace std;
+
+SWMgr library(new MarkupFilterMgr(FMT_XHTML)); // File-wide SWORD manager used by every function below; built with an XHTML markup filter manager.
+OSISFootnotes filter; // Option filter that getText() sets to "Off" and attaches to the module it reads.
+
+vector<string> getBooks(SWModule *target) {
+    // Walk every testament/book slot of the module's key and collect the
+    // names of books that are not yet listed and whose text at the key
+    // position renders non-empty.
+    vector<string> names;
+    VerseKey *cursor = (VerseKey *) target->getKey();
+    for(char testament = 1; testament <= cursor->getTestamentMax(); testament++) {
+        cursor->setTestament(testament);
+        for(char slot = 1; slot <= cursor->getBookMax(); slot++) {
+            cursor->setBook(slot);
+            // JPS claims two testaments but only has one, which yields
+            // repeats: skip any name that was already collected.
+            const bool seen = std::find(names.begin(), names.end(), cursor->getBookName()) != names.end();
+            // NT-only translations still report OT books; those render as empty text.
+            const bool usable = !seen && !string(target->renderText()).empty();
+            if(usable) {
+                names.push_back(cursor->getBookName());
+            }
+        }
+    }
+    return names;
+}
+
+map<string, vector<string>> libbible::getModules() {
+    // Calls library.load() on every invocation, then maps each module name to getBooks().
+    library.load();
+    map<string, vector<string>> booksByModule;
+    for(const auto& entry : library.getModules()) {
+        const string name = entry.second->getName();
+        SWModule *module = library.getModule(name.c_str());
+        booksByModule[name] = getBooks(module);
+    }
+    return booksByModule;
+}
+
+vector<libbible::passage> libbible::getPassages(string modName, string book) {
+    // Build one whole-chapter passage (verse 1 through the chapter's last
+    // verse) for each chapter of `book`; empty if the module is unknown.
+    vector<libbible::passage> chapters;
+    SWModule *module = library.getModule(modName.c_str());
+    if(module == nullptr) {
+        return chapters;
+    }
+    // Position on chapter 1 just to learn how many chapters there are.
+    module->setKey((book + " " + "1").c_str());
+    const int lastChapter = ((VerseKey *) module->getKey())->getChapterMax();
+    for(int number = 1; number <= lastChapter; number++) {
+        const string where = book + ' ' + to_string(number);
+        module->setKey(where.c_str());
+        VerseKey *at = (VerseKey *) module->getKey();
+        libbible::passage entry;
+        entry.modName = modName;
+        entry.book = string(at->getBookName());
+        entry.bookShort = string(at->getBookAbbrev());
+        entry.chapterStart = entry.chapterEnd = number;
+        entry.verseStart = 1;
+        entry.verseEnd = at->getVerseMax();
+        chapters.push_back(entry);
+    }
+    return chapters;
+}
+
+libbible::text getEmptyText(VerseKey *key) { // A text record positioned at `key`; its text and modifiers start out empty.
+    libbible::text blank;
+    blank.chapter = key->getChapter();
+    blank.verse = key->getVerse();
+    blank.book = key->getBookName();
+    blank.bookShort = key->getBookAbbrev();
+    return blank;
+}
+
+libbible::passage libbible::getPassage(string modName, string reference) {
+    // Resolve a human-readable reference into a book plus start/end positions.
+    // Every numeric field is 0 when the module or the reference is unusable.
+    libbible::passage pass{}; // value-initialised so early returns never expose indeterminate ints
+    pass.modName = modName;
+    SWModule *target = library.getModule(pass.modName.c_str());
+    if(target == nullptr || reference.empty()) {
+        // Bad input
+        return pass;
+    }
+    vector<string> validBooks = getBooks(target);
+    if(validBooks.empty()) return pass; // no readable book: validBooks[0] below would be out of range
+    // Let's use the target to help us
+    target->setKey(reference.c_str());
+    VerseKey *key = (VerseKey *) target->getKey();
+    pass.book = string(key->getBookName());
+    // Hold on a moment, is this book even legal?
+    if(find(validBooks.begin(), validBooks.end(), pass.book) == validBooks.end()) {
+        key->setBookName(validBooks[0].c_str());
+        pass.book = string(key->getBookName());
+    }
+    pass.bookShort = string(key->getBookAbbrev());
+    pass.chapterStart = key->getChapter();
+    pass.verseStart = key->getVerse();
+    // And now we just need chapterEnd and verseEnd. Yippee.
+    string ref = string(reference);
+    ref.erase(remove(ref.begin(), ref.end(), ' '), ref.end());
+    if(ref.find('-') == string::npos) {
+        // There's no range!
+        if(ref.find(':') == string::npos) {
+            // It's a full chapter reference
+            pass.chapterEnd = pass.chapterStart;
+            pass.verseEnd = key->getVerseMax();
+        } else {
+            // It's a single verse reference
+            pass.chapterEnd = pass.chapterStart;
+            pass.verseEnd = pass.verseStart;
+        }
+    } else {
+        if(ref.find(':') == string::npos) {
+            // It's a multi-full-chapter reference
+            pass.chapterEnd = stoi(ref.substr(ref.find_last_of('-')+1));
+            key->setChapter(pass.chapterEnd);
+            pass.verseEnd = key->getVerseMax();
+        } else {
+            // It falls in categories c:v-v or c:v-c:v (or, technically, c-c:v)
+            string rangeEnd = ref.substr(ref.find_last_of('-')+1);
+            if(rangeEnd.find(':') == string::npos) {
+                // It's c:v-v
+                pass.verseEnd = stoi(rangeEnd);
+                pass.chapterEnd = pass.chapterStart;
+            } else {
+                // It's c:v-c:v (or c-c:v, but code is the same)
+                pass.chapterEnd = stoi(rangeEnd.substr(0, rangeEnd.find(':')));
+                pass.verseEnd = stoi(rangeEnd.substr(rangeEnd.find(':')+1));
+            }
+        }
+    }
+    return pass;
+}
+
+vector<libbible::text> libbible::getText(libbible::passage pass) { // Render the passage into text segments tagged with markup modifiers.
+    vector<libbible::text> texts;
+    SWModule *target = library.getModule(pass.modName.c_str());
+    if(target == nullptr) {
+        // Module doesn't exist; checked before target is first dereferenced
+        return texts;
+    }
+    filter.setOptionValue("Off");
+    target->addOptionFilter(&filter);
+    if(pass.book.empty()) {
+        pass.book = pass.bookShort;
+    }
+    target->setKey((pass.book
+                + " " + to_string(pass.chapterStart)
+                + ":" + to_string(pass.verseStart)).c_str());
+    VerseKey *key = (VerseKey *) target->getKey();
+
+    bool endOfParagraph = false;
+
+    string book = string(key->getBookName());
+
+    for(; string(key->getBookName()) == book &&
+            (key->getChapter() < pass.chapterEnd
+            || (key->getChapter() == pass.chapterEnd && key->getVerse() <= pass.verseEnd));
+            (*key)++) {
+
+        string text = string(target->renderText());
+
+        texts.push_back(getEmptyText(key));
+
+        if(key->getVerse() == 1 || endOfParagraph) {
+            if(find(texts.back().modifiers.begin(), texts.back().modifiers.end(), "paragraph") == texts.back().modifiers.end()) {
+                texts.back().modifiers.push_back("paragraph");
+            }
+            endOfParagraph = false;
+        }
+
+        // Variable to accumulate unterminated spans
+        std::vector<std::pair<std::string, std::string>> spans;
+        bool spansChanged = false;
+        bool hasAddedText = false;
+        // Iterate over text
+        for(auto i = text.begin(); i != text.end(); i++) {
+            if(*i != '<') {
+                if(spansChanged) {
+                    spansChanged = false;
+                    if(!texts.back().text.empty()) {
+                        texts.push_back(getEmptyText(key));
+                    }
+                    for(auto& [tag, modifier] : spans) {
+                        if(find(texts.back().modifiers.begin(), texts.back().modifiers.end(), modifier) == texts.back().modifiers.end()) {
+                            texts.back().modifiers.push_back(modifier);
+                        }
+                    }
+                }
+                if(*i == '\n') {
+                    continue; // We add newlines with <br />
+                }
+                if(! hasAddedText && (*i == ' ' || *i == '\t')) {
+                    continue;
+                }
+                if(*i == "¶"[0] && i+1 != text.end() && *(i+1) == "¶"[1]) {
+                    i++;
+                    if(hasAddedText) {
+                        texts.back().text += '\n';
+                    } else {
+                        // Append \n to text in previous texts (if applicable)
+                        if(texts.size() > 1) {
+                            texts[texts.size()-2].text += '\n';
+                        }
+                        texts.back().modifiers.push_back("paragraph");
+                        continue;
+                    }
+                }
+                texts.back().text += *i;
+                hasAddedText = true;
+            }
+            else {
+                string span;
+                for(; i != text.end(); i++) {
+                    span.push_back(*i);
+                    if(*i == '>') {
+                        // The end of the span will be "</tag>".
+                        if(span[1] == '/') {
+                            string tag = span.substr(2, span.size()-3);
+                            for(auto rit = spans.rbegin(); rit != spans.rend(); rit++) {
+                                if(rit->first == tag) {
+                                    spans.erase(rit.base()-1);
+                                    spansChanged = true;
+                                    break;
+                                }
+                            }
+                        } else if(span.find("class=\"") != string::npos) {
+                            // The span will be formatted "<tag class=\"NAME\">"
+                            // We want just the NAME
+                            string tag = span.substr(1, span.find(" ")-1);
+                            size_t start = span.find("class=\"")+7;
+                            size_t end = span.find("\"", start);
+                            spans.push_back(std::pair<string, string>(tag, span.substr(start, end-start)));
+                            spansChanged = true;
+                        } else if(span.find("preverse") != string::npos) {
+                            string tag = span.substr(1, span.find(" ")-1);
+                            spans.push_back(std::pair<string, string>(tag, "preverse"));
+                        } else if(span == "<br />" || span == "<br/>") {
+                            texts.back().text += '\n';
+                        }
+                        break;
+                    }
+                }
+                if(i == text.end()) break; // unterminated tag: stop before the outer i++ steps past end()
+            }
+        }
+        endOfParagraph = (!text.empty() && text.back() == '\n'); // empty render: indexing length()-1 would be out of range
+    }
+    return texts;
+}
diff --git a/src/lib/libbible.h b/src/lib/libbible.h
new file mode 100644
index 0000000..f77dc8c
--- /dev/null
+++ b/src/lib/libbible.h
@@ -0,0 +1,115 @@
+#include <string>
+#include <vector>
+#include <map>
+
+namespace libbible {
+
+    struct text {
+        int chapter = 0; // zero by default so a default-constructed text holds no indeterminate values
+        int verse = 0;
+        std::string book;
+        std::string bookShort;
+        std::string text;
+        std::vector<std::string> modifiers; // e.g., paragraph, line indent0, divineName, wordsOfJesus
+    };
+
+    struct passage {
+        std::string modName;
+        std::string book;
+        std::string bookShort;
+        int chapterStart = 0; // zero by default so a default-constructed passage holds no indeterminate values
+        int verseStart = 0;
+        int chapterEnd = 0;
+        int verseEnd = 0;
+    };
+
+    /*
+     * @return Map of modName to supported books
+     */
+    std::map<std::string, std::vector<std::string>> getModules(void);
+
+    /*
+     * @return Vector of valid single full-chapter passages for a book
+     */
+    std::vector<struct passage> getPassages(std::string modName, std::string book);
+
+    /*
+     * @param modName the module to use for determining the passage
+     * @param reference a human-readable reference, e.g., "gen 1:26-27"
+     * @return the passage matching the reference
+     */
+    passage getPassage(std::string modName, std::string reference);
+
+    /*
+     * @return Text for a passage
+     */
+    std::vector<struct text> getText(struct passage pass);
+
+    /**************************
+     * Methods dealing with mods
+     ***************************/
+    class Status {
+        public:
+            virtual ~Status() = default; // polymorphic base: safe to destroy through a Status pointer
+            virtual void update(unsigned long totalBytes, unsigned long completedBytes, std::string message) {}
+    };
+
+    /**
+     * @param status Status update method is called asynchronously as download progresses
+     */
+    void setStatusReporter(Status& status);
+
+    /**
+     * @return A mapping from language to bible version names
+     */
+    std::map<std::string, std::vector<std::string>> downloadModsAvailable();
+
+    /**
+     * @return A mapping from language abbreviations to full language names
+     */
+    std::map<std::string, std::string> getLanguageNames();
+
+    /**
+     * Cancel an in-progress download
+     */
+    void terminateDownload(void);
+
+    /**
+     * @param language The language of the mod to install as provided from downloadModsAvailable
+     * @param name The name of the bible version as provided from downloadModsAvailable
+     * @see downloadModsAvailable()
+     * @return true on success, false otherwise
+     */
+    bool installModFromInternet(std::string language, std::string name);
+
+    /**
+     * @param filename Path to the .zip compressed module to be installed
+     * @return true on success, false otherwise
+     */
+    bool installModFromZip(std::string filename);
+
+    /**
+     * @param modname The name of the module to be removed
+     */
+    void uninstallMod(std::string modname);
+
+    /******************************
+     * Methods dealing with settings
+     *******************************/
+
+    /*
+     * From already established code, valid and useful values are:
+     *    int fontsize: the last used size of the font
+     *    string passage: the last looked-up passage
+     *    string module: the last used module
+     */
+
+    void settingsWrite(std::string key, std::string value);
+
+    std::string settingsRead(std::string key);
+
+    void settingsWriteInt(std::string key, int value);
+
+    int settingsReadInt(std::string key);
+
+}
diff --git a/src/lib/mods.cc b/src/lib/mods.cc
new file mode 100644
index 0000000..ab54e48
--- /dev/null
+++ b/src/lib/mods.cc
@@ -0,0 +1,233 @@
+#include "libbible.h"
+#include <sword/swmgr.h>
+#include <sword/swmodule.h>
+#include <sword/installmgr.h>
+#include <sword/filemgr.h>
+#include <sword/remotetrans.h>
+#include <unzip.h>
+#include <filesystem>
+
+using namespace std;
+
+class myStatusReporter : public sword::StatusReporter { // forwards SWORD progress callbacks to a libbible::Status
+    public:
+        myStatusReporter(libbible::Status *status); // keeps the pointer; the Status object is not copied
+        ~myStatusReporter();
+        void preStatus(long totalBytes, long completedBytes, const char *message); // remembers message, then forwards
+        void update(unsigned long totalBytes, unsigned long completedBytes); // forwards with the remembered message
+    protected:
+        libbible::Status *status; // receiver of every forwarded update
+        string message; // text from the most recent preStatus call
+};
+
+// Keep a pointer to the receiver that progress updates are forwarded to.
+myStatusReporter::myStatusReporter(libbible::Status *s) : status(s) {}
+
+// Nothing is released here: the Status object is only pointed at, never created by this class.
+myStatusReporter::~myStatusReporter() {}
+
+//virtual void libbible::Status::update(unsigned long totalBytes, unsigned long completedBytes, string message) {}
+
+// Remember the message SWORD announces and pass it on together with the byte counts.
+void myStatusReporter::preStatus(long totalBytes, long completedBytes, const char *msg) {
+    message.assign(msg);
+    status->update(static_cast<unsigned long>(totalBytes), static_cast<unsigned long>(completedBytes), message);
+}
+
+void myStatusReporter::update(unsigned long totalBytes, unsigned long completedBytes) {
+    status->update(totalBytes, completedBytes, message);
+    // message is whatever the most recent preStatus call stored (empty before the first one)
+}
+
+string basedir = (getenv("HOME")) + string("/.sword/"); // NOTE(review): getenv returns a null pointer when HOME is unset, which this expression does not handle
+sword::InstallMgr *installMgr = new sword::InstallMgr((basedir + std::string("InstallMgr")).c_str(), nullptr); // replaced by setStatusReporter
+map<string, vector<pair<string, sword::InstallSource *>>> installSources; // language -> (module name, source offering it); filled by downloadModsAvailable
+map<string, string> languageNames; // maps abbreviation to full name
+
+void libbible::setStatusReporter(libbible::Status& status) { // swaps in an InstallMgr that reports to status (only its address is kept)
+    delete installMgr; // it came from new: free() was the wrong deallocator and never ran the destructor
+    installSources.clear(); // cached InstallSource pointers came from the old manager's source list; rebuilt on demand
+    installMgr = new sword::InstallMgr((basedir + std::string("InstallMgr")).c_str(), new myStatusReporter(&status)); // NOTE(review): earlier reporter objects are never freed here - confirm who owns them
+    installMgr->setUserDisclaimerConfirmed(true);
+}
+
+map<string, vector<string>> libbible::downloadModsAvailable() { // lists installable Bible texts per language; also rebuilds installSources and languageNames
+    installSources.clear();
+    languageNames.clear();
+    mkdir((basedir + std::string("mods.d/")).c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); // return value is not checked
+    mkdir((basedir + std::string("modules/")).c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); // return value is not checked
+    installMgr->setUserDisclaimerConfirmed(true);
+    string confpath = basedir + string("InstallMgr/InstallMgr.conf");
+    if(! sword::FileMgr::existsFile(confpath.c_str())) {
+        // No InstallMgr.conf yet: write one that lists the CrossWire FTP source. Lifted directly from xiphos
+        sword::FileMgr::createParent(confpath.c_str());
+        sword::SWConfig config(confpath.c_str());
+        sword::InstallSource is("FTP");
+        is.caption = "CrossWire";
+        is.source = "ftp.crosswire.org";
+        is.directory = "/pub/sword/raw";
+        config["General"]["PassiveFTP"] = "true";
+        config["Sources"]["FTPSource"] = is.getConfEnt();
+        config.save();
+        installMgr->refreshRemoteSourceConfiguration();
+    }
+    installMgr->readInstallConf();
+    map<string, vector<string>> modsAvailable;
+    map<string, vector<string>> languagesToFull; // language code -> one full-name candidate per module (may be empty strings)
+    // Walk every configured source and collect the modules whose type is "Biblical Texts".
+    for(auto src : installMgr->sources) {
+        if(src.second->getMgr()->Modules.empty()) {
+            // The source reports no modules yet, so ask InstallMgr to refresh it first.
+            installMgr->refreshRemoteSource(src.second);
+        }
+        for(auto mod : src.second->getMgr()->Modules) {
+            auto *curMod = mod.second;
+            string type(curMod->getType());
+            if(type == "Biblical Texts") {
+                string language(curMod->getLanguage());
+                string fullLang; // stays empty when the module has no LCSH entry
+                if(curMod->getConfigEntry("LCSH")) {
+                    // Split on periods, last field, strip
+                    fullLang = string(curMod->getConfigEntry("LCSH"));
+                    // If ends with ., remove
+                    if(fullLang.ends_with('.')) fullLang = fullLang.substr(0, fullLang.size()-1);
+                    if(fullLang.find('.') != string::npos) fullLang = fullLang.substr(fullLang.find_last_of('.')+1);
+                    while(fullLang.starts_with(' ')) fullLang = fullLang.substr(1);
+                    while(fullLang.ends_with(' ')) fullLang = fullLang.substr(0, fullLang.size()-1);
+                }
+                vector<string> newLangs;
+                languagesToFull.emplace(language, newLangs);
+                languagesToFull[language].push_back(fullLang);
+                vector<string> newMods;
+                vector<pair<string, sword::InstallSource *>> newSources;
+                // emplace only adds if key is unique
+                modsAvailable.emplace(language, newMods);
+                installSources.emplace(language, newSources);
+                modsAvailable[language].push_back(string(curMod->getName()));
+                pair<string, sword::InstallSource *> p(string(curMod->getName()), src.second); // the pointer is the one held in installMgr->sources
+                installSources[language].push_back(p);
+            }
+        }
+    }
+    // Now use majority voting to move languagesToFull -> languageNames
+    for(const auto& [abbrev, fulls] : languagesToFull) {
+        std::map<string, int> majVote; // candidate name -> number of modules that proposed it
+        for(auto full : fulls) {
+            majVote.try_emplace(full, 0);
+            majVote[full]++;
+        }
+        string selected = fulls[0]; // fulls has at least one element: every key was added together with a push_back
+        for(auto full : fulls) {
+            if(majVote[full] > majVote[selected] or (majVote[full] == majVote[selected] and !full.empty() and full.size() < selected.size())) { // on a tie the shorter non-empty name wins
+                selected = full;
+            }
+        }
+        if(selected.empty()) languageNames[abbrev] = abbrev; // no usable full name: fall back to the abbreviation
+        else languageNames[abbrev] = selected;
+    }
+    return modsAvailable;
+}
+
+// Return the abbreviation -> full-name table, building it on first use.
+std::map<std::string, std::string> libbible::getLanguageNames() {
+    // The table is only filled as a side effect of downloadModsAvailable().
+    if(languageNames.empty()) downloadModsAvailable();
+    return languageNames;
+}
+
+void libbible::terminateDownload() { // forwards the cancel request to the current global InstallMgr
+    installMgr->terminate();
+}
+
+// Install module `name`, listed under `language` by downloadModsAvailable(), from the remote
+// source that offered it. Returns true only when SWORD reports a successful install.
+bool libbible::installModFromInternet(string language, string name) {
+    if(installSources.empty()) downloadModsAvailable(); // the table is filled there; build it on first use
+    // find() instead of operator[]: an unknown language used to insert an empty entry, which made
+    // the table non-empty and so disabled the lazy rebuild above after a failed listing.
+    const auto known = installSources.find(language);
+    if(known == installSources.end()) return false;
+    for(const auto& [modName, source] : known->second) {
+        if(modName != name) continue;
+        sword::SWMgr mgr(basedir.c_str());
+        if(installMgr->installModule(&mgr, 0, name.c_str(), source) != 0) return false;
+        printf("Installed from %s\n", source->getConfEnt().c_str());
+        return true;
+    }
+    return false;
+}
+
+#define READ_SIZE 8192
+#define delim '/'
+
+// Extract the archive entry the minizip cursor currently points at into basedir.
+// Returns false on any failure; the output file and the entry are closed on every path.
+static bool extractCurrentEntry(unzFile zipfile) {
+    char read_buffer[READ_SIZE];
+    unz_file_info file_info;
+    if(unzGetCurrentFileInfo(zipfile, &file_info, read_buffer, READ_SIZE, NULL, 0, NULL, 0) != UNZ_OK) {
+        return false;
+    }
+    const string entry(read_buffer);
+    // Entry names come from the archive, so refuse absolute paths and ".." components:
+    // either one would make the write below land outside basedir.
+    if(entry.empty() || entry.front() == delim || ("/" + entry + "/").find("/../") != string::npos) {
+        return false;
+    }
+    const string fname = basedir + entry;
+    // Non-throwing overload: a directory that could not be made shows up as a failed fopen.
+    std::error_code ec;
+    filesystem::create_directories(fname.substr(0, fname.find_last_of(delim)), ec);
+    if(entry.back() == delim) {
+        // Directory entry: there is no data to copy, and fopen on it would fail.
+        return !ec;
+    }
+    if(unzOpenCurrentFile(zipfile) != UNZ_OK) {
+        return false;
+    }
+    FILE *out = fopen(fname.c_str(), "wb");
+    bool ok = (out != NULL);
+    int bytesRead = 0;
+    while(ok && (bytesRead = unzReadCurrentFile(zipfile, read_buffer, READ_SIZE)) > 0) {
+        // A short write (e.g. disk full) is a failure, not something to ignore.
+        ok = fwrite(read_buffer, 1, bytesRead, out) == static_cast<size_t>(bytesRead);
+    }
+    if(bytesRead < 0) {
+        printf("error %d\n", bytesRead);
+        ok = false;
+    }
+    if(out != NULL && fclose(out) != 0) {
+        ok = false;
+    }
+    // minizip reports a CRC mismatch when a fully read entry is closed.
+    if(unzCloseCurrentFile(zipfile) != UNZ_OK) {
+        ok = false;
+    }
+    return ok;
+}
+
+// Install a module from the zip archive `filename` by unpacking every entry beneath
+// basedir (~/.sword/). Returns true only if all entries were extracted; files that
+// were already written are not rolled back on failure.
+bool libbible::installModFromZip(string filename) {
+    // TODO: Use the ZipCompress module from SWORD instead.
+    unzFile zipfile = unzOpen(filename.c_str());
+    if(zipfile == NULL) return false;
+    unz_global_info global_info;
+    bool ok = (unzGetGlobalInfo(zipfile, &global_info) == UNZ_OK);
+    for(unsigned long i = 0; ok && i < global_info.number_entry; i++) {
+        ok = extractCurrentEntry(zipfile);
+        // Move the cursor only between entries; there is nothing after the last one.
+        if(ok && i + 1 < global_info.number_entry) ok = (unzGoToNextFile(zipfile) == UNZ_OK);
+    }
+    unzClose(zipfile);
+    return ok;
+}
+
+// Remove the installed module called `modname`; a name that is not installed is ignored.
+void libbible::uninstallMod(string modname) {
+    sword::SWMgr mgr(basedir.c_str());
+    const auto found = mgr.Modules.find(modname.c_str());
+    if(found == mgr.Modules.end()) return;
+    installMgr->removeModule(&mgr, found->second->getName());
+}
diff --git a/src/lib/settings.cc b/src/lib/settings.cc
new file mode 100644
index 0000000..848e22f
--- /dev/null
+++ b/src/lib/settings.cc
@@ -0,0 +1,23 @@
+#include "libbible.h"
+#include <sword/swconfig.h>
+
+std::string path = (std::getenv("HOME")) + std::string("/.sword/libbible.conf"); // NOTE(review): getenv returns a null pointer when HOME is unset, which is not handled here
+sword::SWConfig config(path.c_str()); // shared by every settings* function in this file
+// Store value under key in the "General" section and save the configuration right away.
+void libbible::settingsWrite(std::string key, std::string value) {
+    config["General"][key.c_str()] = sword::SWBuf(value.c_str());
+    config.save();
+}
+// Return the text stored under key in the "General" section.
+std::string libbible::settingsRead(std::string key) {
+    return config["General"][key.c_str()].c_str();
+}
+// Store value under key as decimal text and save the configuration right away.
+void libbible::settingsWriteInt(std::string key, int value) {
+    config["General"][key.c_str()] = sword::SWBuf(std::to_string(value).c_str());
+    config.save();
+}
+// Return the text stored under key converted with atoi.
+int libbible::settingsReadInt(std::string key) {
+    return atoi(config["General"][key.c_str()].c_str());
+}
diff --git a/src/test/Makefile b/src/test/Makefile
new file mode 100644
index 0000000..1f8bc8b
--- /dev/null
+++ b/src/test/Makefile
@@ -0,0 +1,20 @@
+LIBS = sword minizip
+override CXXFLAGS += -MMD -Wall -fPIC -std=c++20 `pkg-config $(LIBS) --cflags`
+override LDFLAGS += -lstdc++fs `pkg-config $(LIBS) --libs` -lcppunit ../../libbible.so
+SOURCES = $(wildcard *.cc)
+OBJECTS = $(SOURCES:.cc=.o)
+DEPS = $(OBJECTS:.o=.d)
+TEST = testLibbible
+# Link every object built from the *.cc files in this directory into the test runner.
+$(TEST): $(OBJECTS)
+	$(CXX) $(OBJECTS) -o $@ $(LDFLAGS)
+# Pull in the .d files written by -MMD; the leading '-' ignores ones that do not exist yet.
+-include $(DEPS)
+# Build the runner if needed, then execute it from this directory.
+.PHONY: test
+test: $(TEST)
+	./$(TEST)
+# Delete the objects, the dependency files and the test binary.
+.PHONY: clean
+clean:
+	$(RM) $(OBJECTS) $(DEPS) $(TEST)
diff --git a/src/test/modules/JPS.zip b/src/test/modules/JPS.zip
new file mode 100644
index 0000000..4f09ff8
Binary files /dev/null and b/src/test/modules/JPS.zip differ
diff --git a/src/test/modules/KJV.zip b/src/test/modules/KJV.zip
new file mode 100644
index 0000000..27c161d
Binary files /dev/null and b/src/test/modules/KJV.zip differ
diff --git a/src/test/testLibbible.cc b/src/test/testLibbible.cc
new file mode 100644
index 0000000..d3a265a
--- /dev/null
+++ b/src/test/testLibbible.cc
@@ -0,0 +1,265 @@
+//#include <libbible.h>
+#include "libbible.h"
+#include <string>
+#include <map>
+#include <vector>
+#include <cppunit/TestCase.h>
+#include <cppunit/TestFixture.h>
+#include <cppunit/ui/text/TextTestRunner.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/extensions/TestFactoryRegistry.h>
+#include <cppunit/TestResult.h>
+#include <cppunit/TestResultCollector.h>
+#include <cppunit/TestRunner.h>
+#include <cppunit/BriefTestProgressListener.h>
+#include <cppunit/CompilerOutputter.h>
+#include <cppunit/XmlOutputter.h>
+#include <netinet/in.h>
+#include <iostream>
+
+using namespace CppUnit;
+using namespace std;
+
+//-----------------------------------------------------------------------------
+
+class TestLibbible : public CppUnit::TestFixture // fixture exercising the public libbible API
+{
+    CPPUNIT_TEST_SUITE(TestLibbible);
+    CPPUNIT_TEST(testGetModules);
+    CPPUNIT_TEST(testGetPassages);
+    CPPUNIT_TEST(testGetText);
+    CPPUNIT_TEST(testSettings);
+    CPPUNIT_TEST(testDownload);
+    CPPUNIT_TEST_SUITE_END();
+
+    //public:
+    //void setUp(void);
+    //void tearDown(void);
+
+    protected:
+    void testGetModules(void); // reinstalls KJV and JPS from the bundled zips
+    void testGetPassages(void); // looks up "Romans" in the KJV module
+    void testGetText(void); // fetches KJV text for several passages and references
+    void testSettings(void); // round-trips string and int settings
+    void testDownload(void); // installs a remote module, once cancelled and once for real
+
+};
+
+//-----------------------------------------------------------------------------
+
+class StatusTester : public libbible::Status // records whether any progress callback arrived
+{
+    public:
+    void update(unsigned long totalBytes, unsigned long completedBytes, string message) override; // override: drifting from Status::update becomes a compile error
+    bool hasBeenUpdated = false; // set by update() and never reset
+};
+
+void StatusTester::update(unsigned long totalBytes, unsigned long completedBytes, string message) {
+    hasBeenUpdated = true;
+}
+
+//-----------------------------------------------------------------------------
+
+class CancelTester : public libbible::Status // cancels the running download on every progress callback
+{
+    public:
+    void update(unsigned long totalBytes, unsigned long completedBytes, string message) override; // override: drifting from Status::update becomes a compile error
+};
+
+void CancelTester::update(unsigned long totalBytes, unsigned long completedBytes, string message) {
+    libbible::terminateDownload();
+}
+
+//-----------------------------------------------------------------------------
+
+void TestLibbible::testGetModules(void) { // uninstalls every module, then reinstalls KJV and JPS from the bundled zips
+    map<string, vector<string>> mods = libbible::getModules();
+    for(auto pair : mods) {
+        libbible::uninstallMod(pair.first); // NOTE(review): this removes whatever is installed under $HOME/.sword, not a sandbox
+    }
+    CPPUNIT_ASSERT(libbible::getModules().empty());
+    CPPUNIT_ASSERT(libbible::installModFromZip("modules/KJV.zip")); // relative path: the runner must be started from src/test
+    CPPUNIT_ASSERT(libbible::installModFromZip("modules/JPS.zip"));
+    mods = libbible::getModules();
+    CPPUNIT_ASSERT(mods.find("KJV") != mods.end());
+    CPPUNIT_ASSERT(mods["KJV"].size() == 66); // the vector holds book names, as the two index checks below show
+    CPPUNIT_ASSERT(mods["KJV"][7] == "Ruth");
+    CPPUNIT_ASSERT(mods["KJV"][42] == "John");
+    CPPUNIT_ASSERT(mods.find("JPS") != mods.end());
+    CPPUNIT_ASSERT(mods["JPS"].size() == 39);
+}
+
+void TestLibbible::testGetPassages(void) { // expects 16 passages for KJV Romans, the first spanning 1:1-1:32
+    auto passages = libbible::getPassages("KJV", "Romans");
+    CPPUNIT_ASSERT(passages.size() == 16); // checked first so passages[0] below can never be out of range
+    CPPUNIT_ASSERT(passages[0].modName == "KJV");
+    CPPUNIT_ASSERT(passages[0].book == "Romans");
+    CPPUNIT_ASSERT(passages[0].bookShort == "Rom");
+    CPPUNIT_ASSERT(passages[0].chapterStart == 1);
+    CPPUNIT_ASSERT(passages[0].verseStart == 1);
+    CPPUNIT_ASSERT(passages[0].chapterEnd == 1);
+    CPPUNIT_ASSERT(passages[0].verseEnd == 32);
+}
+
+// Reduce a sequence of text fragments to the ordered list of (chapter, verse) pairs it
+// covers; consecutive fragments belonging to the same verse contribute a single pair.
+vector<pair<int, int>> getChapVerses(std::vector<libbible::text> text) {
+    vector<pair<int, int>> seen;
+    for(const auto& fragment : text) {
+        const bool sameAsLast = !seen.empty() &&
+                seen.back().first == fragment.chapter &&
+                seen.back().second == fragment.verse;
+        if(sameAsLast) {
+            continue;
+        }
+        seen.push_back(pair<int, int>(fragment.chapter, fragment.verse));
+    }
+    return seen;
+}
+
+void TestLibbible::testGetText(void) { // checks verse coverage and exact text for several KJV passages
+    libbible::passage pass;
+    pass.modName = "KJV";
+    pass.bookShort = "Matt"; // this passage is identified by its short book name only
+    pass.chapterStart = 3;
+    pass.verseStart = 16;
+    pass.chapterEnd = 4;
+    pass.verseEnd = 7;
+    auto text = libbible::getText(pass);
+    // Verify that it includes every verse (3:16-17 + 4:1-7)
+    vector<pair<int, int>> chapVerses = getChapVerses(text);
+    vector<pair<int, int>> shouldContain = vector<pair<int, int>>({pair<int, int>(3, 16),
+            pair<int, int>(3, 17),
+            pair<int, int>(4, 1),
+            pair<int, int>(4, 2),
+            pair<int, int>(4, 3),
+            pair<int, int>(4, 4),
+            pair<int, int>(4, 5),
+            pair<int, int>(4, 6),
+            pair<int, int>(4, 7)});
+    CPPUNIT_ASSERT(chapVerses == shouldContain);
+    libbible::passage pass2;
+    pass2.modName = "KJV";
+    pass2.book = "John"; // this passage is identified by its full book name only
+    pass2.chapterStart = 3;
+    pass2.verseStart = 16;
+    pass2.chapterEnd = 3;
+    pass2.verseEnd = 16;
+    text = libbible::getText(pass2);
+    string allText;
+    for(auto tex : text) {
+        allText += tex.text;
+    }
+    // The concatenated fragments must match exactly, including the trailing space.
+    CPPUNIT_ASSERT(allText == "For God so loved the world, that he gave his only begotten Son, that whosoever believeth in him should not perish, but have everlasting life. ");
+    // Same check, with the passage built from a reference string.
+    text = libbible::getText(libbible::getPassage("KJV", "John 3:3"));
+    allText.clear();
+    for(auto tex : text) {
+        allText += tex.text;
+    }
+    // The expected text has two spaces after "unto him,".
+    CPPUNIT_ASSERT(allText == "Jesus answered and said unto him,  Verily, verily, I say unto thee, Except a man be born again, he cannot see the kingdom of God. ");
+    // A verse range inside one chapter, using an abbreviated book name.
+    text = libbible::getText(libbible::getPassage("KJV", "Gal 5:22-23"));
+    chapVerses = getChapVerses(text);
+    shouldContain = vector<pair<int, int>>({pair<int, int>(5, 22), pair<int, int>(5, 23)});
+    CPPUNIT_ASSERT(chapVerses == shouldContain);
+
+    text = libbible::getText(libbible::getPassage("KJV", "1 cor 1:31-2:1")); // range crossing a chapter boundary
+    chapVerses = getChapVerses(text);
+    shouldContain = vector<pair<int, int>>({pair<int, int>(1, 31), pair<int, int>(2, 1)});
+    CPPUNIT_ASSERT(chapVerses == shouldContain);
+    // A range of whole chapters: every verse of Psalms 14 and 15 is expected.
+    text = libbible::getText(libbible::getPassage("KJV", "ps 14-15"));
+    chapVerses = getChapVerses(text);
+    shouldContain = vector<pair<int, int>>({pair<int, int>(14, 1),
+        pair<int, int>(14, 2),
+        pair<int, int>(14, 3),
+        pair<int, int>(14, 4),
+        pair<int, int>(14, 5),
+        pair<int, int>(14, 6),
+        pair<int, int>(14, 7),
+        pair<int, int>(15, 1),
+        pair<int, int>(15, 2),
+        pair<int, int>(15, 3),
+        pair<int, int>(15, 4),
+        pair<int, int>(15, 5)});
+    CPPUNIT_ASSERT(chapVerses == shouldContain);
+
+    text = libbible::getText(libbible::getPassage("KJV", "John 21")); // a whole chapter given without verses
+    CPPUNIT_ASSERT(text.back().verse == 25); // NOTE(review): back() assumes text is not empty
+}
+
+void TestLibbible::testSettings(void) { // round-trips values through the settings API using the key "test"
+    libbible::settingsWrite("test", "foo");
+    CPPUNIT_ASSERT(libbible::settingsRead("test") == "foo");
+    libbible::settingsWrite("test", "bar"); // a second write replaces the first value
+    CPPUNIT_ASSERT(libbible::settingsRead("test") == "bar");
+    libbible::settingsWriteInt("test", 5);
+    CPPUNIT_ASSERT(libbible::settingsReadInt("test") == 5);
+    libbible::settingsWrite("test", ""); // leaves the key behind with an empty value
+    CPPUNIT_ASSERT(libbible::settingsRead("test") == "");
+}
+
+void TestLibbible::testDownload(void) { // calls downloadModsAvailable and installModFromInternet against the configured remote sources
+    map<string, vector<string>> modsAvailable = libbible::downloadModsAvailable();
+    // We try installing the first available one
+    string language;
+    string name;
+    for(auto pair : modsAvailable) {
+        language = pair.first;
+        name = pair.second[0];
+        break;
+    }
+    CPPUNIT_ASSERT(!language.empty() && !name.empty());
+    // Try uninstalling it (shouldn't crash or have nasty side effects!)
+    libbible::uninstallMod(name);
+    // Try installing it with cancel. Shouldn't work because it gets cancelled!
+    CancelTester cancel;
+    libbible::setStatusReporter(cancel);
+    libbible::installModFromInternet(language, name);
+    auto mods = libbible::getModules();
+    CPPUNIT_ASSERT(mods.find(name) == mods.end());
+    // Now we try with normal status
+    StatusTester status;
+    libbible::setStatusReporter(status);
+    libbible::installModFromInternet(language, name);
+    mods = libbible::getModules();
+    CPPUNIT_ASSERT(mods.find(name) != mods.end());
+    CPPUNIT_ASSERT(status.hasBeenUpdated);
+    // NOTE(review): the module installed above is not removed again, and status goes out of scope while still registered.
+}
+//-----------------------------------------------------------------------------
+
+CPPUNIT_TEST_SUITE_REGISTRATION( TestLibbible );
+
+int main(int argc, char* argv[]) {
+    // Event hub: every listener added below is told about each test outcome.
+    CPPUNIT_NS::TestResult controller;
+
+    // Collects the outcomes so they can be printed and turned into an exit code.
+    CPPUNIT_NS::TestResultCollector outcomes;
+    controller.addListener (&outcomes);
+
+    // Listener that reports progress test by test while the suite runs.
+    CPPUNIT_NS::BriefTestProgressListener ticker;
+    controller.addListener (&ticker);
+
+    // Run everything that registered itself with the global factory registry.
+    CPPUNIT_NS::TestRunner runner;
+    runner.addTest (CPPUNIT_NS::TestFactoryRegistry::getRegistry().makeTest ());
+    runner.run(controller);
+
+    // Write the collected results to stderr in compiler-style format.
+    CPPUNIT_NS::CompilerOutputter report(&outcomes, std::cerr);
+    report.write ();
+
+    // Output XML for Jenkins CPPunit plugin
+    //ofstream xmlFileOut("testLibbibleResults.xml");
+    //XmlOutputter xmlOut(&outcomes, xmlFileOut);
+    //xmlOut.write();
+
+    // Exit status 0 means every test passed.
+    return outcomes.wasSuccessful() ? 0 : 1;
+}
diff --git a/src/utf8.h b/src/utf8.h
new file mode 100644
index 0000000..4e44514
--- /dev/null
+++ b/src/utf8.h
@@ -0,0 +1,34 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "utf8/checked.h"
+#include "utf8/unchecked.h"
+
+#endif // header guard
diff --git a/src/utf8/checked.h b/src/utf8/checked.h
new file mode 100644
index 0000000..1331155
--- /dev/null
+++ b/src/utf8/checked.h
@@ -0,0 +1,327 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+#include <stdexcept>
+
+namespace utf8
+{
+    // Base for the exceptions that may be thrown from the library
+    class exception : public ::std::exception {
+    };
+
+    // Exceptions that may be thrown from the library functions.
+    class invalid_code_point : public exception { // carries the rejected code point value
+        uint32_t cp;
+    public:
+        invalid_code_point(uint32_t cp) : cp(cp) {}
+        virtual const char* what() const noexcept { return "Invalid code point"; } // was throw(), which C++20 removed
+        uint32_t code_point() const {return cp;}
+    };
+
+    class invalid_utf8 : public exception { // carries the octet at which decoding failed
+        uint8_t u8;
+    public:
+        invalid_utf8 (uint8_t u) : u8(u) {}
+        virtual const char* what() const noexcept { return "Invalid UTF-8"; } // was throw(), which C++20 removed
+        uint8_t utf8_octet() const {return u8;}
+    };
+
+    class invalid_utf16 : public exception { // carries the 16-bit unit that broke surrogate pairing
+        uint16_t u16;
+    public:
+        invalid_utf16 (uint16_t u) : u16(u) {}
+        virtual const char* what() const noexcept { return "Invalid UTF-16"; } // was throw(), which C++20 removed
+        uint16_t utf16_word() const {return u16;}
+    };
+
+    class not_enough_room : public exception { // thrown when validate_next reports NOT_ENOUGH_ROOM, or by prior() when it == start
+    public:
+        virtual const char* what() const noexcept { return "Not enough space"; } // was throw(), which C++20 removed
+    };
+
+    /// The library API - functions intended to be called by the users
+
+    template <typename octet_iterator> // encodes cp as one to four UTF-8 octets written through result
+    octet_iterator append(uint32_t cp, octet_iterator result)
+    {
+        if (!utf8::internal::is_code_point_valid(cp))
+            throw invalid_code_point(cp);
+
+        if (cp < 0x80)                        // one octet
+            *(result++) = static_cast<uint8_t>(cp);
+        else if (cp < 0x800) {                // two octets
+            *(result++) = static_cast<uint8_t>((cp >> 6)            | 0xc0);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
+        }
+        else if (cp < 0x10000) {              // three octets
+            *(result++) = static_cast<uint8_t>((cp >> 12)           | 0xe0);
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
+        }
+        else {                                // four octets
+            *(result++) = static_cast<uint8_t>((cp >> 18)           | 0xf0);
+            *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f)  | 0x80);
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f)   | 0x80);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f)          | 0x80);
+        }
+        return result; // positioned after the last octet written
+    }
+
+    template <typename octet_iterator, typename output_iterator> // copies [start, end) to out, writing replacement where validation fails
+    output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
+    {
+        while (start != end) {
+            octet_iterator sequence_start = start; // remembered so a valid sequence can be copied after validation moved start
+            internal::utf_error err_code = utf8::internal::validate_next(start, end);
+            switch (err_code) {
+                case internal::UTF8_OK :
+                    for (octet_iterator it = sequence_start; it != start; ++it)
+                        *out++ = *it;
+                    break;
+                case internal::NOT_ENOUGH_ROOM:
+                    throw not_enough_room(); // this error code is not replaced: the call aborts
+                case internal::INVALID_LEAD:
+                    out = utf8::append (replacement, out);
+                    ++start;
+                    break;
+                case internal::INCOMPLETE_SEQUENCE:
+                case internal::OVERLONG_SEQUENCE:
+                case internal::INVALID_CODE_POINT:
+                    out = utf8::append (replacement, out);
+                    ++start;
+                    // just one replacement mark for the sequence
+                    while (start != end && utf8::internal::is_trail(*start))
+                        ++start;
+                    break;
+            }
+        }
+        return out;
+    }
+
+    template <typename octet_iterator, typename output_iterator> // overload that uses 0xfffd as the replacement value
+    inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
+    {
+        static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd);
+        return utf8::replace_invalid(start, end, out, replacement_marker);
+    }
+
+    template <typename octet_iterator> // decodes the code point at it via validate_next; every error code becomes an exception
+    uint32_t next(octet_iterator& it, octet_iterator end)
+    {
+        uint32_t cp = 0;
+        internal::utf_error err_code = utf8::internal::validate_next(it, end, cp);
+        switch (err_code) {
+            case internal::UTF8_OK :
+                break;
+            case internal::NOT_ENOUGH_ROOM :
+                throw not_enough_room();
+            case internal::INVALID_LEAD :
+            case internal::INCOMPLETE_SEQUENCE :
+            case internal::OVERLONG_SEQUENCE :
+                throw invalid_utf8(*it); // reports the octet it points at when the error is raised
+            case internal::INVALID_CODE_POINT :
+                throw invalid_code_point(cp);
+        }
+        return cp;
+    }
+
+    template <typename octet_iterator> // same as next(), but it is taken by value so the caller's iterator does not move
+    uint32_t peek_next(octet_iterator it, octet_iterator end)
+    {
+        return utf8::next(it, end);
+    }
+
+    template <typename octet_iterator> // moves it back to the previous non-trail octet and returns the code point decoded there
+    uint32_t prior(octet_iterator& it, octet_iterator start)
+    {
+        // can't do much if it == start
+        if (it == start)
+            throw not_enough_room();
+
+        octet_iterator end = it; // the sequence being decoded must end where it originally pointed
+        // Go back until we hit either a lead octet or start
+        while (utf8::internal::is_trail(*(--it)))
+            if (it == start)
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
+        return utf8::peek_next(it, end);
+    }
+
+    /// Deprecated in versions that include "prior"; unlike prior() there is no check for it == pass_start before the first decrement
+    template <typename octet_iterator>
+    uint32_t previous(octet_iterator& it, octet_iterator pass_start)
+    {
+        octet_iterator end = it;
+        while (utf8::internal::is_trail(*(--it)))
+            if (it == pass_start)
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
+        octet_iterator temp = it; // decode from a copy so it stays on the lead octet
+        return utf8::next(temp, end);
+    }
+
+    // Move it forward by n code points, decoding each one through utf8::next on the way.
+    template <typename octet_iterator, typename distance_type>
+    void advance (octet_iterator& it, distance_type n, octet_iterator end)
+    {
+        for (distance_type step = 0; step < n; ++step) utf8::next(it, end);
+    }
+
+    template <typename octet_iterator> // counts the code points in [first, last) by decoding each one with utf8::next
+    typename std::iterator_traits<octet_iterator>::difference_type
+    distance (octet_iterator first, octet_iterator last)
+    {
+        typename std::iterator_traits<octet_iterator>::difference_type dist;
+        for (dist = 0; first < last; ++dist) // uses operator<, so the iterator type has to provide it
+            utf8::next(first, last);
+        return dist;
+    }
+
+    template <typename u16bit_iterator, typename octet_iterator> // converts UTF-16 units in [start, end) to UTF-8 written through result
+    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+    {
+        while (start != end) {
+            uint32_t cp = utf8::internal::mask16(*start++);
+            // Take care of surrogate pairs first
+            if (utf8::internal::is_lead_surrogate(cp)) {
+                if (start != end) {
+                    uint32_t trail_surrogate = utf8::internal::mask16(*start++);
+                    if (utf8::internal::is_trail_surrogate(trail_surrogate))
+                        cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+                    else
+                        throw invalid_utf16(static_cast<uint16_t>(trail_surrogate)); // lead surrogate not followed by a trail surrogate
+                }
+                else
+                    throw invalid_utf16(static_cast<uint16_t>(cp)); // input ended right after a lead surrogate
+
+            }
+            // Lone trail surrogate
+            else if (utf8::internal::is_trail_surrogate(cp))
+                throw invalid_utf16(static_cast<uint16_t>(cp));
+
+            result = utf8::append(cp, result);
+        }
+        return result;
+    }
+
+    template <typename u16bit_iterator, typename octet_iterator> // converts UTF-8 in [start, end) to UTF-16 units written through result
+    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+    {
+        while (start != end) {
+            uint32_t cp = utf8::next(start, end); // throws on invalid input, see next()
+            if (cp > 0xffff) { //make a surrogate pair
+                *result++ = static_cast<uint16_t>((cp >> 10)   + internal::LEAD_OFFSET);
+                *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+            }
+            else
+                *result++ = static_cast<uint16_t>(cp);
+        }
+        return result;
+    }
+
+    // Encode each 32-bit code point in [start, end) as UTF-8 and write it through result.
+    template <typename octet_iterator, typename u32bit_iterator>
+    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+    {
+        for (; start != end; ++start)
+            result = utf8::append(*start, result);
+        return result;
+    }
+
+    template <typename octet_iterator, typename u32bit_iterator> // decodes UTF-8 in [start, end) into code points written through result
+    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+    {
+        while (start != end)
+            (*result++) = utf8::next(start, end); // throws on invalid input, see next()
+
+        return result;
+    }
+
+    // Bidirectional iterator that walks a UTF-8 octet range one code point at a time;
+    // dereferencing and stepping go through utf8::next / utf8::prior and throw on bad input.
+    template <typename octet_iterator>
+    class iterator {
+      octet_iterator it;
+      octet_iterator range_start;
+      octet_iterator range_end;
+      public:
+      // Member typedefs replace the std::iterator base class, which is deprecated since C++17.
+      typedef uint32_t value_type;
+      typedef uint32_t* pointer;
+      typedef uint32_t& reference;
+      typedef std::ptrdiff_t difference_type;
+      typedef std::bidirectional_iterator_tag iterator_category;
+      // Value-initialise the members so a default-constructed iterator holds no indeterminate values.
+      iterator () : it(), range_start(), range_end() {}
+      // Throws std::out_of_range when octet_it lies outside [range_start, range_end].
+      explicit iterator (const octet_iterator& octet_it,
+                         const octet_iterator& range_start,
+                         const octet_iterator& range_end) :
+               it(octet_it), range_start(range_start), range_end(range_end)
+      {
+          if (it < range_start || it > range_end)
+              throw std::out_of_range("Invalid utf-8 iterator position");
+      }
+      // the default "big three" are OK
+      octet_iterator base () const { return it; }
+      // Decodes the code point at the current position without moving.
+      uint32_t operator * () const
+      {
+          octet_iterator temp = it;
+          return utf8::next(temp, range_end);
+      }
+      // Comparing iterators built over different ranges throws std::logic_error.
+      bool operator == (const iterator& rhs) const
+      {
+          if (range_start != rhs.range_start || range_end != rhs.range_end)
+              throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
+          return (it == rhs.it);
+      }
+      bool operator != (const iterator& rhs) const { return !(operator == (rhs)); }
+      iterator& operator ++ () { utf8::next(it, range_end); return *this; }
+      iterator operator ++ (int)
+      {
+          iterator temp = *this;
+          utf8::next(it, range_end);
+          return temp;
+      }
+      iterator& operator -- () { utf8::prior(it, range_start); return *this; }
+      iterator operator -- (int)
+      {
+          iterator temp = *this;
+          utf8::prior(it, range_start);
+          return temp;
+      }
+    }; // class iterator
+
+} // namespace utf8
+
+#endif //header guard
+
+
diff --git a/src/utf8/core.h b/src/utf8/core.h
new file mode 100644
index 0000000..693d388
--- /dev/null
+++ b/src/utf8/core.h
@@ -0,0 +1,329 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include <iterator>
+
+namespace utf8
+{
+    // Fixed-width unsigned integer aliases used throughout the library.
+    // They are spelled with built-in types, so change them if unsigned short / unsigned int
+    // are not 16 / 32 bits wide on your system. The names match cstdint and boost/cstdint.
+    typedef unsigned char   uint8_t;
+    typedef unsigned short  uint16_t;
+    typedef unsigned int    uint32_t;
+
+// Helper code - not intended to be directly called by the library users. May be changed at any time
+namespace internal
+{
+    // Unicode constants
+    // Leading (high) surrogates: 0xd800 - 0xdbff
+    // Trailing (low) surrogates: 0xdc00 - 0xdfff
+    const uint16_t LEAD_SURROGATE_MIN  = 0xd800u;
+    const uint16_t LEAD_SURROGATE_MAX  = 0xdbffu;
+    const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
+    const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
+    const uint16_t LEAD_OFFSET         = LEAD_SURROGATE_MIN - (0x10000 >> 10); // added to (cp >> 10) to form a lead surrogate
+    const uint32_t SURROGATE_OFFSET    = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; // added to (lead << 10) + trail to rebuild the code point
+
+    // Maximum valid value for a Unicode code point
+    const uint32_t CODE_POINT_MAX      = 0x0010ffffu;
+
+    // Reduce `oc` to its low eight bits and hand it back as an unsigned octet.
+    template<typename octet_type> inline uint8_t mask8(octet_type oc)
+    {
+        return static_cast<uint8_t>(oc & 0xff);
+    }
+    // Reduce `oc` to its low sixteen bits and hand it back as an unsigned 16-bit value.
+    template<typename u16_type> inline uint16_t mask16(u16_type oc)
+    {
+        return static_cast<uint16_t>(oc & 0xffff);
+    }
+    // True when the top two bits of the octet are 10, the UTF-8 trail (continuation) pattern.
+    template<typename octet_type> inline bool is_trail(octet_type oc)
+    {
+        return (utf8::internal::mask8(oc) & 0xc0) == 0x80;
+    }
+
+    // True when `cp` lies in the lead (high) surrogate range LEAD_SURROGATE_MIN..LEAD_SURROGATE_MAX.
+    template <typename u16> inline bool is_lead_surrogate(u16 cp)
+    {
+        return !(cp < LEAD_SURROGATE_MIN || cp > LEAD_SURROGATE_MAX);
+    }
+
+    // True when `cp` lies in the trail (low) surrogate range TRAIL_SURROGATE_MIN..TRAIL_SURROGATE_MAX.
+    template <typename u16> inline bool is_trail_surrogate(u16 cp)
+    {
+        return !(cp < TRAIL_SURROGATE_MIN || cp > TRAIL_SURROGATE_MAX);
+    }
+
+    // True when `cp` is any surrogate, lead or trail (LEAD_SURROGATE_MIN..TRAIL_SURROGATE_MAX).
+    template <typename u16> inline bool is_surrogate(u16 cp)
+    {
+        return !(cp < LEAD_SURROGATE_MIN || cp > TRAIL_SURROGATE_MAX);
+    }
+
+    // A code point is valid when it does not exceed CODE_POINT_MAX and is not a surrogate.
+    template <typename u32> inline bool is_code_point_valid(u32 cp)
+    {
+        return !(cp > CODE_POINT_MAX || utf8::internal::is_surrogate(cp));
+    }
+
+    // Octet count (1-4) announced by the lead octet at lead_it; 0 when it is not a valid lead octet.
+    template <typename octet_iterator>
+    inline typename std::iterator_traits<octet_iterator>::difference_type
+    sequence_length(octet_iterator lead_it)
+    {
+        const uint8_t first = utf8::internal::mask8(*lead_it);
+        if ((first & 0x80) == 0x00)      // 0xxxxxxx
+            return 1;
+        if ((first & 0xe0) == 0xc0)      // 110xxxxx
+            return 2;
+        if ((first & 0xf0) == 0xe0)      // 1110xxxx
+            return 3;
+        if ((first & 0xf8) == 0xf0)      // 11110xxx
+            return 4;
+        return 0;
+    }
+
+    // Reports whether `length` differs from the shortest-form octet count for `cp`.
+    // Only code points below 0x10000 are examined.
+    template <typename octet_difference_type>
+    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
+    {
+        // Shortest form: 1 octet below 0x80, 2 below 0x800, 3 below 0x10000.
+        if (cp < 0x80)
+            return length != 1;
+
+        if (cp < 0x800)
+            return length != 2;
+
+        if (cp < 0x10000)
+            return length != 3;
+
+        // 0x10000 and above is never reported as overlong here.
+        return false;
+    }
+
+    enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; // result codes returned by the decoding helpers in this namespace
+
+    /// Helper for get_sequence_x: steps `it` forward by one octet and checks the new position.
+    /// Returns NOT_ENOUGH_ROOM at `end`, INCOMPLETE_SEQUENCE on a non-trail octet, else UTF8_OK.
+    template <typename octet_iterator>
+    utf_error increase_safely(octet_iterator& it, octet_iterator end)
+    {
+        ++it;
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        const bool continuation = utf8::internal::is_trail(*it);
+        return continuation ? UTF8_OK : INCOMPLETE_SEQUENCE;
+    }
+
+    #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}    // step IT by one octet; make the enclosing function return on any error
+
+    /// get_sequence_x functions decode utf-8 sequences of the length x.
+    /// One-octet form: the code point is the masked octet itself; `it` is not moved.
+    template <typename octet_iterator>
+    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it != end) {
+            code_point = utf8::internal::mask8(*it);
+            return UTF8_OK;
+        }
+        return NOT_ENOUGH_ROOM;
+    }
+
+    /// Two-octet form: 5 payload bits from the lead octet, 6 from the trail octet.
+    /// On success `it` is left on the last octet of the sequence.
+    template <typename octet_iterator>
+    utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        code_point = utf8::internal::mask8(*it);        // lead octet
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+        const uint32_t lead_bits = (code_point << 6) & 0x7ff;
+        code_point = lead_bits + ((*it) & 0x3f);        // trail octet
+        return UTF8_OK;
+    }
+
+    /// Three-octet form: 4 payload bits from the lead octet, 6 from each of the two trail octets.
+    /// On success `it` is left on the last octet of the sequence.
+    template <typename octet_iterator>
+    utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        code_point = utf8::internal::mask8(*it);        // lead octet
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        const uint32_t lead_bits = (code_point << 12) & 0xffff;
+        code_point = lead_bits + ((utf8::internal::mask8(*it) << 6) & 0xfff);   // first trail octet
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        code_point += (*it) & 0x3f;                     // second trail octet
+        return UTF8_OK;
+    }
+
+    /// Four-octet form: 3 payload bits from the lead octet, 6 from each of the three trail octets.
+    /// On success `it` is left on the last octet of the sequence.
+    template <typename octet_iterator>
+    utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        code_point = utf8::internal::mask8(*it);        // lead octet
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        const uint32_t lead_bits = (code_point << 18) & 0x1fffff;
+        code_point = lead_bits + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);   // first trail octet
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;                   // second trail octet
+        UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+        code_point += (*it) & 0x3f;                                               // third trail octet
+
+        return UTF8_OK;
+    }
+
+    #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
+
+    /// Decodes and validates the UTF-8 sequence at `it`. On success stores the result in `code_point`,
+    /// moves `it` past the sequence and returns UTF8_OK; on failure `it` and `code_point` are untouched.
+    template <typename octet_iterator>
+    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it == end)  // empty input: sequence_length() below must not dereference `end`
+            return NOT_ENOUGH_ROOM;
+
+        // Remember the start so `it` can be restored on failure (pointless for e.g. stream iterators)
+        octet_iterator original_it = it;
+
+        uint32_t cp = 0;
+        // Determine the sequence length based on the lead octet
+        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
+        const octet_difference_type length = utf8::internal::sequence_length(it);
+
+        // Get trail octets and calculate the code point
+        utf_error err = UTF8_OK;
+        switch (length) {
+            case 0:
+                return INVALID_LEAD;
+            case 1:
+                err = utf8::internal::get_sequence_1(it, end, cp);
+                break;
+            case 2:
+                err = utf8::internal::get_sequence_2(it, end, cp);
+                break;
+            case 3:
+                err = utf8::internal::get_sequence_3(it, end, cp);
+                break;
+            case 4:
+                err = utf8::internal::get_sequence_4(it, end, cp);
+                break;
+        }
+
+        if (err == UTF8_OK) {  // Decoding succeeded. Now, security checks...
+            if (!utf8::internal::is_code_point_valid(cp))
+                err = INVALID_CODE_POINT;
+            else if (utf8::internal::is_overlong_sequence(cp, length))
+                err = OVERLONG_SEQUENCE;
+            else {
+                code_point = cp;
+                ++it;  // get_sequence_x stops on the last octet of the sequence; step past it
+                return UTF8_OK;
+            }
+        }
+
+        // Failure branch - restore the original value of the iterator
+        it = original_it;
+        return err;
+    }
+
+    // Overload for callers that only need the verdict; the decoded code point is discarded.
+    template <typename octet_iterator> inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
+        uint32_t discarded;
+        return utf8::internal::validate_next(it, end, discarded);
+    }
+
+} // namespace internal
+
+    /// The library API - functions intended to be called by the users
+
+    // Byte order mark: the three octets that encode U+FEFF in UTF-8
+    const uint8_t bom[] = {0xef, 0xbb, 0xbf};
+
+    // Returns an iterator to the start of the first sequence in [start, end) that fails validation,
+    // or `end` when every sequence validates.
+    template <typename octet_iterator>
+    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
+    {
+        octet_iterator cursor = start;
+        // validate_next advances `cursor` on success and leaves it in place on failure.
+        while (cursor != end && utf8::internal::validate_next(cursor, end) == internal::UTF8_OK)
+            ;
+        return cursor;
+    }
+
+    // True when find_invalid reaches `end`, i.e. no sequence in [start, end) fails validation.
+    template <typename octet_iterator>
+    inline bool is_valid(octet_iterator start, octet_iterator end) {
+        return end == utf8::find_invalid(start, end);
+    }
+
+    // True when [it, end) begins with the three octets of `bom`.
+    // A range that ends before three octets have been compared yields false.
+    template <typename octet_iterator>
+    inline bool starts_with_bom (octet_iterator it, octet_iterator end)
+    {
+        if (it == end || utf8::internal::mask8(*it++) != bom[0]) return false;
+        if (it == end || utf8::internal::mask8(*it++) != bom[1]) return false;
+        return it != end && utf8::internal::mask8(*it) == bom[2];
+    }
+	
+    // Deprecated in release 2.3
+    // True when the three octets starting at `it` equal `bom`. There is no end iterator, so
+    // nothing here bounds the reads; starts_with_bom is the variant that checks against `end`.
+    template <typename octet_iterator>
+    inline bool is_bom (octet_iterator it)
+    {
+        if (utf8::internal::mask8(*it++) != bom[0]) return false;
+        if (utf8::internal::mask8(*it++) != bom[1]) return false;
+        return utf8::internal::mask8(*it) == bom[2];
+    }
+} // namespace utf8
+
+#endif // header guard
+
+
diff --git a/src/utf8/unchecked.h b/src/utf8/unchecked.h
new file mode 100644
index 0000000..cb24271
--- /dev/null
+++ b/src/utf8/unchecked.h
@@ -0,0 +1,228 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+
+namespace utf8
+{
+    namespace unchecked 
+    {
+        // Writes `cp` as 1-4 UTF-8 octets through `result` (no validation); returns the advanced iterator.
+        template <typename octet_iterator>
+        octet_iterator append(uint32_t cp, octet_iterator result)
+        {
+            if (cp < 0x80) {                      // one octet: the value itself
+                *(result++) = static_cast<uint8_t>(cp);
+                return result;
+            }
+            if (cp < 0x800)                       // lead octet of the two / three / four octet forms
+                *(result++) = static_cast<uint8_t>((cp >> 6)  | 0xc0);
+            else if (cp < 0x10000)
+                *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
+            else
+                *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
+            // Trail octets: six payload bits each under the 10xxxxxx marker, highest group first.
+            if (cp >= 0x10000)
+                *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
+            if (cp >= 0x800)
+                *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
+            return result;
+        }
+
+        // Decodes the code point starting at `it` without any validation and moves `it` past it.
+        // When sequence_length reports 0, the masked lead octet is returned and `it` advances by one.
+        template <typename octet_iterator>
+        uint32_t next(octet_iterator& it)
+        {
+            typedef typename std::iterator_traits<octet_iterator>::difference_type diff_type;
+            uint32_t cp = utf8::internal::mask8(*it);
+            const diff_type octets = utf8::internal::sequence_length(it);
+            if (octets == 2) {
+                ++it;
+                cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f);
+            }
+            else if (octets == 3) {
+                ++it;
+                cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
+                ++it;
+                cp += (*it) & 0x3f;
+            }
+            else if (octets == 4) {
+                ++it;
+                cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
+                ++it;
+                cp += (utf8::internal::mask8(*it) << 6) & 0xfff;
+                ++it;
+                cp += (*it) & 0x3f;
+            }
+            // Every path ends on the last octet consumed; step past it.
+            ++it;
+            return cp;
+        }
+
+        // Decodes the code point at `it`; the caller's iterator is untouched because it is taken by value.
+        template <typename octet_iterator> uint32_t peek_next(octet_iterator it)
+        {
+            return utf8::unchecked::next(it);
+        }
+
+        // Steps `it` back to the nearest preceding non-trail octet and returns the code point decoded there.
+        template <typename octet_iterator> uint32_t prior(octet_iterator& it)
+        {
+            do { --it; } while (utf8::internal::is_trail(*it));
+            octet_iterator lead = it;   // decode from a copy so `it` stays on the lead octet
+            return utf8::unchecked::next(lead);
+        }
+
+        // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous)
+        // Simply forwards to utf8::unchecked::prior.
+        template <typename octet_iterator> inline uint32_t previous(octet_iterator& it)
+        {
+            return utf8::unchecked::prior(it);
+        }
+
+        // Moves `it` forward by `n` code points; does nothing when n is not positive.
+        template <typename octet_iterator, typename distance_type>
+        void advance (octet_iterator& it, distance_type n)
+        {
+            for (distance_type remaining = n; 0 < remaining; --remaining) utf8::unchecked::next(it);
+        }
+
+        // Counts the code points decoded from [first, last); relies on operator< between the iterators.
+        template <typename octet_iterator>
+        typename std::iterator_traits<octet_iterator>::difference_type
+        distance (octet_iterator first, octet_iterator last)
+        {
+            typename std::iterator_traits<octet_iterator>::difference_type count = 0;
+            while (first < last) { utf8::unchecked::next(first); ++count; }
+            return count;
+        }
+
+        // Converts the UTF-16 code units of [start, end) to UTF-8 written through `result` (no validation).
+        template <typename u16bit_iterator, typename octet_iterator>
+        octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+        {
+            while (start != end) {
+                uint32_t unit = utf8::internal::mask16(*start++);
+                if (utf8::internal::is_lead_surrogate(unit)) {   // surrogate pair: fold in the next unit
+                    const uint32_t low = utf8::internal::mask16(*start++);
+                    unit = (unit << 10) + low + internal::SURROGATE_OFFSET;
+                }
+                result = utf8::unchecked::append(unit, result);
+            }
+            return result;
+        }
+
+        // Converts the UTF-8 octets of [start, end) to UTF-16 code units written through `result`.
+        // Code points above 0xffff are written as a lead/trail surrogate pair.
+        template <typename u16bit_iterator, typename octet_iterator>
+        u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+        {
+            while (start < end) {
+                const uint32_t cp = utf8::unchecked::next(start);
+                const bool needs_pair = cp > 0xffff;
+                if (needs_pair)
+                    *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
+                *result++ = static_cast<uint16_t>(needs_pair ? (cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN : cp);
+            }
+            return result;
+        }
+
+        // Encodes every code point of [start, end) as UTF-8 through `result`; returns the advanced output iterator.
+        template <typename octet_iterator, typename u32bit_iterator>
+        octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+        {
+            for (; start != end; ++start)
+                result = utf8::unchecked::append(*start, result);
+            return result;
+        }
+
+        // Decodes [start, end) one code point at a time into `result`; returns the advanced output iterator.
+        template <typename octet_iterator, typename u32bit_iterator>
+        u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+        {
+            for (; start < end; ++result)
+                *result = utf8::unchecked::next(start);
+            return result;
+        }
+
+        // The iterator class: bidirectional adaptor over an octet sequence.
+        // Stepping and decoding go through the unchecked helpers, so no validation is performed.
+        template <typename octet_iterator>
+          class iterator {
+            octet_iterator it;
+            public:
+            // Member typedefs replace the std::iterator base class, which is deprecated since C++17.
+            typedef std::bidirectional_iterator_tag iterator_category;
+            typedef uint32_t value_type;
+            typedef std::iterator_traits<uint32_t*>::difference_type difference_type; // i.e. std::ptrdiff_t
+            typedef uint32_t* pointer;
+            typedef uint32_t& reference;
+            iterator () : it() {}   // value-initialised so a default-constructed iterator holds no indeterminate value
+            explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
+            // the default "big three" are OK
+            octet_iterator base () const { return it; }
+            // Returns what utf8::unchecked::next yields here; works on a copy so the position is kept.
+            uint32_t operator * () const
+            {
+                octet_iterator temp = it;
+                return utf8::unchecked::next(temp);
+            }
+            bool operator == (const iterator& rhs) const { return (it == rhs.it); }
+            bool operator != (const iterator& rhs) const { return !(operator == (rhs)); }
+            // Skips as many octets as sequence_length reports for the current octet (0 for an invalid lead).
+            iterator& operator ++ ()
+            {
+                ::std::advance(it, utf8::internal::sequence_length(it));
+                return *this;
+            }
+            iterator operator ++ (int)
+            {
+                iterator temp = *this;
+                ::std::advance(it, utf8::internal::sequence_length(it));
+                return temp;
+            }
+            // Stepping back is delegated to utf8::unchecked::prior.
+            iterator& operator -- () { utf8::unchecked::prior(it); return *this; }
+            iterator operator -- (int)
+            {
+                iterator temp = *this;
+                utf8::unchecked::prior(it);
+                return temp;
+            }
+          }; // class iterator
+
+    } // namespace utf8::unchecked
+} // namespace utf8 
+
+
+#endif // header guard
+
-- 
cgit v1.3