aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Your Name <you@example.com> 2022-02-18 20:35:38 -0500
committer Your Name <you@example.com> 2022-02-18 20:35:38 -0500
commit 55d58a16e2511741cc625e203205dec86144faf3 (patch)
tree 311be7e5fbaf1bc8ece47dd4261af053f2da1c7c /src
parent aa9dabdeaead3c8b1b11f9d4f321265c439bfbfc (diff)
download libbible-55d58a16e2511741cc625e203205dec86144faf3.tar.gz
libbible-55d58a16e2511741cc625e203205dec86144faf3.tar.bz2
libbible-55d58a16e2511741cc625e203205dec86144faf3.zip
Reorganized repository layout
Diffstat (limited to 'src')
-rw-r--r-- src/bible.cc 329
-rw-r--r-- src/lib/libbible.cc 262
-rw-r--r-- src/lib/libbible.h 115
-rw-r--r-- src/lib/mods.cc 233
-rw-r--r-- src/lib/settings.cc 23
-rw-r--r-- src/test/Makefile 20
-rw-r--r-- src/test/modules/JPS.zip bin 0 -> 1170889 bytes
-rw-r--r-- src/test/modules/KJV.zip bin 0 -> 4061008 bytes
-rw-r--r-- src/test/testLibbible.cc 265
-rw-r--r-- src/utf8.h 34
-rw-r--r-- src/utf8/checked.h 327
-rw-r--r-- src/utf8/core.h 329
-rw-r--r-- src/utf8/unchecked.h 228
13 files changed, 2165 insertions, 0 deletions
diff --git a/src/bible.cc b/src/bible.cc
new file mode 100644
index 0000000..a09c0c0
--- /dev/null
+++ b/src/bible.cc
@@ -0,0 +1,329 @@
+#include "lib/libbible.h"
+#include <string>
+#include <sstream>
+#include <algorithm>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <iostream>
+#include "utf8.h"
+
+using namespace std;
+
+// Print the command-line help (synopsis, options and examples) to stdout.
+void usage() {
+    // Adjacent literals are concatenated by the compiler, so the complete
+    // help text is emitted by a single call.
+    static const char *const helpText =
+        "\nUsage:\n bible [options] [reference]\n\n"
+        "Print bible passages.\n\n"
+        "Options:\n"
+        " -h, --help display this help message\n"
+        " --list-modules list all installed modules\n"
+        " -m, --module <mod> use specified module\n"
+        " --set-default-module <mod> use specified module by default in future runs\n"
+        " --list-books list books available in the current module\n"
+        " --list-chapters <book> list chapters available in book in the current module\n"
+        " -o, --omit-verse-numbers when printing verse text, skip printing verse and chapter numbers\n"
+        " --list-installable=<lang> list bible versions available for download and install. Default lists for all languages.\n"
+        " --install-network <mod> install module from the network where <mod> is LANG:NAME as provided by --list-installable\n"
+        " --install-zip <path> install module from a zip file\n"
+        " --remove-module <mod> delete a module from the system\n"
+        "\n\nExamples:\n bible Gal 5:22-23\n"
+        " bible John 3:16\n bible Romans 12\n bible Matt 5:3-7:27\n"
+        " bible Genesis 1-3\n";
+    fputs(helpText, stdout);
+}
+
+// Return the module name stored under the "module" key of the settings.
+string getDefaultModule() {
+    const string settingsKey = "module";
+    return libbible::settingsRead(settingsKey);
+}
+
+// Print the name of every installed module, marking the configured default.
+void listModules() {
+    const map<string, vector<string>> installed = libbible::getModules();
+    const string preferred = getDefaultModule();
+    printf("Modules Installed:\n");
+    for(const auto& [name, books] : installed) {
+        const bool isDefault = (name == preferred);
+        printf(isDefault ? " %s (default)\n" : " %s\n", name.c_str());
+    }
+}
+
+// Persist modname under the "module" settings key so later runs pick it up.
+void setDefaultModule(string modname) {
+    const string settingsKey = "module";
+    libbible::settingsWrite(settingsKey, modname);
+}
+
+void listBooks(string modname) {
+ map<string, vector<string>> mods = libbible::getModules();
+ if(mods.find(modname) == mods.end()) {
+ printf("ERROR: Module \"%s\" not installed!\n", modname.c_str());
+ } else {
+ printf("Books in Module %s:\n", modname.c_str());
+ for(string book : mods[modname]) {
+ printf(" %s\n", book.c_str());
+ }
+ }
+}
+
+// Print one line per chapter of book in modname with its verse range.
+void listChapters(string modname, string book) {
+    printf("Valid chapters for book %s in module %s:\n", book.c_str(), modname.c_str());
+    const auto chapters = libbible::getPassages(modname, book);
+    for(const auto& entry : chapters) {
+        printf(" Chapter %d, Verses %d-%d\n", entry.chapterStart, entry.verseStart, entry.verseEnd);
+    }
+}
+
+// Print the downloadable modules grouped by language. An empty language
+// lists every group; otherwise only the group whose code equals it.
+void listInstallable(string language) {
+    const map<string, vector<string>> available = libbible::downloadModsAvailable();
+    const map<string, string> fullNames = libbible::getLanguageNames();
+    for(const auto& [code, versions] : available) {
+        const bool wanted = language.empty() || language == code;
+        if(!wanted) {
+            continue;
+        }
+        printf("For language %s:", code.c_str());
+        // Append the full language name only when one is known.
+        const auto known = fullNames.find(code);
+        if(known != fullNames.end() && !known->second.empty()) {
+            printf(" (%s)", known->second.c_str());
+        }
+        printf("\n");
+        for(const string& version : versions) {
+            printf(" %s\n", version.c_str());
+        }
+    }
+}
+
+// Install a module given as "LANG:NAME"; the split happens at the first colon.
+void installNetwork(string mod) {
+    const size_t colon = mod.find(':');
+    if(colon == string::npos) {
+        printf("Unable to process module \"%s\": Must contain colon separated language:name\n", mod.c_str());
+        return;
+    }
+    const string lang = mod.substr(0, colon);
+    const string name = mod.substr(colon + 1);
+    const bool installed = libbible::installModFromInternet(lang, name);
+    if(installed) {
+        printf("Module installed.\n");
+    } else {
+        printf("Error installing module!\n");
+    }
+}
+
+// Install a module from the zip archive at path and report the outcome.
+// The result of installModFromZip used to be discarded, so a failed install
+// was silent; the messages match the ones printed by installNetwork.
+void installZip(string path) {
+    if(libbible::installModFromZip(path)) {
+        printf("Module installed.\n");
+    } else {
+        printf("Error installing module!\n");
+    }
+}
+
+// Ask the library to uninstall the module called mod.
+void removeMod(string mod) {
+    const string& target = mod;
+    libbible::uninstallMod(target);
+}
+
+// Copy text from in to out, breaking lines so that their visible width does
+// not exceed width.
+//
+// Visible width is the number of UTF-8 code points in the valid prefix of the
+// current line, minus the bytes of escape sequences of the form "\033...m".
+// Existing '\n' characters are kept as line breaks. When a line grows too
+// long it is cut at its last space and the word in progress starts the next
+// line; a line that contains no space is replaced by the current word without
+// being written out.
+//
+// @param in    source stream, read byte by byte until exhausted
+// @param out   destination stream
+// @param width maximum number of visible columns per line
+void textWrap(istream& in, ostream& out, size_t width) {
+    string word;
+    string line;
+    char cur = '\0';
+    size_t i = 0;
+
+    while(in.get(cur)) {
+        // isspace is only defined for values representable as unsigned char
+        // (or EOF); UTF-8 lead/continuation bytes are negative when char is
+        // signed, so convert before the call.
+        if(isspace(static_cast<unsigned char>(cur))) {
+            word.clear();
+        }
+        if(cur == '\n') {
+            out << line << '\n';
+            line.clear();
+            word.clear();
+            continue;
+        }
+        word += cur;
+        line += cur;
+        // Anything matching \033.*?m doesn't count
+        size_t credits = 0;
+        size_t found = -1; // wraps to npos, so found+1 starts the search at 0
+        while((found = line.find("\033", found+1)) != string::npos) {
+            size_t first = line.find_first_of("m", found);
+            if(first != string::npos) {
+                credits += first - found + 1;
+            } else {
+                // Sequence not terminated yet: discount everything from ESC on.
+                credits += line.size() - found;
+            }
+        }
+        // Count code points only up to the first invalid byte, i.e. ignore a
+        // multi-byte character that has not been read completely yet.
+        string::iterator end_it = utf8::find_invalid(line.begin(), line.end());
+        i = utf8::distance(line.begin(), end_it) - credits;
+        if(i > width) {
+            word.erase(0, word.find_first_not_of(" "));
+            if(line.find_last_of(" ") != string::npos) {
+                line.erase(line.find_last_of(" "));
+                out << line << '\n';
+            }
+            line = word;
+        }
+    }
+    out << line;
+}
+
+// Entry point of the `bible` command-line tool.
+//
+// Parses the options, runs the requested management actions (listing,
+// installing, removing modules, changing the default), and, when a reference
+// is given, prints the passage text, wrapped to the terminal width if stdout
+// is a sufficiently wide terminal.
+//
+// Returns 0 on success or after --help / --list-modules, 1 on an
+// unrecognised option.
+int main(int argc, char* argv[]) {
+    static struct option long_options[] = {
+        {"help", no_argument, 0, 'h'},
+        {"list-modules", no_argument, 0, 0},
+        {"module", required_argument, 0, 'm'},
+        {"set-default-module", required_argument, 0, 0},
+        {"list-books", no_argument, 0, 0},
+        {"list-chapters", required_argument, 0, 0},
+        {"omit-verse-numbers", no_argument, 0, 'o'},
+        {"list-installable", optional_argument, 0, 0},
+        {"install-network", required_argument, 0, 0},
+        {"install-zip", required_argument, 0, 0},
+        {"remove-module", required_argument, 0, 0},
+        // getopt_long requires the array to end with an all-zero element;
+        // without it an unknown long option makes it read past the array.
+        {0, 0, 0, 0}
+    };
+    int opt;
+    int option_index = 0;
+    string modname;
+    bool omitVerseNums = false;
+    bool doListBooks = false;
+    string listChaptersBook;
+    string option;
+    while ((opt = getopt_long(argc, argv, "hm:o", long_options, &option_index)) != -1) {
+        switch(opt) {
+            case 'h':
+                usage();
+                return 0;
+            case 'm':
+                modname = string(optarg);
+                break;
+            case 'o':
+                omitVerseNums = true;
+                break;
+            case 0:
+                // Long option without a short equivalent: dispatch on its name.
+                option = string(long_options[option_index].name);
+                if(option == "list-modules") {
+                    listModules();
+                    return 0;
+                } else if(option == "set-default-module") {
+                    setDefaultModule(string(optarg));
+                } else if(option == "list-books") {
+                    // Deferred until the module to use is known.
+                    doListBooks = true;
+                } else if(option == "list-chapters") {
+                    listChaptersBook = string(optarg);
+                } else if(option == "list-installable") {
+                    if(optarg == nullptr) {
+                        listInstallable(string());
+                    } else {
+                        listInstallable(string(optarg));
+                    }
+                } else if(option == "install-network") {
+                    installNetwork(string(optarg));
+                } else if(option == "install-zip") {
+                    installZip(string(optarg));
+                } else if(option == "remove-module") {
+                    removeMod(string(optarg));
+                }
+                break;
+            default:
+                usage();
+                return 1;
+        }
+    }
+    if(modname.empty()) {
+        modname = getDefaultModule();
+    }
+    if(doListBooks) {
+        listBooks(modname);
+    }
+    if(! listChaptersBook.empty()) {
+        listChapters(modname, listChaptersBook);
+    }
+    // Everything left on the command line forms the reference.
+    string reference;
+    while(optind < argc) {
+        reference += argv[optind++];
+        reference += " ";
+    }
+    if(reference.empty()) {
+        // That's all.
+        return 0;
+    }
+
+    auto text = libbible::getText(libbible::getPassage(modname, reference));
+    int chapter = 0;
+    int verse = 0;
+    const char* indent = " ";
+    bool isNewline = true;
+    stringstream out;
+    for(auto tex : text) {
+        if(!omitVerseNums && tex.chapter != chapter) {
+            out << tex.book << " Chapter " << tex.chapter << ":\n";
+        }
+        bool isParagraph = false;
+        bool isIndent = false;
+        bool isDivineName = false;
+        bool isJesus = false;
+        bool isTitle = false;
+        bool isParallel = false;
+        bool isPreverse = false;
+        for(const string& modifier : tex.modifiers) {
+            if(modifier == "paragraph") {
+                isParagraph = true;
+            } else if (modifier == "line indent0") {
+                isIndent = true;
+            } else if (modifier == "divineName") {
+                isDivineName = true;
+            } else if (modifier == "wordsOfJesus") {
+                isJesus = true;
+            } else if (modifier == "title") {
+                isTitle = true;
+            } else if (modifier == "parallel") {
+                isParallel = true;
+            } else if (modifier == "preverse") {
+                isPreverse = true;
+            }
+        }
+        if(isPreverse or isTitle or isParallel) {
+            // Someday maybe we add this, but for now, omit
+            tex.text = "";
+        }
+        if(isIndent) {
+            isParagraph = false;
+            if(isNewline) {
+                out << indent;
+            }
+        }
+        if(isParagraph) {
+            out << indent;
+        }
+        if(isDivineName) {
+            // toupper is only defined for unsigned char values (or EOF).
+            transform(tex.text.begin(), tex.text.end(), tex.text.begin(),
+                      [](unsigned char c) { return static_cast<char>(::toupper(c)); });
+        }
+        if(isJesus) {
+            out << "\033[;31m";
+        }
+        if(omitVerseNums && tex.verse != verse) {
+            out << " ";
+        } else if(!omitVerseNums && tex.verse != verse) {
+            out << " (" << tex.verse << ") ";
+        }
+        chapter = tex.chapter;
+        verse = tex.verse;
+        out << tex.text;
+        // The text may be empty (it is blanked above for omitted segments);
+        // back() on an empty string is undefined, so test for that first.
+        isNewline = !tex.text.empty() && tex.text.back() == '\n';
+        if(isJesus) {
+            out << "\033[0m";
+        }
+    }
+    out << "\n";
+
+    // Get window size. The ioctl fails when stdout is not a terminal (e.g. a
+    // pipe) and then leaves the struct untouched, so start from zero and only
+    // trust ws_col when the call succeeded.
+    struct winsize size{};
+    int cols = 0;
+    if(ioctl(STDOUT_FILENO, TIOCGWINSZ, &size) == 0) {
+        cols = size.ws_col;
+    }
+    // If terminal is too small, treat it like a pipe
+    if(cols < 10) {
+        cols = 0;
+    }
+
+    // Now print
+    if(cols == 0) {
+        cout << out.str();
+    } else {
+        stringstream out2;
+        textWrap(out, out2, cols);
+        cout << out2.str();
+    }
+    return 0;
+}
diff --git a/src/lib/libbible.cc b/src/lib/libbible.cc
new file mode 100644
index 0000000..c9acb7d
--- /dev/null
+++ b/src/lib/libbible.cc
@@ -0,0 +1,262 @@
+#include "libbible.h"
+#include <sword/versekey.h>
+#include <sword/markupfiltmgr.h>
+#include <sword/swmodule.h>
+#include <sword/swmgr.h>
+#include <sword/osisfootnotes.h>
+#include <algorithm>
+
+using namespace sword;
+using namespace std;
+
+SWMgr library(new MarkupFilterMgr(FMT_XHTML)); // file-wide module manager, built with a MarkupFilterMgr configured for FMT_XHTML
+OSISFootnotes filter; // option filter that getText sets to "Off" and attaches to the module it reads
+
+// Collect the names of the books of target that actually render text.
+// Walks every testament and book index reported by the module's key; the key
+// is left positioned on the last book visited.
+vector<string> getBooks(SWModule *target) {
+    vector<string> found;
+    VerseKey *cursor = (VerseKey *) target->getKey();
+    for(char testament = 1; testament <= cursor->getTestamentMax(); testament++) {
+        cursor->setTestament(testament);
+        for(char index = 1; index <= cursor->getBookMax(); index++) {
+            cursor->setBook(index);
+            // JPS claims two testaments but has only one, which would list
+            // every book twice.
+            const bool alreadyListed =
+                std::find(found.begin(), found.end(), cursor->getBookName()) != found.end();
+            // Some NT-only translations still report OT books; those render
+            // as empty text. Only evaluated for books not listed yet.
+            if(alreadyListed || string(target->renderText()).empty()) {
+                continue;
+            }
+            found.push_back(cursor->getBookName());
+        }
+    }
+    return found;
+}
+
+// Reload the module library and map each module name to its list of books.
+map<string, vector<string>> libbible::getModules() {
+    library.load();
+    map<string, vector<string>> result;
+    for(const auto& entry : library.getModules()) {
+        const string name = entry.second->getName();
+        SWModule *module = library.getModule(name.c_str());
+        result[name] = getBooks(module);
+    }
+    return result;
+}
+
+// Build one whole-chapter passage (verse 1 through the chapter's last verse)
+// for each chapter of book in module modName. Returns an empty vector when
+// the module is not known to the library.
+vector<libbible::passage> libbible::getPassages(string modName, string book) {
+    vector<libbible::passage> result;
+    SWModule *module = library.getModule(modName.c_str());
+    if(module == nullptr) {
+        // Module doesn't exist
+        return result;
+    }
+    // Position on chapter 1 to learn how many chapters the book has.
+    const string firstChapter = book + " " + "1";
+    module->setKey(firstChapter.c_str());
+    const int lastChapter = ((VerseKey *) module->getKey())->getChapterMax();
+    for(int number = 1; number <= lastChapter; number++) {
+        const string ref = book + ' ' + to_string(number);
+        module->setKey(ref.c_str());
+        VerseKey *position = (VerseKey *) module->getKey();
+        libbible::passage entry;
+        entry.modName = modName;
+        entry.book = string(position->getBookName());
+        entry.bookShort = string(position->getBookAbbrev());
+        entry.chapterStart = number;
+        entry.chapterEnd = number;
+        entry.verseStart = 1;
+        entry.verseEnd = position->getVerseMax();
+        result.push_back(entry);
+    }
+    return result;
+}
+
+// Create a text segment carrying the book, chapter and verse of key, with
+// empty text and no modifiers.
+libbible::text getEmptyText(VerseKey *key) {
+    libbible::text blank;
+    blank.book = key->getBookName();
+    blank.bookShort = key->getBookAbbrev();
+    blank.chapter = key->getChapter();
+    blank.verse = key->getVerse();
+    return blank;
+}
+
+// Turn a human-readable reference such as "gen 1:26-27" into a passage.
+//
+// The start (book, chapter, verse) is obtained by letting the module parse
+// the reference; the end is derived from the text after the last '-':
+//   "b c"        -> whole chapter
+//   "b c:v"      -> single verse
+//   "b c-c"      -> whole chapters
+//   "b c:v-v"    -> verses within one chapter
+//   "b c:v-c:v"  -> arbitrary range (also "b c-c:v")
+//
+// @param modName   module used to interpret the reference
+// @param reference the reference text
+// @return the passage; when the module is unknown, the reference is empty or
+//         the module has no books, only modName is set and the numeric
+//         fields are zero.
+// NOTE(review): the range end is parsed with stoi, which throws on
+// non-numeric text; callers currently do not catch that.
+libbible::passage libbible::getPassage(string modName, string reference) {
+    // Value-initialise so the integer fields are zero, not indeterminate,
+    // on the early returns below.
+    libbible::passage pass{};
+    pass.modName = modName;
+    SWModule *target = library.getModule(pass.modName.c_str());
+    if(target == nullptr || reference.empty()) {
+        // Bad input
+        return pass;
+    }
+    vector<string> validBooks = getBooks(target);
+    if(validBooks.empty()) {
+        // No usable book to fall back on (validBooks[0] below would be
+        // out of range).
+        return pass;
+    }
+    // Let's use the target to help us
+    target->setKey(reference.c_str());
+    VerseKey *key = (VerseKey *) target->getKey();
+    pass.book = string(key->getBookName());
+    // Hold on a moment, is this book even legal?
+    if(find(validBooks.begin(), validBooks.end(), pass.book) == validBooks.end()) {
+        key->setBookName(validBooks[0].c_str());
+        pass.book = string(key->getBookName());
+    }
+    pass.bookShort = string(key->getBookAbbrev());
+    pass.chapterStart = key->getChapter();
+    pass.verseStart = key->getVerse();
+    // And now we just need chapterEnd and verseEnd.
+    string ref = string(reference);
+    ref.erase(remove(ref.begin(), ref.end(), ' '), ref.end());
+    if(ref.find('-') == string::npos) {
+        // There's no range!
+        if(ref.find(':') == string::npos) {
+            // It's a full chapter reference
+            pass.chapterEnd = pass.chapterStart;
+            pass.verseEnd = key->getVerseMax();
+        } else {
+            // It's a single verse reference
+            pass.chapterEnd = pass.chapterStart;
+            pass.verseEnd = pass.verseStart;
+        }
+    } else {
+        if(ref.find(':') == string::npos) {
+            // It's a multi-full-chapter reference
+            pass.chapterEnd = stoi(ref.substr(ref.find_last_of('-')+1));
+            key->setChapter(pass.chapterEnd);
+            pass.verseEnd = key->getVerseMax();
+        } else {
+            // It falls in categories c:v-v or c:v-c:v (or, technically, c-c:v)
+            string rangeEnd = ref.substr(ref.find_last_of('-')+1);
+            if(rangeEnd.find(':') == string::npos) {
+                // It's c:v-v
+                pass.verseEnd = stoi(rangeEnd);
+                pass.chapterEnd = pass.chapterStart;
+            } else {
+                // It's c:v-c:v (or c-c:v, but code is the same)
+                pass.chapterEnd = stoi(rangeEnd.substr(0, rangeEnd.find(':')));
+                pass.verseEnd = stoi(rangeEnd.substr(rangeEnd.find(':')+1));
+            }
+        }
+    }
+    return pass;
+}
+
+// Render the verses of pass and split them into text segments.
+//
+// Each verse is rendered by the module and scanned character by character.
+// Plain characters are appended to the current segment; markup tags are
+// interpreted as follows:
+//   <tag class="NAME">  opens a span whose modifier is NAME
+//   <tag ...preverse...> opens a span whose modifier is "preverse"
+//   </tag>              closes the most recently opened span with that tag
+//   <br /> or <br/>     appends '\n' to the current segment
+// Whenever the set of open spans changes, the following text starts a new
+// segment carrying the modifiers of all open spans. A pilcrow (U+00B6)
+// marks a paragraph. The first verse of a chapter, and the verse after one
+// whose rendered text ended in '\n', receive the "paragraph" modifier.
+//
+// @param pass the passage to read; an empty book falls back to bookShort
+// @return the segments in reading order; empty when the module is unknown
+vector<libbible::text> libbible::getText(libbible::passage pass) {
+    vector<libbible::text> texts;
+    SWModule *target = library.getModule(pass.modName.c_str());
+    // The null check has to come before the first use of target.
+    if(target == nullptr) {
+        // Module doesn't exist
+        return texts;
+    }
+    filter.setOptionValue("Off");
+    target->addOptionFilter(&filter);
+    if(pass.book.empty()) {
+        pass.book = pass.bookShort;
+    }
+    target->setKey((pass.book
+                + " " + to_string(pass.chapterStart)
+                + ":" + to_string(pass.verseStart)).c_str());
+    VerseKey *key = (VerseKey *) target->getKey();
+
+    bool endOfParagraph = false;
+
+    string book = string(key->getBookName());
+
+    // Advance verse by verse until the end of the passage or of the book.
+    for(; string(key->getBookName()) == book &&
+            (key->getChapter() < pass.chapterEnd
+             || (key->getChapter() == pass.chapterEnd && key->getVerse() <= pass.verseEnd));
+            (*key)++) {
+
+        string text = string(target->renderText());
+
+        texts.push_back(getEmptyText(key));
+
+        if(key->getVerse() == 1 || endOfParagraph) {
+            if(find(texts.back().modifiers.begin(), texts.back().modifiers.end(), "paragraph") == texts.back().modifiers.end()) {
+                texts.back().modifiers.push_back("paragraph");
+            }
+            endOfParagraph = false;
+        }
+
+        // Variable to accumulate unterminated spans
+        std::vector<std::pair<std::string, std::string>> spans;
+        bool spansChanged = false;
+        bool hasAddedText = false;
+        // Iterate over text
+        for(auto i = text.begin(); i != text.end(); i++) {
+            if(*i != '<') {
+                if(spansChanged) {
+                    spansChanged = false;
+                    // Start a new segment unless the current one is unused.
+                    if(!texts.back().text.empty()) {
+                        texts.push_back(getEmptyText(key));
+                    }
+                    for(auto& [tag, modifier] : spans) {
+                        if(find(texts.back().modifiers.begin(), texts.back().modifiers.end(), modifier) == texts.back().modifiers.end()) {
+                            texts.back().modifiers.push_back(modifier);
+                        }
+                    }
+                }
+                if(*i == '\n') {
+                    continue; // We add newlines with <br />
+                }
+                // Skip leading blanks of the verse.
+                if(! hasAddedText && (*i == ' ' || *i == '\t')) {
+                    continue;
+                }
+                // Two-byte UTF-8 encoding of the pilcrow sign.
+                if(*i == "¶"[0] && i+1 != text.end() && *(i+1) == "¶"[1]) {
+                    i++;
+                    if(hasAddedText) {
+                        texts.back().text += '\n';
+                    } else {
+                        // Append \n to text in previous texts (if applicable)
+                        if(texts.size() > 1) {
+                            texts[texts.size()-2].text += '\n';
+                        }
+                        texts.back().modifiers.push_back("paragraph");
+                        continue;
+                    }
+                }
+                texts.back().text += *i;
+                hasAddedText = true;
+            }
+            else {
+                string span;
+                for(; i != text.end(); i++) {
+                    span.push_back(*i);
+                    if(*i == '>') {
+                        // The end of the span will be "</tag>".
+                        if(span[1] == '/') {
+                            string tag = span.substr(2, span.size()-3);
+                            for(auto rit = spans.rbegin(); rit != spans.rend(); rit++) {
+                                if(rit->first == tag) {
+                                    spans.erase(rit.base()-1);
+                                    spansChanged = true;
+                                    break;
+                                }
+                            }
+                        } else if(span.find("class=\"") != string::npos) {
+                            // The span will be formatted "<tag class=\"NAME\">"
+                            // We want just the NAME
+                            string tag = span.substr(1, span.find(" ")-1);
+                            size_t start = span.find("class=\"")+7;
+                            size_t end = span.find("\"", start);
+                            spans.push_back(std::pair<string, string>(tag, span.substr(start, end-start)));
+                            spansChanged = true;
+                        } else if(span.find("preverse") != string::npos) {
+                            string tag = span.substr(1, span.find(" ")-1);
+                            spans.push_back(std::pair<string, string>(tag, "preverse"));
+                        } else if(span == "<br />" || span == "<br/>") {
+                            texts.back().text += '\n';
+                        }
+                        break;
+                    }
+                }
+                // An unterminated '<' leaves i at end(); stop here so the
+                // outer loop does not increment past the end.
+                if(i == text.end()) {
+                    break;
+                }
+            }
+        }
+        // Guard against empty rendered text before looking at its last byte.
+        endOfParagraph = !text.empty() && text.back() == '\n';
+    }
+    return texts;
+}
diff --git a/src/lib/libbible.h b/src/lib/libbible.h
new file mode 100644
index 0000000..f77dc8c
--- /dev/null
+++ b/src/lib/libbible.h
@@ -0,0 +1,115 @@
+#include <string>
+#include <vector>
+#include <map>
+
+namespace libbible {
+
+    /*
+     * One run of text belonging to a single verse, together with the
+     * formatting modifiers that apply to it.
+     */
+    struct text {
+        int chapter = 0; // zero until filled in, instead of indeterminate
+        int verse = 0;
+        std::string book;
+        std::string bookShort;
+        std::string text;
+        std::vector<std::string> modifiers; // e.g., paragraph, line indent0, divineName, wordsOfJesus
+    };
+
+    /*
+     * A contiguous range of verses within one book of one module.
+     */
+    struct passage {
+        std::string modName;
+        std::string book;
+        std::string bookShort;
+        int chapterStart = 0; // zero until filled in, instead of indeterminate
+        int verseStart = 0;
+        int chapterEnd = 0;
+        int verseEnd = 0;
+    };
+
+    /*
+     * @return Map of modName to supported books
+     */
+    std::map<std::string, std::vector<std::string>> getModules(void);
+
+    /*
+     * @return Vector of valid single full-chapter passages for a book
+     */
+    std::vector<struct passage> getPassages(std::string modName, std::string book);
+
+    /*
+     * @param modName the module to use for determining the passage
+     * @param reference a human-readable reference, e.g., "gen 1:26-27"
+     * @return the passage matching the reference
+     */
+    passage getPassage(std::string modName, std::string reference);
+
+    /*
+     * @return Text for a passage
+     */
+    std::vector<struct text> getText(struct passage pass);
+
+    /**************************
+     * Methods dealing with mods
+     ***************************/
+
+    /*
+     * Progress callback interface; derive from it and override update().
+     * The default implementation ignores the notification.
+     */
+    class Status {
+        public:
+            // Polymorphic base class: make destruction through a Status
+            // pointer well-defined.
+            virtual ~Status() = default;
+            virtual void update(unsigned long totalBytes, unsigned long completedBytes, std::string message) {}
+    };
+
+    /**
+     * @param status Status update method is called asynchronously as download progresses
+     */
+    void setStatusReporter(Status& status);
+
+    /**
+     * @return A mapping from language to bible version names
+     */
+    std::map<std::string, std::vector<std::string>> downloadModsAvailable();
+
+    /**
+     * @return A mapping from language abbreviations to full language names
+     */
+    std::map<std::string, std::string> getLanguageNames();
+
+    /**
+     * Cancel an in-progress download
+     */
+    void terminateDownload(void);
+
+    /**
+     * @param language The language of the mod to install as provided from downloadModsAvailable
+     * @param name The name of the bible version as provided from downloadModsAvailable
+     * @see downloadModsAvailable()
+     * @return true on success, false otherwise
+     */
+    bool installModFromInternet(std::string language, std::string name);
+
+    /**
+     * @param filename Path to the .zip compressed module to be installed
+     * @return true on success, false otherwise
+     */
+    bool installModFromZip(std::string filename);
+
+    /**
+     * @param modname The name of the module to be removed
+     */
+    void uninstallMod(std::string modname);
+
+    /******************************
+     * Methods dealing with settings
+     *******************************/
+
+    /*
+     * From already established code, valid and useful values are:
+     * int fontsize: the last used size of the font
+     * string passage: the last looked-up passage
+     * string module: the last used module
+     */
+
+    void settingsWrite(std::string key, std::string value);
+
+    std::string settingsRead(std::string key);
+
+    void settingsWriteInt(std::string key, int value);
+
+    int settingsReadInt(std::string key);
+
+}
diff --git a/src/lib/mods.cc b/src/lib/mods.cc
new file mode 100644
index 0000000..ab54e48
--- /dev/null
+++ b/src/lib/mods.cc
@@ -0,0 +1,233 @@
+#include "libbible.h"
+#include <sword/swmgr.h>
+#include <sword/swmodule.h>
+#include <sword/installmgr.h>
+#include <sword/filemgr.h>
+#include <sword/remotetrans.h>
+#include <unzip.h>
+#include <filesystem>
+
+using namespace std;
+
+class myStatusReporter : public sword::StatusReporter { // forwards SWORD progress notifications to a libbible::Status
+ public:
+ myStatusReporter(libbible::Status *status); // stores the pointer; the Status object is not copied
+ ~myStatusReporter();
+ void preStatus(long totalBytes, long completedBytes, const char *message); // remembers message, then forwards
+ void update(unsigned long totalBytes, unsigned long completedBytes); // forwards with the last remembered message
+ protected:
+ libbible::Status *status; // receiver of the forwarded notifications
+ string message; // text passed to the most recent preStatus call
+};
+
+// Remember the receiver that notifications are forwarded to.
+myStatusReporter::myStatusReporter(libbible::Status *s) : status(s) {}
+
+// Nothing to release: the reporter only holds a pointer and a string.
+myStatusReporter::~myStatusReporter() = default;
+
+//virtual void libbible::Status::update(unsigned long totalBytes, unsigned long completedBytes, string message) {}
+
+// Store msg as the current message and forward the byte counts with it.
+void myStatusReporter::preStatus(long totalBytes, long completedBytes, const char *msg) {
+    message = string(msg);
+    const unsigned long total = (unsigned long) totalBytes;
+    const unsigned long done = (unsigned long) completedBytes;
+    status->update(total, done, message);
+}
+
+// Forward a progress notification together with the message remembered
+// from the last preStatus call.
+void myStatusReporter::update(unsigned long totalBytes, unsigned long completedBytes) {
+    libbible::Status &receiver = *status;
+    receiver.update(totalBytes, completedBytes, message);
+}
+
+// Directory holding the SWORD data. getenv returns a null pointer when HOME
+// is not set, and adding a null pointer to a std::string is undefined, so
+// fall back to the current directory in that case.
+string basedir = string(getenv("HOME") ? getenv("HOME") : ".") + string("/.sword/");
+// Install manager used for all download/remove operations; created without a
+// status reporter until setStatusReporter replaces it.
+sword::InstallMgr *installMgr = new sword::InstallMgr((basedir + std::string("InstallMgr")).c_str(), nullptr);
+// language code -> (module name, source it can be installed from)
+map<string, vector<pair<string, sword::InstallSource *>>> installSources;
+map<string, string> languageNames; // maps abbreviation to full name
+
+// Replace the global install manager with one that reports progress to
+// status. Only the address of status is kept, so the object must stay alive
+// for as long as downloads may run.
+void libbible::setStatusReporter(libbible::Status& status) {
+    myStatusReporter *msr = new myStatusReporter(&status);
+    // installMgr was created with new, so it must be released with delete;
+    // free() skips the destructor and is undefined for such objects.
+    delete installMgr;
+    installMgr = new sword::InstallMgr((basedir + std::string("InstallMgr")).c_str(), msr);
+    installMgr->setUserDisclaimerConfirmed(true);
+    // NOTE(review): msr is never deleted here; confirm whether InstallMgr
+    // takes ownership of the reporter, otherwise each call leaks one.
+}
+
+// Query the configured remote sources and return, per language code, the
+// names of the modules of type "Biblical Texts" they offer.
+//
+// Side effects: rebuilds installSources and languageNames, creates the
+// mods.d/ and modules/ directories under basedir, and writes a default
+// InstallMgr.conf pointing at CrossWire when none exists.
+map<string, vector<string>> libbible::downloadModsAvailable() {
+    installSources.clear();
+    languageNames.clear();
+    const auto dirMode = S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH;
+    mkdir((basedir + std::string("mods.d/")).c_str(), dirMode);
+    mkdir((basedir + std::string("modules/")).c_str(), dirMode);
+    installMgr->setUserDisclaimerConfirmed(true);
+    const string confpath = basedir + string("InstallMgr/InstallMgr.conf");
+    if(! sword::FileMgr::existsFile(confpath.c_str())) {
+        // Lifted directly from xiphos
+        sword::FileMgr::createParent(confpath.c_str());
+        sword::SWConfig config(confpath.c_str());
+        sword::InstallSource is("FTP");
+        is.caption = "CrossWire";
+        is.source = "ftp.crosswire.org";
+        is.directory = "/pub/sword/raw";
+        config["General"]["PassiveFTP"] = "true";
+        config["Sources"]["FTPSource"] = is.getConfEnt();
+        config.save();
+        installMgr->refreshRemoteSourceConfiguration();
+    }
+    installMgr->readInstallConf();
+    map<string, vector<string>> modsAvailable;
+    // language code -> every full language name seen for it (one per module)
+    map<string, vector<string>> languagesToFull;
+    for(const auto& source : installMgr->sources) {
+        sword::InstallSource *remote = source.second;
+        if(remote->getMgr()->Modules.empty()) {
+            installMgr->refreshRemoteSource(remote);
+        }
+        for(const auto& candidate : remote->getMgr()->Modules) {
+            auto *module = candidate.second;
+            if(string(module->getType()) != "Biblical Texts") {
+                continue;
+            }
+            const string langCode(module->getLanguage());
+            string langName;
+            if(module->getConfigEntry("LCSH")) {
+                // Split on periods, last field, strip
+                langName = string(module->getConfigEntry("LCSH"));
+                // If ends with ., remove
+                if(langName.ends_with('.')) {
+                    langName.pop_back();
+                }
+                if(langName.find('.') != string::npos) {
+                    langName = langName.substr(langName.find_last_of('.')+1);
+                }
+                while(langName.starts_with(' ')) {
+                    langName.erase(0, 1);
+                }
+                while(langName.ends_with(' ')) {
+                    langName.pop_back();
+                }
+            }
+            // operator[] creates the entry on first use of a language code.
+            languagesToFull[langCode].push_back(langName);
+            modsAvailable[langCode].push_back(string(module->getName()));
+            installSources[langCode].emplace_back(string(module->getName()), remote);
+        }
+    }
+    // Now use majority voting to move languagesToFull -> languageNames
+    for(const auto& [code, candidates] : languagesToFull) {
+        std::map<string, int> tally;
+        for(const auto& name : candidates) {
+            ++tally[name];
+        }
+        string winner = candidates[0];
+        for(const auto& name : candidates) {
+            const bool moreVotes = tally[name] > tally[winner];
+            const bool tieButShorter = tally[name] == tally[winner]
+                && !name.empty() && name.size() < winner.size();
+            if(moreVotes || tieButShorter) {
+                winner = name;
+            }
+        }
+        // Fall back to the code itself when no full name won.
+        languageNames[code] = winner.empty() ? code : winner;
+    }
+    return modsAvailable;
+}
+
+// Return the language-code -> full-name table, filling it through
+// downloadModsAvailable() first when it is still empty.
+std::map<std::string, std::string> libbible::getLanguageNames() {
+    const bool needsLoading = languageNames.empty();
+    if(needsLoading) {
+        downloadModsAvailable();
+    }
+    return languageNames;
+}
+
+// Ask the install manager to terminate its current operation.
+void libbible::terminateDownload() {
+    sword::InstallMgr &manager = *installMgr;
+    manager.terminate();
+}
+
+// Install module name, listed under language, from the remote source it was
+// discovered on. Loads the list of available modules first when it has not
+// been fetched yet.
+// @return true when the install manager reports success; false when the
+//         module is not listed or the installation fails
+bool libbible::installModFromInternet(string language, string name) {
+    if(installSources.empty()) {
+        downloadModsAvailable();
+    }
+    for(const auto& [candidate, source] : installSources[language]) {
+        if(candidate != name) {
+            continue;
+        }
+        // Only the first entry with a matching name is tried.
+        sword::SWMgr mgr(basedir.c_str());
+        const bool ok = installMgr->installModule(&mgr, 0, name.c_str(), source) == 0;
+        if(ok) {
+            printf("Installed from %s\n", source->getConfEnt().c_str());
+        }
+        return ok;
+    }
+    return false;
+}
+
+#define READ_SIZE 8192
+#define delim '/'
+
+// Unpack the zip archive filename below basedir, creating directories as
+// needed.
+//
+// Compared with a plain extract loop this version
+//  - closes the output file on every error path,
+//  - skips directory entries (names ending in '/') instead of failing on them,
+//  - rejects entries containing a ".." path component, so an archive cannot
+//    write outside basedir,
+//  - rejects names too long to be NUL-terminated in the name buffer,
+//  - treats a short write as an error.
+//
+// @param filename path to the .zip file
+// @return true when every entry was extracted, false on the first failure
+//         (entries extracted before the failure are left in place)
+bool libbible::installModFromZip(string filename) {
+    unzFile zipfile = unzOpen(filename.c_str());
+    if(zipfile == NULL) {
+        return false;
+    }
+    unz_global_info global_info;
+    if(unzGetGlobalInfo(zipfile, &global_info) != UNZ_OK) {
+        unzClose(zipfile);
+        return false;
+    }
+    char read_buffer[READ_SIZE];
+    for(ulong i = 0; i < global_info.number_entry; i++) {
+        // The archive starts positioned on its first entry; advance for the
+        // following ones.
+        if(i > 0 && unzGoToNextFile(zipfile) != UNZ_OK) {
+            unzClose(zipfile);
+            return false;
+        }
+        unz_file_info file_info;
+        if(unzGetCurrentFileInfo(zipfile, &file_info, read_buffer, READ_SIZE, NULL, 0, NULL, 0) != UNZ_OK) {
+            unzClose(zipfile);
+            return false;
+        }
+        // The name is only NUL-terminated when it is shorter than the buffer.
+        if(file_info.size_filename >= READ_SIZE) {
+            unzClose(zipfile);
+            return false;
+        }
+        const string entry(read_buffer);
+        // Refuse names that climb out of the target directory.
+        for(const auto& part : filesystem::path(entry)) {
+            if(part == "..") {
+                unzClose(zipfile);
+                return false;
+            }
+        }
+        const string fname = basedir + entry;
+        size_t pos = fname.find_last_of(delim);
+        if(pos != string::npos) {
+            string path = fname.substr(0, pos);
+            filesystem::create_directories(path);
+        }
+        if(fname.back() == delim) {
+            // Directory entry: it has been created above, nothing to extract.
+            continue;
+        }
+        if(unzOpenCurrentFile(zipfile) != UNZ_OK) {
+            unzClose(zipfile);
+            return false;
+        }
+        FILE *out = fopen(fname.c_str(), "wb");
+        if(out == NULL) {
+            unzCloseCurrentFile(zipfile);
+            unzClose(zipfile);
+            return false;
+        }
+        int bytesRead;
+        bool failed = false;
+        do {
+            bytesRead = unzReadCurrentFile(zipfile, read_buffer, READ_SIZE);
+            if(bytesRead < 0) {
+                printf("error %d\n", bytesRead);
+                failed = true;
+            } else if(bytesRead > 0
+                    && fwrite(read_buffer, 1, bytesRead, out) != (size_t) bytesRead) {
+                failed = true;
+            }
+        } while(!failed && bytesRead > 0);
+        fclose(out);
+        unzCloseCurrentFile(zipfile);
+        if(failed) {
+            unzClose(zipfile);
+            return false;
+        }
+    }
+    unzClose(zipfile);
+    return true;
+}
+
+// Remove module modname from the installation under basedir. Does nothing
+// when no module of that name is found.
+void libbible::uninstallMod(string modname) {
+    sword::SWMgr mgr(basedir.c_str());
+    const auto found = mgr.Modules.find(modname.c_str());
+    if(found == mgr.Modules.end()) {
+        return;
+    }
+    installMgr->removeModule(&mgr, found->second->getName());
+}
diff --git a/src/lib/settings.cc b/src/lib/settings.cc
new file mode 100644
index 0000000..848e22f
--- /dev/null
+++ b/src/lib/settings.cc
@@ -0,0 +1,23 @@
+#include "libbible.h"
+#include <sword/swconfig.h>
+
+// Location of the settings file. std::getenv returns a null pointer when
+// HOME is not set, and adding a null pointer to a std::string is undefined,
+// so fall back to the current directory in that case.
+std::string path = std::string(std::getenv("HOME") ? std::getenv("HOME") : ".") + std::string("/.sword/libbible.conf");
+// Settings store backed by that file; all accessors below use its "General"
+// section.
+sword::SWConfig config(path.c_str());
+
+// Store value under key in the "General" section and save the configuration.
+void libbible::settingsWrite(std::string key, std::string value) {
+    const sword::SWBuf stored(value.c_str());
+    config["General"][key.c_str()] = stored;
+    config.save();
+}
+
+// Return the value stored under key in the "General" section.
+std::string libbible::settingsRead(std::string key) {
+    const char *stored = config["General"][key.c_str()].c_str();
+    return std::string(stored);
+}
+
+// Store value, formatted as decimal text, under key in the "General" section
+// and save the configuration.
+void libbible::settingsWriteInt(std::string key, int value) {
+    const std::string digits = std::to_string(value);
+    config["General"][key.c_str()] = sword::SWBuf(digits.c_str());
+    config.save();
+}
+
+// Return the value stored under key in the "General" section, converted
+// with atoi.
+int libbible::settingsReadInt(std::string key) {
+    const char *stored = config["General"][key.c_str()].c_str();
+    return atoi(stored);
+}
diff --git a/src/test/Makefile b/src/test/Makefile
new file mode 100644
index 0000000..1f8bc8b
--- /dev/null
+++ b/src/test/Makefile
@@ -0,0 +1,20 @@
+LIBS = sword minizip
+override CXXFLAGS += -MMD -Wall -fPIC -std=c++20 `pkg-config $(LIBS) --cflags`
+override LDFLAGS += -lstdc++fs `pkg-config $(LIBS) --libs` -lcppunit ../../libbible.so
+SOURCES = $(wildcard *.cc)
+OBJECTS = $(SOURCES:.cc=.o)
+DEPS = $(OBJECTS:.o=.d)
+TEST = testLibbible
+
+$(TEST): $(OBJECTS)
+ $(CXX) $(OBJECTS) -o $@ $(LDFLAGS)
+
+-include $(DEPS)
+
+.PHONY: test
+test: $(TEST)
+ ./$(TEST)
+
+.PHONY: clean
+clean:
+ $(RM) $(OBJECTS) $(DEPS) $(TEST)
diff --git a/src/test/modules/JPS.zip b/src/test/modules/JPS.zip
new file mode 100644
index 0000000..4f09ff8
--- /dev/null
+++ b/src/test/modules/JPS.zip
Binary files differ
diff --git a/src/test/modules/KJV.zip b/src/test/modules/KJV.zip
new file mode 100644
index 0000000..27c161d
--- /dev/null
+++ b/src/test/modules/KJV.zip
Binary files differ
diff --git a/src/test/testLibbible.cc b/src/test/testLibbible.cc
new file mode 100644
index 0000000..d3a265a
--- /dev/null
+++ b/src/test/testLibbible.cc
@@ -0,0 +1,265 @@
+//#include <libbible.h>
+#include "libbible.h"
+#include <string>
+#include <map>
+#include <vector>
+#include <cppunit/TestCase.h>
+#include <cppunit/TestFixture.h>
+#include <cppunit/ui/text/TextTestRunner.h>
+#include <cppunit/extensions/HelperMacros.h>
+#include <cppunit/extensions/TestFactoryRegistry.h>
+#include <cppunit/TestResult.h>
+#include <cppunit/TestResultCollector.h>
+#include <cppunit/TestRunner.h>
+#include <cppunit/BriefTestProgressListener.h>
+#include <cppunit/CompilerOutputter.h>
+#include <cppunit/XmlOutputter.h>
+#include <netinet/in.h>
+#include <iostream>
+
+using namespace CppUnit;
+using namespace std;
+
+//-----------------------------------------------------------------------------
+
+// CppUnit fixture holding the libbible tests.
+// NOTE(review): the registration order below looks significant --
+// testGetModules installs the KJV and JPS modules that testGetPassages and
+// testGetText then read; confirm CppUnit runs tests in registration order.
+class TestLibbible : public CppUnit::TestFixture
+{
+ CPPUNIT_TEST_SUITE(TestLibbible);
+ CPPUNIT_TEST(testGetModules);
+ CPPUNIT_TEST(testGetPassages);
+ CPPUNIT_TEST(testGetText);
+ CPPUNIT_TEST(testSettings);
+ CPPUNIT_TEST(testDownload);
+ CPPUNIT_TEST_SUITE_END();
+
+ // No per-test setUp/tearDown is defined (declarations left disabled).
+ //public:
+ //void setUp(void);
+ //void tearDown(void);
+
+ protected:
+ // Uninstalls everything, installs KJV and JPS from zip, checks the book lists.
+ void testGetModules(void);
+ // Checks the passages reported for Romans in KJV.
+ void testGetPassages(void);
+ // Checks verse coverage and text returned for several references.
+ void testGetText(void);
+ // Round-trips string and integer settings.
+ void testSettings(void);
+ // Installs a module from the internet, once cancelled and once completed.
+ void testDownload(void);
+
+};
+
+//-----------------------------------------------------------------------------
+
+// Status reporter that only remembers whether update() was ever called.
+class StatusTester : public libbible::Status
+{
+    public:
+        // Set to true by the first call to update().
+        bool hasBeenUpdated = false;
+
+        // The progress figures and message are ignored; only the call is recorded.
+        virtual void update(unsigned long totalBytes, unsigned long completedBytes, string message) {
+            hasBeenUpdated = true;
+        }
+};
+
+//-----------------------------------------------------------------------------
+
+// Status reporter that asks libbible to terminate the download on every
+// update it receives.
+class CancelTester : public libbible::Status
+{
+    public:
+        // The progress figures and message are ignored.
+        virtual void update(unsigned long totalBytes, unsigned long completedBytes, string message) {
+            libbible::terminateDownload();
+        }
+};
+
+//-----------------------------------------------------------------------------
+
+// Uninstalls every module, reinstalls KJV and JPS from the bundled zip files
+// and checks the book lists reported for them.
+void TestLibbible::testGetModules(void) {
+    map<string, vector<string>> mods = libbible::getModules();
+    // Start from a clean slate.
+    for(const auto& installed : mods) {
+        libbible::uninstallMod(installed.first);
+    }
+    CPPUNIT_ASSERT(libbible::getModules().empty());
+
+    CPPUNIT_ASSERT(libbible::installModFromZip("modules/KJV.zip"));
+    CPPUNIT_ASSERT(libbible::installModFromZip("modules/JPS.zip"));
+    mods = libbible::getModules();
+
+    // KJV: 66 books, spot-checked at two positions.
+    CPPUNIT_ASSERT(mods.find("KJV") != mods.end());
+    CPPUNIT_ASSERT(mods["KJV"].size() == 66);
+    CPPUNIT_ASSERT(mods["KJV"][7] == "Ruth");
+    CPPUNIT_ASSERT(mods["KJV"][42] == "John");
+
+    // JPS: 39 books.
+    CPPUNIT_ASSERT(mods.find("JPS") != mods.end());
+    CPPUNIT_ASSERT(mods["JPS"].size() == 39);
+}
+
+// Checks the passages libbible reports for Romans in the KJV module.
+void TestLibbible::testGetPassages(void) {
+    auto passages = libbible::getPassages("KJV", "Romans");
+    // Verify the count before indexing: passages[0] on an empty result would
+    // be undefined behaviour instead of a reported test failure.
+    CPPUNIT_ASSERT(passages.size() == 16);
+    CPPUNIT_ASSERT(passages[0].modName == "KJV");
+    CPPUNIT_ASSERT(passages[0].book == "Romans");
+    CPPUNIT_ASSERT(passages[0].bookShort == "Rom");
+    CPPUNIT_ASSERT(passages[0].chapterStart == 1);
+    CPPUNIT_ASSERT(passages[0].verseStart == 1);
+    CPPUNIT_ASSERT(passages[0].chapterEnd == 1);
+    CPPUNIT_ASSERT(passages[0].verseEnd == 32);
+}
+
+// Collapses a list of text fragments into the sequence of (chapter, verse)
+// pairs it covers; consecutive fragments of the same verse yield one entry.
+vector<pair<int, int>> getChapVerses(std::vector<libbible::text> text) {
+    vector<pair<int, int>> chapVerses;
+    for(const auto& fragment : text) {
+        const bool sameAsLast = !chapVerses.empty() &&
+                                chapVerses.back().first == fragment.chapter &&
+                                chapVerses.back().second == fragment.verse;
+        if(sameAsLast) {
+            continue;
+        }
+        chapVerses.push_back(pair<int, int>(fragment.chapter, fragment.verse));
+    }
+    return chapVerses;
+}
+
+// Checks libbible::getText for hand-built passages and for references parsed
+// by libbible::getPassage: verse coverage for ranges, exact text for single
+// verses.
+void TestLibbible::testGetText(void) {
+    // Range spanning a chapter boundary, addressed by short book name.
+    libbible::passage pass;
+    pass.modName = "KJV";
+    pass.bookShort = "Matt";
+    pass.chapterStart = 3;
+    pass.verseStart = 16;
+    pass.chapterEnd = 4;
+    pass.verseEnd = 7;
+    auto text = libbible::getText(pass);
+    // Verify that it includes every verse (3:16-17 + 4:1-7)
+    vector<pair<int, int>> chapVerses = getChapVerses(text);
+    vector<pair<int, int>> shouldContain = {{3, 16}, {3, 17},
+                                            {4, 1}, {4, 2}, {4, 3}, {4, 4},
+                                            {4, 5}, {4, 6}, {4, 7}};
+    CPPUNIT_ASSERT(chapVerses == shouldContain);
+
+    // Single verse, addressed by full book name.
+    libbible::passage pass2;
+    pass2.modName = "KJV";
+    pass2.book = "John";
+    pass2.chapterStart = 3;
+    pass2.verseStart = 16;
+    pass2.chapterEnd = 3;
+    pass2.verseEnd = 16;
+    text = libbible::getText(pass2);
+    string allText;
+    for(const auto& tex : text) {
+        allText += tex.text;
+    }
+    //printf("Text is: `%s`\n", allText.c_str());
+    CPPUNIT_ASSERT(allText == "For God so loved the world, that he gave his only begotten Son, that whosoever believeth in him should not perish, but have everlasting life. ");
+
+    // Single verse from a parsed reference.
+    text = libbible::getText(libbible::getPassage("KJV", "John 3:3"));
+    allText.clear();
+    for(const auto& tex : text) {
+        allText += tex.text;
+    }
+    //printf("Text is: `%s`\n", allText.c_str());
+    CPPUNIT_ASSERT(allText == "Jesus answered and said unto him, Verily, verily, I say unto thee, Except a man be born again, he cannot see the kingdom of God. ");
+
+    // Verse range within one chapter.
+    text = libbible::getText(libbible::getPassage("KJV", "Gal 5:22-23"));
+    chapVerses = getChapVerses(text);
+    shouldContain = {{5, 22}, {5, 23}};
+    CPPUNIT_ASSERT(chapVerses == shouldContain);
+
+    // Verse range across a chapter boundary, lower-case abbreviated book.
+    text = libbible::getText(libbible::getPassage("KJV", "1 cor 1:31-2:1"));
+    chapVerses = getChapVerses(text);
+    shouldContain = {{1, 31}, {2, 1}};
+    CPPUNIT_ASSERT(chapVerses == shouldContain);
+
+    // Whole-chapter range.
+    text = libbible::getText(libbible::getPassage("KJV", "ps 14-15"));
+    chapVerses = getChapVerses(text);
+    shouldContain = {{14, 1}, {14, 2}, {14, 3}, {14, 4}, {14, 5}, {14, 6}, {14, 7},
+                     {15, 1}, {15, 2}, {15, 3}, {15, 4}, {15, 5}};
+    CPPUNIT_ASSERT(chapVerses == shouldContain);
+
+    // Single whole chapter: the last fragment must belong to verse 25.
+    text = libbible::getText(libbible::getPassage("KJV", "John 21"));
+    // back() on an empty vector is undefined behaviour, so fail cleanly first.
+    CPPUNIT_ASSERT(!text.empty());
+    CPPUNIT_ASSERT(text.back().verse == 25);
+}
+
+// Round-trips values through the settings API using the single key "test".
+void TestLibbible::testSettings(void) {
+ // A written string is read back unchanged ...
+ libbible::settingsWrite("test", "foo");
+ CPPUNIT_ASSERT(libbible::settingsRead("test") == "foo");
+ // ... and a second write replaces the first.
+ libbible::settingsWrite("test", "bar");
+ CPPUNIT_ASSERT(libbible::settingsRead("test") == "bar");
+ // Integer variant, same key.
+ libbible::settingsWriteInt("test", 5);
+ CPPUNIT_ASSERT(libbible::settingsReadInt("test") == 5);
+ // Finish by writing the empty string, which must also read back as empty.
+ libbible::settingsWrite("test", "");
+ CPPUNIT_ASSERT(libbible::settingsRead("test") == "");
+}
+
+// Installs one downloadable module twice: first with a reporter that cancels
+// the download (the module must not end up installed), then with a passive
+// reporter (the module must be installed and the reporter notified).
+void TestLibbible::testDownload(void) {
+    map<string, vector<string>> modsAvailable = libbible::downloadModsAvailable();
+    // We try installing the first available one. Languages with an empty
+    // module list are skipped: indexing [0] on an empty vector would be
+    // undefined behaviour.
+    string language;
+    string name;
+    for(const auto& available : modsAvailable) {
+        if(available.second.empty()) {
+            continue;
+        }
+        language = available.first;
+        name = available.second[0];
+        break;
+    }
+    CPPUNIT_ASSERT(!language.empty() && !name.empty());
+    // Try uninstalling it (shouldn't crash or have nasty side effects!)
+    libbible::uninstallMod(name);
+    // Try installing it with cancel. Shouldn't work because it gets cancelled!
+    // NOTE(review): `cancel` and `status` are locals; if libbible keeps
+    // referring to the reporter after this test returns it would dangle --
+    // confirm setStatusReporter's contract.
+    CancelTester cancel;
+    libbible::setStatusReporter(cancel);
+    libbible::installModFromInternet(language, name);
+    auto mods = libbible::getModules();
+    CPPUNIT_ASSERT(mods.find(name) == mods.end());
+    // Now we try with normal status
+    StatusTester status;
+    libbible::setStatusReporter(status);
+    libbible::installModFromInternet(language, name);
+    mods = libbible::getModules();
+    CPPUNIT_ASSERT(mods.find(name) != mods.end());
+    CPPUNIT_ASSERT(status.hasBeenUpdated);
+
+}
+//-----------------------------------------------------------------------------
+
+CPPUNIT_TEST_SUITE_REGISTRATION( TestLibbible );
+
+// Runs every registered test, prints per-test progress and a compiler-style
+// report, and turns the overall outcome into the process exit status.
+int main(int argc, char* argv[]) {
+    // Event hub that forwards test results to the listeners added below.
+    CPPUNIT_NS::TestResult eventManager;
+
+    // Listener that accumulates the results for the final report.
+    CPPUNIT_NS::TestResultCollector outcomes;
+    eventManager.addListener (&outcomes);
+
+    // Listener that prints progress for each test.
+    CPPUNIT_NS::BriefTestProgressListener progressPrinter;
+    eventManager.addListener (&progressPrinter);
+
+    // Populate the runner from the factory registry and execute.
+    CPPUNIT_NS::TestRunner runner;
+    runner.addTest (CPPUNIT_NS::TestFactoryRegistry::getRegistry().makeTest ());
+    runner.run(eventManager);
+
+    // Report in compiler format on stderr.
+    CPPUNIT_NS::CompilerOutputter report(&outcomes, std::cerr);
+    report.write ();
+
+    // Output XML for Jenkins CPPunit plugin (currently disabled)
+    //ofstream xmlFileOut("testLibbibleResults.xml");
+    //XmlOutputter xmlOut(&collectedresults, xmlFileOut);
+    //xmlOut.write();
+
+    // Exit status 0 only when every test passed.
+    if (outcomes.wasSuccessful()) {
+        return 0;
+    }
+    return 1;
+}
diff --git a/src/utf8.h b/src/utf8.h
new file mode 100644
index 0000000..4e44514
--- /dev/null
+++ b/src/utf8.h
@@ -0,0 +1,34 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "utf8/checked.h"
+#include "utf8/unchecked.h"
+
+#endif // header guard
diff --git a/src/utf8/checked.h b/src/utf8/checked.h
new file mode 100644
index 0000000..1331155
--- /dev/null
+++ b/src/utf8/checked.h
@@ -0,0 +1,327 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+#include <stdexcept>
+
+namespace utf8
+{
+ // Base for the exceptions that may be thrown from the library.
+ // Adds nothing to std::exception; it exists so that the exception types
+ // declared below share one utf8-specific base that callers can catch.
+ class exception : public ::std::exception {
+ };
+
+    // Exceptions that may be thrown from the library functions.
+
+    // Thrown for a value that is not a valid Unicode code point; the
+    // offending value is available through code_point().
+    class invalid_code_point : public exception {
+        uint32_t cp;
+    public:
+        invalid_code_point(uint32_t cp) : cp(cp) {}
+        // noexcept replaces the dynamic exception specification throw(),
+        // which is deprecated and no longer part of C++20.
+        virtual const char* what() const noexcept { return "Invalid code point"; }
+        uint32_t code_point() const {return cp;}
+    };
+
+    // Thrown for a malformed UTF-8 sequence; the octet supplied by the
+    // thrower is available through utf8_octet().
+    class invalid_utf8 : public exception {
+        uint8_t u8;
+    public:
+        invalid_utf8 (uint8_t u) : u8(u) {}
+        // noexcept replaces the dynamic exception specification throw(),
+        // which is deprecated and no longer part of C++20.
+        virtual const char* what() const noexcept { return "Invalid UTF-8"; }
+        uint8_t utf8_octet() const {return u8;}
+    };
+
+    // Thrown for a malformed UTF-16 sequence; the 16-bit word supplied by the
+    // thrower is available through utf16_word().
+    class invalid_utf16 : public exception {
+        uint16_t u16;
+    public:
+        invalid_utf16 (uint16_t u) : u16(u) {}
+        // noexcept replaces the dynamic exception specification throw(),
+        // which is deprecated and no longer part of C++20.
+        virtual const char* what() const noexcept { return "Invalid UTF-16"; }
+        uint16_t utf16_word() const {return u16;}
+    };
+
+    // Thrown when a range ends before a complete sequence could be read.
+    class not_enough_room : public exception {
+    public:
+        // noexcept replaces the dynamic exception specification throw(),
+        // which is deprecated and no longer part of C++20.
+        virtual const char* what() const noexcept { return "Not enough space"; }
+    };
+
+ /// The library API - functions intended to be called by the users
+
+    // Encodes code point `cp` as UTF-8, writing one to four octets through
+    // `result`, and returns the iterator past the last octet written.
+    // Throws invalid_code_point if `cp` fails is_code_point_valid.
+    template <typename octet_iterator>
+    octet_iterator append(uint32_t cp, octet_iterator result)
+    {
+        if (!utf8::internal::is_code_point_valid(cp))
+            throw invalid_code_point(cp);
+
+        // One octet.
+        if (cp < 0x80) {
+            *(result++) = static_cast<uint8_t>(cp);
+            return result;
+        }
+
+        // Two octets.
+        if (cp < 0x800) {
+            *(result++) = static_cast<uint8_t>((cp >> 6) | 0xc0);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
+            return result;
+        }
+
+        // Three octets.
+        if (cp < 0x10000) {
+            *(result++) = static_cast<uint8_t>((cp >> 12) | 0xe0);
+            *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
+            *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
+            return result;
+        }
+
+        // Four octets.
+        *(result++) = static_cast<uint8_t>((cp >> 18) | 0xf0);
+        *(result++) = static_cast<uint8_t>(((cp >> 12) & 0x3f) | 0x80);
+        *(result++) = static_cast<uint8_t>(((cp >> 6) & 0x3f) | 0x80);
+        *(result++) = static_cast<uint8_t>((cp & 0x3f) | 0x80);
+        return result;
+    }
+
+    // Copies [start, end) to `out`, substituting the UTF-8 encoding of
+    // `replacement` for each invalid sequence. Throws not_enough_room when
+    // validation reports that the input ends in the middle of a sequence.
+    template <typename octet_iterator, typename output_iterator>
+    output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement)
+    {
+        while (start != end) {
+            const octet_iterator sequence_start = start;
+            const internal::utf_error err_code = utf8::internal::validate_next(start, end);
+
+            if (err_code == internal::UTF8_OK) {
+                // Valid sequence: copy its octets through unchanged.
+                for (octet_iterator it = sequence_start; it != start; ++it)
+                    *out++ = *it;
+                continue;
+            }
+            if (err_code == internal::NOT_ENOUGH_ROOM)
+                throw not_enough_room();
+
+            // Every remaining error emits one replacement mark and steps over
+            // the offending octet.
+            out = utf8::append (replacement, out);
+            ++start;
+            if (err_code != internal::INVALID_LEAD) {
+                // just one replacement mark for the sequence
+                while (start != end && utf8::internal::is_trail(*start))
+                    ++start;
+            }
+        }
+        return out;
+    }
+
+    // Overload of replace_invalid that uses 0xfffd as the replacement code
+    // point.
+    template <typename octet_iterator, typename output_iterator>
+    inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out)
+    {
+        const uint32_t default_marker = utf8::internal::mask16(0xfffd);
+        return utf8::replace_invalid(start, end, out, default_marker);
+    }
+
+    // Decodes the sequence at `it`, moves `it` past it and returns the code
+    // point. Throws not_enough_room, invalid_utf8 or invalid_code_point
+    // according to the error reported by validate_next.
+    template <typename octet_iterator>
+    uint32_t next(octet_iterator& it, octet_iterator end)
+    {
+        uint32_t cp = 0;
+        const internal::utf_error status = utf8::internal::validate_next(it, end, cp);
+
+        if (status == internal::NOT_ENOUGH_ROOM)
+            throw not_enough_room();
+        if (status == internal::INVALID_LEAD ||
+            status == internal::INCOMPLETE_SEQUENCE ||
+            status == internal::OVERLONG_SEQUENCE)
+            throw invalid_utf8(*it);
+        if (status == internal::INVALID_CODE_POINT)
+            throw invalid_code_point(cp);
+
+        return cp;
+    }
+
+    // Same as next(), but `it` is taken by value, so the caller's iterator is
+    // left where it was.
+    template <typename octet_iterator>
+    uint32_t peek_next(octet_iterator it, octet_iterator end)
+    {
+        const uint32_t cp = utf8::next(it, end);
+        return cp;
+    }
+
+    // Moves `it` back to the start of the preceding sequence and returns the
+    // code point decoded there. Throws not_enough_room if `it` is already at
+    // `start`, and invalid_utf8 if only trail octets are found back to `start`.
+    template <typename octet_iterator>
+    uint32_t prior(octet_iterator& it, octet_iterator start)
+    {
+        // can't do much if it == start
+        if (it == start)
+            throw not_enough_room();
+
+        const octet_iterator sequence_end = it;
+        // Step back until a non-trail octet is found.
+        --it;
+        while (utf8::internal::is_trail(*it)) {
+            if (it == start)
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
+            --it;
+        }
+        return utf8::peek_next(it, sequence_end);
+    }
+
+    /// Deprecated in versions that include "prior"
+    // Moves `it` back to the start of the preceding sequence and returns the
+    // code point decoded there; throws invalid_utf8 when a trail octet is
+    // found at `pass_start`.
+    template <typename octet_iterator>
+    uint32_t previous(octet_iterator& it, octet_iterator pass_start)
+    {
+        const octet_iterator sequence_end = it;
+        --it;
+        while (utf8::internal::is_trail(*it)) {
+            if (it == pass_start)
+                throw invalid_utf8(*it); // error - no lead byte in the sequence
+            --it;
+        }
+        // Decode on a copy so that `it` stays on the lead octet.
+        octet_iterator cursor = it;
+        return utf8::next(cursor, sequence_end);
+    }
+
+    // Moves `it` forward by `n` code points, decoding (and validating) each
+    // one with next().
+    template <typename octet_iterator, typename distance_type>
+    void advance (octet_iterator& it, distance_type n, octet_iterator end)
+    {
+        distance_type consumed = 0;
+        while (consumed < n) {
+            utf8::next(it, end);
+            ++consumed;
+        }
+    }
+
+    // Counts the code points in [first, last), decoding each one with next().
+    template <typename octet_iterator>
+    typename std::iterator_traits<octet_iterator>::difference_type
+    distance (octet_iterator first, octet_iterator last)
+    {
+        typename std::iterator_traits<octet_iterator>::difference_type count = 0;
+        while (first < last) {
+            utf8::next(first, last);
+            ++count;
+        }
+        return count;
+    }
+
+    // Converts the UTF-16 range [start, end) to UTF-8 written through
+    // `result`; returns the iterator past the last octet written. Throws
+    // invalid_utf16 for a lead surrogate without a valid trail surrogate and
+    // for a lone trail surrogate.
+    template <typename u16bit_iterator, typename octet_iterator>
+    octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+    {
+        while (start != end) {
+            uint32_t cp = utf8::internal::mask16(*start++);
+            // Take care of surrogate pairs first
+            if (utf8::internal::is_lead_surrogate(cp)) {
+                if (start == end)
+                    throw invalid_utf16(static_cast<uint16_t>(cp));
+
+                const uint32_t trail_surrogate = utf8::internal::mask16(*start++);
+                if (!utf8::internal::is_trail_surrogate(trail_surrogate))
+                    throw invalid_utf16(static_cast<uint16_t>(trail_surrogate));
+
+                cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET;
+            }
+            // Lone trail surrogate
+            else if (utf8::internal::is_trail_surrogate(cp)) {
+                throw invalid_utf16(static_cast<uint16_t>(cp));
+            }
+
+            result = utf8::append(cp, result);
+        }
+        return result;
+    }
+
+    // Converts the UTF-8 range [start, end) to UTF-16 written through
+    // `result`; code points above 0xffff are written as a surrogate pair.
+    template <typename u16bit_iterator, typename octet_iterator>
+    u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+    {
+        while (start != end) {
+            const uint32_t cp = utf8::next(start, end);
+            if (cp <= 0xffff) {
+                *result++ = static_cast<uint16_t>(cp);
+                continue;
+            }
+            // make a surrogate pair
+            *result++ = static_cast<uint16_t>((cp >> 10) + internal::LEAD_OFFSET);
+            *result++ = static_cast<uint16_t>((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+        }
+        return result;
+    }
+
+    // Encodes every code point in [start, end) as UTF-8 through `result`
+    // using append(); returns the iterator past the last octet written.
+    template <typename octet_iterator, typename u32bit_iterator>
+    octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+    {
+        while (start != end) {
+            const uint32_t cp = *(start++);
+            result = utf8::append(cp, result);
+        }
+        return result;
+    }
+
+    // Decodes the UTF-8 range [start, end) with next() and writes each code
+    // point through `result`; returns the iterator past the last one written.
+    template <typename octet_iterator, typename u32bit_iterator>
+    u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+    {
+        while (start != end) {
+            const uint32_t cp = utf8::next(start, end);
+            (*result++) = cp;
+        }
+        return result;
+    }
+
+    // The iterator class
+    // Bidirectional iterator adaptor that walks an octet range one code point
+    // at a time. Dereferencing and stepping decode with the checked next()
+    // and prior() functions, so they can throw the same exceptions.
+    template <typename octet_iterator>
+    class iterator {
+        octet_iterator it;
+        octet_iterator range_start;
+        octet_iterator range_end;
+        public:
+        // Member typedefs that used to come from the std::iterator base
+        // class, which is deprecated since C++17. The values are the ones
+        // std::iterator<std::bidirectional_iterator_tag, uint32_t> provided.
+        typedef uint32_t value_type;
+        typedef uint32_t* pointer;
+        typedef uint32_t& reference;
+        typedef std::ptrdiff_t difference_type;
+        typedef std::bidirectional_iterator_tag iterator_category;
+
+        // Value-initialize the members so that a default-constructed
+        // iterator over raw pointers does not hold indeterminate values.
+        iterator () : it(), range_start(), range_end() {}
+        // Throws std::out_of_range when octet_it lies outside
+        // [range_start, range_end].
+        explicit iterator (const octet_iterator& octet_it,
+                           const octet_iterator& range_start,
+                           const octet_iterator& range_end) :
+                 it(octet_it), range_start(range_start), range_end(range_end)
+        {
+            if (it < range_start || it > range_end)
+                throw std::out_of_range("Invalid utf-8 iterator position");
+        }
+        // the default "big three" are OK
+        // Returns the underlying octet iterator.
+        octet_iterator base () const { return it; }
+        // Decodes the code point at the current position without moving.
+        uint32_t operator * () const
+        {
+            octet_iterator temp = it;
+            return utf8::next(temp, range_end);
+        }
+        // Throws std::logic_error when the two iterators were created over
+        // different ranges.
+        bool operator == (const iterator& rhs) const
+        {
+            if (range_start != rhs.range_start || range_end != rhs.range_end)
+                throw std::logic_error("Comparing utf-8 iterators defined with different ranges");
+            return (it == rhs.it);
+        }
+        bool operator != (const iterator& rhs) const
+        {
+            return !(operator == (rhs));
+        }
+        iterator& operator ++ ()
+        {
+            utf8::next(it, range_end);
+            return *this;
+        }
+        iterator operator ++ (int)
+        {
+            iterator temp = *this;
+            utf8::next(it, range_end);
+            return temp;
+        }
+        iterator& operator -- ()
+        {
+            utf8::prior(it, range_start);
+            return *this;
+        }
+        iterator operator -- (int)
+        {
+            iterator temp = *this;
+            utf8::prior(it, range_start);
+            return temp;
+        }
+    }; // class iterator
+
+} // namespace utf8
+
+#endif //header guard
+
+
diff --git a/src/utf8/core.h b/src/utf8/core.h
new file mode 100644
index 0000000..693d388
--- /dev/null
+++ b/src/utf8/core.h
@@ -0,0 +1,329 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include <iterator>
+
+namespace utf8
+{
+ // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers
+ // You may need to change them to match your system.
+ // These typedefs have the same names as ones from cstdint, or boost/cstdint
+ // They are declared inside namespace utf8, so unqualified uses of these
+ // names within the namespace refer to the typedefs below.
+ typedef unsigned char uint8_t;
+ typedef unsigned short uint16_t;
+ typedef unsigned int uint32_t;
+
+// Helper code - not intended to be directly called by the library users. May be changed at any time
+namespace internal
+{
+ // Unicode constants
+ // Leading (high) surrogates: 0xd800 - 0xdbff
+ // Trailing (low) surrogates: 0xdc00 - 0xdfff
+ const uint16_t LEAD_SURROGATE_MIN = 0xd800u;
+ const uint16_t LEAD_SURROGATE_MAX = 0xdbffu;
+ const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u;
+ const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu;
+ // Added to (cp >> 10) by utf8to16 to obtain the lead surrogate.
+ const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10);
+ // Added to (lead << 10) + trail by utf16to8 to recover the code point.
+ // The subtraction is carried out in unsigned arithmetic.
+ const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;
+
+ // Maximum valid value for a Unicode code point
+ const uint32_t CODE_POINT_MAX = 0x0010ffffu;
+
+    // Keeps the low 8 bits of `oc` and returns them as uint8_t.
+    template<typename octet_type>
+    inline uint8_t mask8(octet_type oc)
+    {
+        return static_cast<uint8_t>(oc & 0xff);
+    }
+    // Keeps the low 16 bits of `oc` and returns them as uint16_t.
+    template<typename u16_type>
+    inline uint16_t mask16(u16_type oc)
+    {
+        return static_cast<uint16_t>(oc & 0xffff);
+    }
+    // True when the top two bits of the octet are 10, i.e. it is a trail
+    // (continuation) octet.
+    template<typename octet_type>
+    inline bool is_trail(octet_type oc)
+    {
+        return (utf8::internal::mask8(oc) & 0xc0) == 0x80;
+    }
+
+    // True when `cp` lies in [LEAD_SURROGATE_MIN, LEAD_SURROGATE_MAX].
+    template <typename u16>
+    inline bool is_lead_surrogate(u16 cp)
+    {
+        if (cp < LEAD_SURROGATE_MIN)
+            return false;
+        return cp <= LEAD_SURROGATE_MAX;
+    }
+
+    // True when `cp` lies in [TRAIL_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
+    template <typename u16>
+    inline bool is_trail_surrogate(u16 cp)
+    {
+        if (cp < TRAIL_SURROGATE_MIN)
+            return false;
+        return cp <= TRAIL_SURROGATE_MAX;
+    }
+
+    // True when `cp` lies anywhere in the surrogate range
+    // [LEAD_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
+    template <typename u16>
+    inline bool is_surrogate(u16 cp)
+    {
+        if (cp < LEAD_SURROGATE_MIN)
+            return false;
+        return cp <= TRAIL_SURROGATE_MAX;
+    }
+
+    // A code point is valid when it does not exceed CODE_POINT_MAX and is not
+    // a surrogate.
+    template <typename u32>
+    inline bool is_code_point_valid(u32 cp)
+    {
+        if (cp > CODE_POINT_MAX)
+            return false;
+        return !utf8::internal::is_surrogate(cp);
+    }
+
+    // Returns the sequence length (1 to 4) announced by the lead octet at
+    // `lead_it`, or 0 when the octet is not a valid lead.
+    template <typename octet_iterator>
+    inline typename std::iterator_traits<octet_iterator>::difference_type
+    sequence_length(octet_iterator lead_it)
+    {
+        const uint8_t lead = utf8::internal::mask8(*lead_it);
+        if (lead < 0x80)             // 0xxxxxxx
+            return 1;
+        if ((lead & 0xe0) == 0xc0)   // 110xxxxx
+            return 2;
+        if ((lead & 0xf0) == 0xe0)   // 1110xxxx
+            return 3;
+        if ((lead & 0xf8) == 0xf0)   // 11110xxx
+            return 4;
+        return 0;
+    }
+
+    // True when `cp` was decoded from a sequence of `length` octets although
+    // it belongs to a range that is encoded with a different length.
+    template <typename octet_difference_type>
+    inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
+    {
+        if (cp < 0x80)
+            return length != 1;
+        if (cp < 0x800)
+            return length != 2;
+        if (cp < 0x10000)
+            return length != 3;
+
+        return false;
+    }
+
+ // Result codes of the decoding helpers below; UTF8_OK means success and
+ // every other value names the reason a sequence was rejected.
+ enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT};
+
+    /// Helper for get_sequence_x
+    // Advances `it` by one octet. Reports NOT_ENOUGH_ROOM when that reaches
+    // `end`, INCOMPLETE_SEQUENCE when the new octet is not a trail octet, and
+    // UTF8_OK otherwise.
+    template <typename octet_iterator>
+    utf_error increase_safely(octet_iterator& it, octet_iterator end)
+    {
+        ++it;
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        return utf8::internal::is_trail(*it) ? UTF8_OK : INCOMPLETE_SEQUENCE;
+    }
+
+ // Advances IT with increase_safely and makes the enclosing function return
+ // the error code if that fails. Only defined for the get_sequence_x
+ // functions; it is #undef'ed after them.
+ #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;}
+
+    /// get_sequence_x functions decode utf-8 sequences of the length x
+    // One-octet case: the code point is the octet itself. `it` is not moved.
+    template <typename octet_iterator>
+    utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        if (it != end) {
+            code_point = utf8::internal::mask8(*it);
+            return UTF8_OK;
+        }
+
+        return NOT_ENOUGH_ROOM;
+    }
+
+ // Decodes a two-octet sequence starting at `it` into `code_point`.
+ // On success `it` is left on the last octet of the sequence (not past it).
+ // On failure the error code is returned and `it` may already have moved.
+ template <typename octet_iterator>
+ utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+ {
+ if (it == end)
+ return NOT_ENOUGH_ROOM;
+
+ // Start with the raw lead octet.
+ code_point = utf8::internal::mask8(*it);
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ // Shift the lead left by 6 and mask to 11 bits, then add the 6 payload
+ // bits of the trail octet.
+ code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
+
+ return UTF8_OK;
+ }
+
+ // Decodes a three-octet sequence starting at `it` into `code_point`.
+ // On success `it` is left on the last octet of the sequence (not past it).
+ // On failure the error code is returned and `it` may already have moved.
+ template <typename octet_iterator>
+ utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+ {
+ if (it == end)
+ return NOT_ENOUGH_ROOM;
+
+ // Start with the raw lead octet.
+ code_point = utf8::internal::mask8(*it);
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ // Lead shifted left by 12 and masked to 16 bits, plus the first trail
+ // octet shifted left by 6 and masked to 12 bits.
+ code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ // Low 6 bits come from the second trail octet.
+ code_point += (*it) & 0x3f;
+
+ return UTF8_OK;
+ }
+
+ // Decodes a four-octet sequence starting at `it` into `code_point`.
+ // On success `it` is left on the last octet of the sequence (not past it).
+ // On failure the error code is returned and `it` may already have moved.
+ template <typename octet_iterator>
+ utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+ {
+ if (it == end)
+ return NOT_ENOUGH_ROOM;
+
+ // Start with the raw lead octet.
+ code_point = utf8::internal::mask8(*it);
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ // Lead shifted left by 18 and masked to 21 bits, plus the first trail
+ // octet shifted left by 12 and masked to 18 bits.
+ code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ // Second trail octet shifted left by 6 and masked to 12 bits.
+ code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
+
+ UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)
+
+ // Low 6 bits come from the third trail octet.
+ code_point += (*it) & 0x3f;
+
+ return UTF8_OK;
+ }
+
+ #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR
+
+    // Decodes and validates the sequence starting at `it`.
+    // On success stores the code point in `code_point`, moves `it` past the
+    // sequence and returns UTF8_OK. On failure returns the error code, leaves
+    // `it` at its original position and does not touch `code_point`.
+    template <typename octet_iterator>
+    utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
+    {
+        // sequence_length() below dereferences `it`, so an empty range has to
+        // be rejected before anything is read.
+        if (it == end)
+            return NOT_ENOUGH_ROOM;
+
+        // Save the original value of it so we can go back in case of failure
+        // Of course, it does not make much sense with i.e. stream iterators
+        octet_iterator original_it = it;
+
+        uint32_t cp = 0;
+        // Determine the sequence length based on the lead octet
+        typedef typename std::iterator_traits<octet_iterator>::difference_type octet_difference_type;
+        const octet_difference_type length = utf8::internal::sequence_length(it);
+
+        // Get trail octets and calculate the code point
+        utf_error err = UTF8_OK;
+        switch (length) {
+            case 0:
+                return INVALID_LEAD;
+            case 1:
+                err = utf8::internal::get_sequence_1(it, end, cp);
+                break;
+            case 2:
+                err = utf8::internal::get_sequence_2(it, end, cp);
+                break;
+            case 3:
+                err = utf8::internal::get_sequence_3(it, end, cp);
+                break;
+            case 4:
+                err = utf8::internal::get_sequence_4(it, end, cp);
+                break;
+        }
+
+        if (err == UTF8_OK) {
+            // Decoding succeeded. Now, security checks...
+            if (utf8::internal::is_code_point_valid(cp)) {
+                if (!utf8::internal::is_overlong_sequence(cp, length)){
+                    // Passed! Return here.
+                    code_point = cp;
+                    // The get_sequence_x helpers leave `it` on the last octet
+                    // of the sequence; step past it.
+                    ++it;
+                    return UTF8_OK;
+                }
+                else
+                    err = OVERLONG_SEQUENCE;
+            }
+            else
+                err = INVALID_CODE_POINT;
+        }
+
+        // Failure branch - restore the original value of the iterator
+        it = original_it;
+        return err;
+    }
+
+    // Overload for callers that only need the verdict: validates the sequence
+    // at `it` and throws the decoded code point away.
+    template <typename octet_iterator>
+    inline utf_error validate_next(octet_iterator& it, octet_iterator end) {
+        uint32_t discarded;
+        const utf_error verdict = utf8::internal::validate_next(it, end, discarded);
+        return verdict;
+    }
+
+} // namespace internal
+
+ /// The library API - functions intended to be called by the users
+
+ // Byte order mark: the three octets that starts_with_bom and is_bom
+ // compare against.
+ const uint8_t bom[] = {0xef, 0xbb, 0xbf};
+
+    // Returns an iterator to the start of the first sequence in [start, end)
+    // that fails validation, or `end` when every sequence is valid.
+    template <typename octet_iterator>
+    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
+    {
+        octet_iterator cursor = start;
+        while (cursor != end) {
+            // validate_next leaves the cursor in place when it reports an error.
+            if (utf8::internal::validate_next(cursor, end) != internal::UTF8_OK)
+                break;
+        }
+        return cursor;
+    }
+
+    // True when find_invalid finds no invalid sequence in [start, end).
+    template <typename octet_iterator>
+    inline bool is_valid(octet_iterator start, octet_iterator end)
+    {
+        const octet_iterator first_bad = utf8::find_invalid(start, end);
+        return first_bad == end;
+    }
+
+    // True when [it, end) begins with the three octets of `bom`. The range is
+    // checked before each read, so shorter ranges simply yield false.
+    template <typename octet_iterator>
+    inline bool starts_with_bom (octet_iterator it, octet_iterator end)
+    {
+        if (it == end || utf8::internal::mask8(*it++) != bom[0])
+            return false;
+        if (it == end || utf8::internal::mask8(*it++) != bom[1])
+            return false;
+        return it != end && utf8::internal::mask8(*it) == bom[2];
+    }
+
+    //Deprecated in release 2.3
+    // True when the octets at `it` match `bom`. Unlike starts_with_bom there
+    // is no end iterator, so up to three octets are read without a range check.
+    template <typename octet_iterator>
+    inline bool is_bom (octet_iterator it)
+    {
+        if (utf8::internal::mask8(*it++) != bom[0])
+            return false;
+        if (utf8::internal::mask8(*it++) != bom[1])
+            return false;
+        return utf8::internal::mask8(*it) == bom[2];
+    }
+} // namespace utf8
+
+#endif // header guard
+
+
diff --git a/src/utf8/unchecked.h b/src/utf8/unchecked.h
new file mode 100644
index 0000000..cb24271
--- /dev/null
+++ b/src/utf8/unchecked.h
@@ -0,0 +1,228 @@
+// Copyright 2006 Nemanja Trifunovic
+
+/*
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+*/
+
+
+#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
+
+#include "core.h"
+
+namespace utf8
+{
+ namespace unchecked
+ {
+ // Writes code point `cp` to `result` as one to four UTF-8 octets and returns
+ // the iterator just past the last octet written. The octet count depends only
+ // on the magnitude of `cp`; the value itself is not validated.
+ template <typename octet_iterator>
+ octet_iterator append(uint32_t cp, octet_iterator result)
+ {
+     // Choose the lead octet and the number of continuation octets after it.
+     uint8_t lead_octet;
+     int continuation_count;
+     if (cp < 0x80) {            // one octet
+         lead_octet = static_cast<uint8_t>(cp);
+         continuation_count = 0;
+     }
+     else if (cp < 0x800) {      // two octets
+         lead_octet = static_cast<uint8_t>((cp >> 6) | 0xc0);
+         continuation_count = 1;
+     }
+     else if (cp < 0x10000) {    // three octets
+         lead_octet = static_cast<uint8_t>((cp >> 12) | 0xe0);
+         continuation_count = 2;
+     }
+     else {                      // four octets
+         lead_octet = static_cast<uint8_t>((cp >> 18) | 0xf0);
+         continuation_count = 3;
+     }
+
+     *(result++) = lead_octet;
+
+     // Each continuation octet carries six payload bits, most significant first.
+     for (int shift = 6 * (continuation_count - 1); shift >= 0; shift -= 6)
+         *(result++) = static_cast<uint8_t>(((cp >> shift) & 0x3f) | 0x80);
+
+     return result;
+ }
+
+ // Decodes the sequence whose lead octet is at `it`, returns the resulting
+ // value and leaves `it` one position past the last octet consumed.
+ // The length comes from utf8::internal::sequence_length; nothing is validated
+ // and no end bound is checked. For any length other than 2, 3 or 4 the masked
+ // lead octet is returned as-is and `it` moves forward by one.
+ template <typename octet_iterator>
+ uint32_t next(octet_iterator& it)
+ {
+     typedef typename std::iterator_traits<octet_iterator>::difference_type length_type;
+
+     uint32_t code_point = utf8::internal::mask8(*it);
+     const length_type octet_count = utf8::internal::sequence_length(it);
+
+     if (octet_count == 2) {
+         ++it;
+         code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);
+     }
+     else if (octet_count == 3) {
+         ++it;
+         code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);
+         ++it;
+         code_point += (*it) & 0x3f;
+     }
+     else if (octet_count == 4) {
+         ++it;
+         code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);
+         ++it;
+         code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;
+         ++it;
+         code_point += (*it) & 0x3f;
+     }
+
+     // Step off the final octet of the sequence.
+     ++it;
+     return code_point;
+ }
+
+ // Returns what utf8::unchecked::next would decode at `it`. The iterator is
+ // taken by value, so the caller's iterator is not advanced.
+ template <typename octet_iterator>
+ uint32_t peek_next(octet_iterator it)
+ {
+     const uint32_t code_point = utf8::unchecked::next(it);
+     return code_point;
+ }
+
+ // Moves `it` backward until it rests on an octet that
+ // utf8::internal::is_trail rejects, then decodes from there with
+ // utf8::unchecked::next using a copy, so `it` stays at the start of the
+ // sequence. No lower bound is checked while stepping back.
+ template <typename octet_iterator>
+ uint32_t prior(octet_iterator& it)
+ {
+     do {
+         --it;
+     } while (utf8::internal::is_trail(*it));
+
+     octet_iterator sequence_start = it;
+     return utf8::unchecked::next(sequence_start);
+ }
+
+ // Deprecated in versions that include prior; kept only so this namespace
+ // stays consistent with utf8::previous. Forwards to utf8::unchecked::prior.
+ template <typename octet_iterator>
+ inline uint32_t previous(octet_iterator& it)
+ {
+     const uint32_t code_point = utf8::unchecked::prior(it);
+     return code_point;
+ }
+
+ // Moves `it` by `n` code points.
+ //   n > 0 : steps forward, one utf8::unchecked::next call per code point.
+ //   n < 0 : steps backward, one utf8::unchecked::prior call per code point
+ //           (octet_iterator must then support operator--).
+ //   n == 0: leaves `it` untouched.
+ // Like the rest of this namespace, nothing is validated and no range bound
+ // is checked; the caller must keep `it` inside the underlying sequence.
+ template <typename octet_iterator, typename distance_type>
+ void advance (octet_iterator& it, distance_type n)
+ {
+     const distance_type zero(0);
+     if (n < zero) {
+         // Count up from n towards zero so the negative value is never negated.
+         for (distance_type i = n; i < zero; ++i)
+             utf8::unchecked::prior(it);
+     }
+     else {
+         for (distance_type i = zero; i < n; ++i)
+             utf8::unchecked::next(it);
+     }
+ }
+
+ // Counts how many utf8::unchecked::next calls it takes to move `first` up to
+ // or past `last`. The loop condition uses operator<, so octet_iterator must
+ // provide it.
+ template <typename octet_iterator>
+ typename std::iterator_traits<octet_iterator>::difference_type
+ distance (octet_iterator first, octet_iterator last)
+ {
+     typedef typename std::iterator_traits<octet_iterator>::difference_type count_type;
+     count_type code_points = 0;
+     while (first < last) {
+         utf8::unchecked::next(first);
+         ++code_points;
+     }
+     return code_points;
+ }
+
+ // Converts the 16-bit units in [start, end) to UTF-8 written through
+ // `result`, and returns the output iterator past the last octet written.
+ // When a unit is a lead surrogate, the next unit is read unconditionally as
+ // its trail surrogate - without checking `end` or the unit's value.
+ template <typename u16bit_iterator, typename octet_iterator>
+ octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result)
+ {
+     for (; start != end; ) {
+         uint32_t code_point = utf8::internal::mask16(*start++);
+         const bool needs_trail = utf8::internal::is_lead_surrogate(code_point);
+         if (needs_trail) {
+             // Combine the surrogate pair into a single code point.
+             const uint32_t trail_unit = utf8::internal::mask16(*start++);
+             code_point = (code_point << 10) + trail_unit + internal::SURROGATE_OFFSET;
+         }
+         result = utf8::unchecked::append(code_point, result);
+     }
+     return result;
+ }
+
+ // Decodes the UTF-8 octets in [start, end) with utf8::unchecked::next and
+ // writes 16-bit units through `result`: one unit for values up to 0xffff, a
+ // lead/trail surrogate pair above that. Returns the output iterator past the
+ // last unit written. The loop condition uses operator< on octet_iterator.
+ template <typename u16bit_iterator, typename octet_iterator>
+ u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result)
+ {
+     while (start < end) {
+         const uint32_t code_point = utf8::unchecked::next(start);
+         if (!(code_point > 0xffff)) {
+             *result++ = static_cast<uint16_t>(code_point);
+             continue;
+         }
+         // Above 0xffff: emit a surrogate pair.
+         *result++ = static_cast<uint16_t>((code_point >> 10) + internal::LEAD_OFFSET);
+         *result++ = static_cast<uint16_t>((code_point & 0x3ff) + internal::TRAIL_SURROGATE_MIN);
+     }
+     return result;
+ }
+
+ // Encodes every value in [start, end) as UTF-8 via utf8::unchecked::append
+ // and returns the output iterator past the last octet written.
+ template <typename octet_iterator, typename u32bit_iterator>
+ octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result)
+ {
+     for (; start != end; ) {
+         result = utf8::unchecked::append(*(start++), result);
+     }
+     return result;
+ }
+
+ // Decodes the UTF-8 octets in [start, end) with utf8::unchecked::next and
+ // writes each decoded value through `result`. Returns the output iterator
+ // past the last value written. The loop condition uses operator< on
+ // octet_iterator.
+ template <typename octet_iterator, typename u32bit_iterator>
+ u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result)
+ {
+     for (; start < end; ) {
+         (*result++) = utf8::unchecked::next(start);
+     }
+     return result;
+ }
+
+ // The iterator class
+ //
+ // Bidirectional iterator adaptor over an octet iterator: dereferencing
+ // decodes one code point with utf8::unchecked::next, and ++ / -- move by one
+ // whole encoded sequence. Nothing is validated and no range bound is kept.
+ //
+ // The iterator typedefs are declared directly rather than inherited from
+ // std::iterator, which is deprecated since C++17; the types are the ones
+ // std::iterator<std::bidirectional_iterator_tag, uint32_t> used to supply.
+ template <typename octet_iterator>
+ class iterator {
+     octet_iterator it;
+     public:
+     typedef uint32_t value_type;
+     typedef uint32_t* pointer;
+     typedef uint32_t& reference;
+     typedef std::ptrdiff_t difference_type;
+     typedef std::bidirectional_iterator_tag iterator_category;
+
+     // Value-initialise the wrapped iterator so a default-constructed
+     // adaptor never holds an indeterminate pointer.
+     iterator () : it() {}
+     explicit iterator (const octet_iterator& octet_it): it(octet_it) {}
+     // the default "big three" are OK
+
+     // The wrapped octet iterator at its current position.
+     octet_iterator base () const { return it; }
+
+     // Decodes the code point at the current position; works on a copy, so
+     // the position does not change.
+     uint32_t operator * () const
+     {
+         octet_iterator temp = it;
+         return utf8::unchecked::next(temp);
+     }
+     bool operator == (const iterator& rhs) const
+     {
+         return (it == rhs.it);
+     }
+     bool operator != (const iterator& rhs) const
+     {
+         return !(operator == (rhs));
+     }
+     // Forward by the length utf8::internal::sequence_length reports here.
+     iterator& operator ++ ()
+     {
+         ::std::advance(it, utf8::internal::sequence_length(it));
+         return *this;
+     }
+     iterator operator ++ (int)
+     {
+         iterator temp = *this;
+         ::std::advance(it, utf8::internal::sequence_length(it));
+         return temp;
+     }
+     // Backward to the start of the preceding sequence; the value decoded by
+     // prior is discarded.
+     iterator& operator -- ()
+     {
+         utf8::unchecked::prior(it);
+         return *this;
+     }
+     iterator operator -- (int)
+     {
+         iterator temp = *this;
+         utf8::unchecked::prior(it);
+         return temp;
+     }
+ }; // class iterator
+
+ } // namespace utf8::unchecked
+} // namespace utf8
+
+
+#endif // header guard
+