From 55d58a16e2511741cc625e203205dec86144faf3 Mon Sep 17 00:00:00 2001 From: Your Name Date: Fri, 18 Feb 2022 20:35:38 -0500 Subject: Reorganized repository layout --- Makefile | 58 ++++----- bible.cc | 329 ----------------------------------------------- libbible.cc | 262 ------------------------------------- libbible.h | 115 ----------------- mods.cc | 233 --------------------------------- modules/JPS.zip | Bin 1170889 -> 0 bytes modules/KJV.zip | Bin 4061008 -> 0 bytes settings.cc | 23 ---- src/bible.cc | 329 +++++++++++++++++++++++++++++++++++++++++++++++ src/lib/libbible.cc | 262 +++++++++++++++++++++++++++++++++++++ src/lib/libbible.h | 115 +++++++++++++++++ src/lib/mods.cc | 233 +++++++++++++++++++++++++++++++++ src/lib/settings.cc | 23 ++++ src/test/Makefile | 20 +++ src/test/modules/JPS.zip | Bin 0 -> 1170889 bytes src/test/modules/KJV.zip | Bin 0 -> 4061008 bytes src/test/testLibbible.cc | 265 ++++++++++++++++++++++++++++++++++++++ src/utf8.h | 34 +++++ src/utf8/checked.h | 327 ++++++++++++++++++++++++++++++++++++++++++++++ src/utf8/core.h | 329 +++++++++++++++++++++++++++++++++++++++++++++++ src/utf8/unchecked.h | 228 ++++++++++++++++++++++++++++++++ testLibbible.cc | 265 -------------------------------------- utf8.h | 34 ----- utf8/checked.h | 327 ---------------------------------------------- utf8/core.h | 329 ----------------------------------------------- utf8/unchecked.h | 228 -------------------------------- 26 files changed, 2190 insertions(+), 2178 deletions(-) delete mode 100644 bible.cc delete mode 100644 libbible.cc delete mode 100644 libbible.h delete mode 100644 mods.cc delete mode 100644 modules/JPS.zip delete mode 100644 modules/KJV.zip delete mode 100644 settings.cc create mode 100644 src/bible.cc create mode 100644 src/lib/libbible.cc create mode 100644 src/lib/libbible.h create mode 100644 src/lib/mods.cc create mode 100644 src/lib/settings.cc create mode 100644 src/test/Makefile create mode 100644 src/test/modules/JPS.zip create mode 100644 src/test/modules/KJV.zip create mode 100644 src/test/testLibbible.cc create mode 100644 src/utf8.h create mode 100644 src/utf8/checked.h create mode 100644 src/utf8/core.h create mode 100644 src/utf8/unchecked.h delete mode 100644 testLibbible.cc delete mode 100644 utf8.h delete mode 100644 utf8/checked.h delete mode 100644 utf8/core.h delete mode 100644 utf8/unchecked.h diff --git a/Makefile b/Makefile index 6b367f8..40d2712 100644 --- a/Makefile +++ b/Makefile @@ -1,44 +1,36 @@ -CC=g++ -LIBS=sword minizip -CFLAGS=-c -Wall -fPIC -std=c++20 -LDLIBS=-lstdc++fs -SOURCES=libbible.cc mods.cc settings.cc -OBJECTS=$(SOURCES:.cc=.o) -LIBRARY=libbible.so -EXECUTABLE=bible -ifeq ($(PREFIX),) - PREFIX := /usr -endif +LIBS = sword minizip +override CXXFLAGS += -MMD -Wall -fPIC -std=c++20 `pkg-config $(LIBS) --cflags` +override LDFLAGS += -lstdc++fs `pkg-config $(LIBS) --libs` +SOURCES = $(wildcard src/*.cc) $(wildcard src/lib/*.cc) +OBJECTS = $(SOURCES:.cc=.o) +DEPS = $(OBJECTS:.o=.d) +EXECUTABLE = bible +LIBRARY = libbible.so +PREFIX := /usr -all: $(SOURCES) $(LIBRARY) $(EXECUTABLE) +$(EXECUTABLE): $(OBJECTS) + $(CXX) $(OBJECTS) -o $@ $(LDFLAGS) + +-include $(DEPS) + +$(LIBRARY): $(OBJECTS) + $(CXX) $(OBJECTS) -shared -o $@ $(LDFLAGS) + +.PHONY: test +test: $(LIBRARY) + $(MAKE) -C src/test/ test install: $(LIBRARY) $(EXECUTABLE) install -d $(DESTDIR)$(PREFIX)/lib/ install -m 644 $(LIBRARY) $(DESTDIR)$(PREFIX)/lib/ install -d $(DESTDIR)$(PREFIX)/include/ - install -m 644 libbible.h $(DESTDIR)$(PREFIX)/include/ + install -m 644 src/lib/libbible.h $(DESTDIR)$(PREFIX)/include/ install -d $(DESTDIR)$(PREFIX)/bin/ - install -m 755 bible $(DESTDIR)$(PREFIX)/bin/ + install -m 755 $(EXECUTABLE) $(DESTDIR)$(PREFIX)/bin/ install -d $(DESTDIR)`pkg-config --variable=completionsdir bash-completion` install -m 644 bible.bash $(DESTDIR)`pkg-config --variable=completionsdir bash-completion`/$(EXECUTABLE) -test: $(OBJECTS) testLibbible.o - $(CC) $(OBJECTS) testLibbible.o -o $@ $(LDFLAGS) `pkg-config $(LIBS) --libs` -lcppunit - -testLibbible.o: testLibbible.cc - $(CC) $(CFLAGS) testLibbible.cc -o $@ - -bible: $(OBJECTS) bible.o - $(CC) $(OBJECTS) bible.o -o $@ $(LDFLAGS) `pkg-config $(LIBS) --libs` $(LDLIBS) - -bible.o: bible.cc - $(CC) $(CFLAGS) bible.cc -o $@ - -$(LIBRARY): $(OBJECTS) - $(CC) $(OBJECTS) -shared -o $@ $(LDFLAGS) `pkg-config $(LIBS) --libs` $(LDLIBS) - -.cc.o: - $(CC) $(CFLAGS) $< -o $@ `pkg-config $(LIBS) --cflags` - +.PHONY: clean clean: - rm -rf *.o $(LIBRARY) test bible + $(RM) $(OBJECTS) $(DEPS) $(EXECUTABLE) $(LIBRARY) $(TEST) + $(MAKE) -C src/test/ clean diff --git a/bible.cc b/bible.cc deleted file mode 100644 index fe2c828..0000000 --- a/bible.cc +++ /dev/null @@ -1,329 +0,0 @@ -#include "libbible.h" -#include -#include -#include -#include -#include -#include -#include -#include "utf8.h" - -using namespace std; - -void usage() { - printf("\nUsage:\n bible [options] [reference]\n\n"); - printf("Print bible passages.\n\n"); - printf("Options:\n"); - printf(" -h, --help display this help message\n"); - printf(" --list-modules list all installed modules\n"); - printf(" -m, --module use specified module\n"); - printf(" --set-default-module use specified module by default in future runs\n"); - printf(" --list-books list books available in the current module\n"); - printf(" --list-chapters list chapters available in book in the current module\n"); - printf(" -o, --omit-verse-numbers when printing verse text, skip printing verse and chapter numbers\n"); - printf(" --list-installable= list bible versions available for download and install. Default lists for all languages.\n"); - printf(" --install-network install module from the network where is LANG:NAME as provided by --list-installable\n"); - printf(" --install-zip install module from a zip file\n"); - printf(" --remove-module delete a module from the system\n"); - printf("\n\nExamples:\n bible Gal 5:22-23\n"); - printf(" bible John 3:16\n bible Romans 12\n bible Matt 5:3-7:27\n"); - printf(" bible Genesis 1-3\n"); -} - -string getDefaultModule() { - return libbible::settingsRead("module"); -} - -void listModules() { - map> mods = libbible::getModules(); - string defaultMod = getDefaultModule(); - printf("Modules Installed:\n"); - for(auto pair : mods) { - if(pair.first == defaultMod) { - printf(" %s (default)\n", pair.first.c_str()); - } else { - printf(" %s\n", pair.first.c_str()); - } - } -} - -void setDefaultModule(string modname) { - libbible::settingsWrite("module", modname); -} - -void listBooks(string modname) { - map> mods = libbible::getModules(); - if(mods.find(modname) == mods.end()) { - printf("ERROR: Module \"%s\" not installed!\n", modname.c_str()); - } else { - printf("Books in Module %s:\n", modname.c_str()); - for(string book : mods[modname]) { - printf(" %s\n", book.c_str()); - } - } -} - -void listChapters(string modname, string book) { - printf("Valid chapters for book %s in module %s:\n", book.c_str(), modname.c_str()); - for(auto pass : libbible::getPassages(modname, book)) { - printf(" Chapter %d, Verses %d-%d\n", pass.chapterStart, pass.verseStart, pass.verseEnd); - } -} - -void listInstallable(string language) { - map> installable = libbible::downloadModsAvailable(); - map languages = libbible::getLanguageNames(); - for(auto pair : installable) { - if(!language.empty() && language != pair.first) { - continue; - } - printf("For language %s:", pair.first.c_str()); - if(!languages[pair.first].empty()) { - printf(" (%s)", languages[pair.first].c_str()); - } - printf("\n"); - for(string name : pair.second) { - printf(" %s\n", name.c_str()); - } - } -} - -void installNetwork(string mod) { - //Split on : - if(mod.find(':') == string::npos) { - printf("Unable to process module \"%s\": Must contain colon separated language:name\n", mod.c_str()); - return; - } - string lang = mod.substr(0, mod.find(':')); - string name = mod.substr(mod.find(':')+1); - if(libbible::installModFromInternet(lang, name)) { - printf("Module installed.\n"); - } else { - printf("Error installing module!\n"); - } -} - -void installZip(string path) { - libbible::installModFromZip(path); -} - -void removeMod(string mod) { - libbible::uninstallMod(mod); -} - -void textWrap(istream& in, ostream& out, size_t width) { - string word; - string line; - char cur = '\0'; - size_t i = 0; - - while(in.get(cur)) { - if(isspace(cur)) { - word.clear(); - } - if(cur == '\n') { - out << line << '\n'; - line.clear(); - word.clear(); - continue; - } - word += cur; - line += cur; - // Anything matching \033.*?m doesn't count - size_t credits = 0; - size_t found = -1; - while((found = line.find("\033", found+1)) != string::npos) { - size_t first = line.find_first_of("m", found); - if(first != string::npos) { - credits += first - found + 1; - } else { - credits += line.size() - found; - } - } - string::iterator end_it = utf8::find_invalid(line.begin(), line.end()); - i = utf8::distance(line.begin(), end_it) - credits; - //printf("Word: %s, i: %ld\n", word.c_str(), i); - if(i > width) { - word.erase(0, word.find_first_not_of(" ")); - if(line.find_last_of(" ") != string::npos) { - line.erase(line.find_last_of(" ")); - out << line << '\n'; - } - line = word; - } - } - out << line; -} - -int main(int argc, char* argv[]) { - static struct option long_options[] = { - {"help", no_argument, 0, 'h'}, - {"list-modules", no_argument, 0, 0}, - {"module", required_argument, 0, 'm'}, - {"set-default-module", required_argument, 0, 0}, - {"list-books", no_argument, 0, 0}, - {"list-chapters", required_argument, 0, 0}, - {"omit-verse-numbers", no_argument, 0, 'o'}, - {"list-installable", optional_argument, 0, 0}, - {"install-network", required_argument, 0, 0}, - {"install-zip", required_argument, 0, 0}, - {"remove-module", required_argument, 0, 0} - }; - int opt, option_index; - string modname; - bool omitVerseNums = false; - bool doListBooks = false; - string listChaptersBook; - string option; - while ((opt = getopt_long(argc, argv, "hm:o", long_options, &option_index)) != -1) { - switch(opt) { - case 'h': - usage(); - return 0; - case 'm': - modname = string(optarg); - break; - case 'o': - omitVerseNums = true; - break; - case 0: - option = string(long_options[option_index].name); - if(option == "list-modules") { - listModules(); - return 0; - } else if(option == "set-default-module") { - setDefaultModule(string(optarg)); - } else if(option == "list-books") { - doListBooks = true; - } else if(option == "list-chapters") { - listChaptersBook = string(optarg); - } else if(option == "list-installable") { - if(optarg == nullptr) { - listInstallable(string()); - } else { - listInstallable(string(optarg)); - } - } else if(option == "install-network") { - installNetwork(string(optarg)); - } else if(option == "install-zip") { - installZip(string(optarg)); - } else if(option == "remove-module") { - removeMod(string(optarg)); - } - break; - default: - usage(); - return 1; - } - } - if(modname.empty()) { - modname = getDefaultModule(); - } - if(doListBooks) { - listBooks(modname); - } - if(! listChaptersBook.empty()) { - listChapters(modname, listChaptersBook); - } - string reference; - while(optind < argc) { - reference += argv[optind++]; - reference += " "; - } - if(reference.empty()) { - // That's all. - return 0; - } - - auto text = libbible::getText(libbible::getPassage(modname, reference)); - int chapter = 0; - int verse = 0; - const char* indent = " "; - bool isNewline = true; - stringstream out; - for(auto tex : text) { - if(!omitVerseNums && tex.chapter != chapter) { - out << tex.book << " Chapter " << tex.chapter << ":\n"; - } - bool isParagraph = false; - bool isIndent = false; - bool isDivineName = false; - bool isJesus = false; - bool isTitle = false; - bool isParallel = false; - bool isPreverse = false; - for(string modifier : tex.modifiers) { - if(modifier == "paragraph") { - isParagraph = true; - } else if (modifier == "line indent0") { - isIndent = true; - } else if (modifier == "divineName") { - isDivineName = true; - } else if (modifier == "wordsOfJesus") { - isJesus = true; - } else if (modifier == "title") { - isTitle = true; - } else if (modifier == "parallel") { - isParallel = true; - } else if (modifier == "preverse") { - isPreverse = true; - } - } - if(isPreverse or isTitle or isParallel) { - // Someday maybe we add this, but for now, omit - tex.text = ""; - } - if(isIndent) { - isParagraph = false; - if(isNewline) { - out << indent; - } - } - if(isParagraph) { - out << indent; - } - if(isDivineName) { - transform(tex.text.begin(), tex.text.end(), tex.text.begin(), ::toupper); - } - if(isJesus) { - out << "\033[;31m"; - } - if(omitVerseNums && tex.verse != verse) { - out << " "; - } else if(!omitVerseNums && tex.verse != verse) { - out << " (" << tex.verse << ") "; - } - chapter = tex.chapter; - verse = tex.verse; - out << tex.text; - if(tex.text.back() == '\n') { - isNewline = true; - } else { - isNewline = false; - } - if(isJesus) { - out << "\033[0m"; - } - } - out << "\n"; - - // Get window size - struct winsize size; - ioctl(STDOUT_FILENO, TIOCGWINSZ, &size); - // size.ws_col is number of columns, or 0 if it's a pipe - int cols = size.ws_col; - // If terminal is too small, treat it like a pipe - if(cols < 10) { - cols = 0; - } - - // Now print - if(cols == 0) { - cout << out.str(); - } else { - stringstream out2; - textWrap(out, out2, cols); - cout << out2.str(); - } - return 0; -} diff --git a/libbible.cc b/libbible.cc deleted file mode 100644 index c9acb7d..0000000 --- a/libbible.cc +++ /dev/null @@ -1,262 +0,0 @@ -#include "libbible.h" -#include -#include -#include -#include -#include -#include - -using namespace sword; -using namespace std; - -SWMgr library(new MarkupFilterMgr(FMT_XHTML)); -OSISFootnotes filter; - -vector getBooks(SWModule *target) { - vector books; - VerseKey *key = (VerseKey *) target->getKey(); - for(char t = 1; t <= key->getTestamentMax(); t++) { - key->setTestament(t); - for(char b = 1; b <= key->getBookMax(); b++) { - key->setBook(b); - // Bug (whose fault??) in JPS; they CLAIM to have two testaments, - // but they only have one, which causes repeats. - if(std::find(books.begin(), books.end(), key->getBookName()) != books.end()) { - continue; - } - // Another issue (maybe bug?) Some translations are NT only, - // but still report OT books/chapters. - if(string(target->renderText()).empty()) { - continue; - } - books.push_back(key->getBookName()); - } - } - return books; -} - -map> libbible::getModules() { - library.load(); - map> mods; - ModMap::iterator it; - for (it = library.getModules().begin(); it != library.getModules().end(); it++) { - string modName = (*it).second->getName(); - SWModule *target = library.getModule(modName.c_str()); - mods[modName] = getBooks(target); - } - return mods; -} - -vector libbible::getPassages(string modName, string book) { - vector passages; - SWModule *target = library.getModule(modName.c_str()); - if(target == nullptr) { - // Module doesn't exist - return passages; - } - target->setKey((book + " " + "1").c_str()); - VerseKey *key = (VerseKey *) target->getKey(); - int maxChapter = key->getChapterMax(); - for(int chapter = 1; chapter <= maxChapter; chapter++) { - string ref = book + ' ' + to_string(chapter); - target->setKey(ref.c_str()); - VerseKey *key = (VerseKey *) target->getKey(); - libbible::passage pass; - pass.modName = modName; - pass.book = string(key->getBookName()); - pass.bookShort = string(key->getBookAbbrev()); - pass.chapterStart = chapter; - pass.chapterEnd = chapter; - pass.verseStart = 1; - pass.verseEnd = key->getVerseMax(); - passages.push_back(pass); - } - return passages; -} - -libbible::text getEmptyText(VerseKey *key) { - libbible::text t; - t.chapter = key->getChapter(); - t.verse = key->getVerse(); - t.book = key->getBookName(); - t.bookShort = key->getBookAbbrev(); - return t; -} - -libbible::passage libbible::getPassage(string modName, string reference) { - libbible::passage pass; - pass.modName = modName; - SWModule *target = library.getModule(pass.modName.c_str()); - if(target == nullptr || reference.empty()) { - // Bad input - return pass; - } - vector validBooks = getBooks(target); - //printf("Hey, I'm inferring missing parts!\n"); - // Let's use the target to help us - target->setKey(reference.c_str()); - VerseKey *key = (VerseKey *) target->getKey(); - pass.book = string(key->getBookName()); - // Hold on a moment, is this book even legal? - if(find(validBooks.begin(), validBooks.end(), pass.book) == validBooks.end()) { - key->setBookName(validBooks[0].c_str()); - pass.book = string(key->getBookName()); - } - pass.bookShort = string(key->getBookAbbrev()); - pass.chapterStart = key->getChapter(); - pass.verseStart = key->getVerse(); - //printf("Results so far: book: %s; chapterStart: %d; verseStart: %d\n", pass.book.c_str(), pass.chapterStart, pass.verseStart); - // And now we just need chapterEnd and verseEnd. Yippee. - string ref = string(reference); - ref.erase(remove(ref.begin(), ref.end(), ' '), ref.end()); - if(ref.find('-') == string::npos) { - // There's no range! - if(ref.find(':') == string::npos) { - // It's a full chapter reference - pass.chapterEnd = pass.chapterStart; - pass.verseEnd = key->getVerseMax(); - } else { - // It's a single verse reference - pass.chapterEnd = pass.chapterStart; - pass.verseEnd = pass.verseStart; - //printf("Hey, it's a single verse reference!\n"); - } - } else { - if(ref.find(':') == string::npos) { - // It's a multi-full-chapter reference - pass.chapterEnd = stoi(ref.substr(ref.find_last_of('-')+1)); - key->setChapter(pass.chapterEnd); - pass.verseEnd = key->getVerseMax(); - } else { - // It falls in categories c:v-v or c:v-c:v (or, technically, c-c:v) - string rangeEnd = ref.substr(ref.find_last_of('-')+1); - if(rangeEnd.find(':') == string::npos) { - // It's c:v-v - pass.verseEnd = stoi(rangeEnd); - pass.chapterEnd = pass.chapterStart; - } else { - // It's c:v-c:v (or c-c:v, but code is the same) - pass.chapterEnd = stoi(rangeEnd.substr(0, rangeEnd.find(':'))); - pass.verseEnd = stoi(rangeEnd.substr(rangeEnd.find(':')+1)); - } - } - } - return pass; -} - -vector libbible::getText(libbible::passage pass) { - vector texts; - SWModule *target = library.getModule(pass.modName.c_str()); - filter.setOptionValue("Off"); - target->addOptionFilter(&filter); - if(target == nullptr) { - // Module doesn't exist - return texts; - } - if(pass.book.empty()) { - pass.book = pass.bookShort; - } - target->setKey((pass.book - + " " + to_string(pass.chapterStart) - + ":" + to_string(pass.verseStart)).c_str()); - VerseKey *key = (VerseKey *) target->getKey(); - - bool endOfParagraph = false; - - string book = string(key->getBookName()); - - for(; string(key->getBookName()) == book && - (key->getChapter() < pass.chapterEnd - || (key->getChapter() == pass.chapterEnd && key->getVerse() <= pass.verseEnd)); - (*key)++) { - - string text = string(target->renderText()); - //printf("Working with: %s\n", text.c_str()); - - texts.push_back(getEmptyText(key)); - - if(key->getVerse() == 1 || endOfParagraph) { - if(find(texts.back().modifiers.begin(), texts.back().modifiers.end(), "paragraph") == texts.back().modifiers.end()) { - texts.back().modifiers.push_back("paragraph"); - } - endOfParagraph = false; - } - - // Variable to accumulate unterminated spans - std::vector> spans; - bool spansChanged = false; - bool hasAddedText = false; - // Iterate over text - for(auto i = text.begin(); i != text.end(); i++) { - if(*i != '<') { - if(spansChanged) { - spansChanged = false; - if(!texts.back().text.empty()) { - texts.push_back(getEmptyText(key)); - } - for(auto& [tag, modifier] : spans) { - if(find(texts.back().modifiers.begin(), texts.back().modifiers.end(), modifier) == texts.back().modifiers.end()) { - texts.back().modifiers.push_back(modifier); - } - } - } - if(*i == '\n') { - continue; // We add newlines with
- } - if(! hasAddedText && (*i == ' ' || *i == '\t')) { - continue; - } - if(*i == "¶"[0] && i+1 != text.end() && *(i+1) == "¶"[1]) { - i++; - if(hasAddedText) { - texts.back().text += '\n'; - } else { - // Append \n to text in previous texts (if applicable) - if(texts.size() > 1) { - texts[texts.size()-2].text += '\n'; - } - texts.back().modifiers.push_back("paragraph"); - continue; - } - } - texts.back().text += *i; - hasAddedText = true; - } - else { - string span; - for(; i != text.end(); i++) { - span.push_back(*i); - if(*i == '>') { - // The end of the span will be "". - if(span[1] == '/') { - string tag = span.substr(2, span.size()-3); - for(auto rit = spans.rbegin(); rit != spans.rend(); rit++) { - if(rit->first == tag) { - spans.erase(rit.base()-1); - spansChanged = true; - break; - } - } - } else if(span.find("class=\"") != string::npos) { - // The span will be formatted "" - // We want just the NAME - string tag = span.substr(1, span.find(" ")-1); - size_t start = span.find("class=\"")+7; - size_t end = span.find("\"", start); - spans.push_back(std::pair(tag, span.substr(start, end-start))); - spansChanged = true; - } else if(span.find("preverse") != string::npos) { - string tag = span.substr(1, span.find(" ")-1); - spans.push_back(std::pair(tag, "preverse")); - } else if(span == "
" || span == "
") { - texts.back().text += '\n'; - } - break; - } - } - } - } - endOfParagraph = (text[text.length()-1] == '\n'); - } - return texts; -} diff --git a/libbible.h b/libbible.h deleted file mode 100644 index f77dc8c..0000000 --- a/libbible.h +++ /dev/null @@ -1,115 +0,0 @@ -#include -#include -#include - -namespace libbible { - - struct text { - int chapter; - int verse; - std::string book; - std::string bookShort; - std::string text; - std::vector modifiers; // e.g., paragraph, line indent0, divineName, wordsOfJesus - }; - - struct passage { - std::string modName; - std::string book; - std::string bookShort; - int chapterStart; - int verseStart; - int chapterEnd; - int verseEnd; - }; - - /* - * @return Map of modName to supported books - */ - std::map> getModules(void); - - /* - * @return Vector of valid single full-chapter passages for a book - */ - std::vector getPassages(std::string modName, std::string book); - - /* - * @param modName the module to use for determining the passage - * @param reference a human-readable reference, e.g., "gen 1:26-27" - * @return the passage matching the reference - */ - passage getPassage(std::string modName, std::string reference); - - /* - * @return Text for a passage - */ - std::vector getText(struct passage pass); - - /************************** - * Methods dealing with mods - ***************************/ - - class Status { - public: - virtual void update(unsigned long totalBytes, unsigned long completedBytes, std::string message) {} - }; - - /** - * @param status Status update method is called asynchronously as download progresses - */ - void setStatusReporter(Status& status); - - /** - * @return A mapping from language to bible version names - */ - std::map> downloadModsAvailable(); - - /** - * @return A mapping from language abbreviations to full language names - */ - std::map getLanguageNames(); - - /** - * Cancel an in-progress download - */ - void terminateDownload(void); - - /** - * @param language The language of the mod to install as provided from downloadModsAvailable - * @param name The name of the bible version as provided from downloadModsAvailable - * @see downloadModsAvailable() - * @return true on success, false otherwise - */ - bool installModFromInternet(std::string language, std::string name); - - /** - * @param filename Path to the .zip compressed module to be installed - * @return true on success, false otherwise - */ - bool installModFromZip(std::string filename); - - /** - * @param modname The name of the module to be removed - */ - void uninstallMod(std::string modname); - - /****************************** - * Methods dealing with settings - *******************************/ - - /* - * From already established code, valid and useful values are: - * int fontsize: the last used size of the font - * string passage: the last looked-up passage - * string module: the last used module - */ - - void settingsWrite(std::string key, std::string value); - - std::string settingsRead(std::string key); - - void settingsWriteInt(std::string key, int value); - - int settingsReadInt(std::string key); - -} diff --git a/mods.cc b/mods.cc deleted file mode 100644 index ab54e48..0000000 --- a/mods.cc +++ /dev/null @@ -1,233 +0,0 @@ -#include "libbible.h" -#include -#include -#include -#include -#include -#include -#include - -using namespace std; - -class myStatusReporter : public sword::StatusReporter { - public: - myStatusReporter(libbible::Status *status); - ~myStatusReporter(); - void preStatus(long totalBytes, long completedBytes, const char *message); - void update(unsigned long totalBytes, unsigned long completedBytes); - protected: - libbible::Status *status; - string message; -}; - -myStatusReporter::myStatusReporter(libbible::Status *s) { - status = s; -} - -myStatusReporter::~myStatusReporter() {}; - -//virtual void libbible::Status::update(unsigned long totalBytes, unsigned long completedBytes, string message) {} - -void myStatusReporter::preStatus(long totalBytes, long completedBytes, const char *msg) { - message = string(msg); - status->update((unsigned long) totalBytes, (unsigned long) completedBytes, message); - //printf("Got a status update: %ld / %ld, \"%s\"\n", completedBytes, totalBytes, message.c_str()); -} - -void myStatusReporter::update(unsigned long totalBytes, unsigned long completedBytes) { - status->update(totalBytes, completedBytes, message); - //printf("Got a status update: %ld / %ld, \"%s\"\n", completedBytes, totalBytes, message.c_str()); -} - -string basedir = (getenv("HOME")) + string("/.sword/"); -sword::InstallMgr *installMgr = new sword::InstallMgr((basedir + std::string("InstallMgr")).c_str(), nullptr); -map>> installSources; -map languageNames; // maps abbreviation to full name - -void libbible::setStatusReporter(libbible::Status& status) { - myStatusReporter *msr = new myStatusReporter(&status); - free(installMgr); - installMgr = new sword::InstallMgr((basedir + std::string("InstallMgr")).c_str(), msr); - installMgr->setUserDisclaimerConfirmed(true); -} - -map> libbible::downloadModsAvailable() { - installSources.clear(); - languageNames.clear(); - mkdir((basedir + std::string("mods.d/")).c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); - mkdir((basedir + std::string("modules/")).c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); - installMgr->setUserDisclaimerConfirmed(true); - string confpath = basedir + string("InstallMgr/InstallMgr.conf"); - if(! sword::FileMgr::existsFile(confpath.c_str())) { - // Lifted directly from xiphos - sword::FileMgr::createParent(confpath.c_str()); - sword::SWConfig config(confpath.c_str()); - sword::InstallSource is("FTP"); - is.caption = "CrossWire"; - is.source = "ftp.crosswire.org"; - is.directory = "/pub/sword/raw"; - config["General"]["PassiveFTP"] = "true"; - config["Sources"]["FTPSource"] = is.getConfEnt(); - config.save(); - installMgr->refreshRemoteSourceConfiguration(); - } - installMgr->readInstallConf(); - map> modsAvailable; - map> languagesToFull; - //printf("Getting langs...\n"); - for(auto src : installMgr->sources) { - if(src.second->getMgr()->Modules.empty()) { - //printf("Refreshing remote source: %s\n", src.second->getConfEnt().c_str()); - installMgr->refreshRemoteSource(src.second); - } - for(auto mod : src.second->getMgr()->Modules) { - auto *curMod = mod.second; - string type(curMod->getType()); - if(type == "Biblical Texts") { - string language(curMod->getLanguage()); - string fullLang; - if(curMod->getConfigEntry("LCSH")) { - // Split on periods, last field, strip - fullLang = string(curMod->getConfigEntry("LCSH")); - // If ends with ., remove - if(fullLang.ends_with('.')) fullLang = fullLang.substr(0, fullLang.size()-1); - if(fullLang.find('.') != string::npos) fullLang = fullLang.substr(fullLang.find_last_of('.')+1); - while(fullLang.starts_with(' ')) fullLang = fullLang.substr(1); - while(fullLang.ends_with(' ')) fullLang = fullLang.substr(0, fullLang.size()-1); - } - vector newLangs; - languagesToFull.emplace(language, newLangs); - languagesToFull[language].push_back(fullLang); - vector newMods; - vector> newSources; - // emplace only adds if key is unique - modsAvailable.emplace(language, newMods); - installSources.emplace(language, newSources); - modsAvailable[language].push_back(string(curMod->getName())); - pair p(string(curMod->getName()), src.second); - installSources[language].push_back(p); - } - } - } - // Now use majority voting to move languagesToFull -> languageNames - for(const auto& [abbrev, fulls] : languagesToFull) { - std::map majVote; - for(auto full : fulls) { - majVote.try_emplace(full, 0); - majVote[full]++; - } - string selected = fulls[0]; - for(auto full : fulls) { - if(majVote[full] > majVote[selected] or (majVote[full] == majVote[selected] and !full.empty() and full.size() < selected.size())) { - selected = full; - } - } - if(selected.empty()) languageNames[abbrev] = abbrev; - else languageNames[abbrev] = selected; - } - return modsAvailable; -} - -std::map libbible::getLanguageNames() { - if(languageNames.empty()) { - downloadModsAvailable(); - } - return languageNames; -} - -void libbible::terminateDownload() { - installMgr->terminate(); -} - -bool libbible::installModFromInternet(string language, string name) { - // Searching through map>> installSources; - if(installSources.empty()) { - downloadModsAvailable(); - } - for (pair p : installSources[language]) { - if(p.first == name) { - sword::SWMgr mgr(basedir.c_str()); - if(installMgr->installModule(&mgr, 0, name.c_str(), p.second) == 0) { - printf("Installed from %s\n", p.second->getConfEnt().c_str()); - return true; - } - return false; - } - } - return false; -} - -#define READ_SIZE 8192 -#define delim '/' - -bool libbible::installModFromZip(string filename) { - // So... turns out it's a mite unsupported to install from a .zip - // Here's the deal. We do a syscall to unzip. We fancy like that. - // TODO: Use the ZipCompress module from SWORD instead. - /*string command = "unzip -o " + filename + " -d " + basedir + "&> /dev/null"; - if(system(command.c_str())) { - //Uh oh... - printf("Something bad happened when unpacking %s\n. Is unzip installed?", filename.c_str()); - }*/ - unzFile zipfile = unzOpen(filename.c_str()); - if(zipfile == NULL) { - return false; - } - unz_global_info global_info; - if(unzGetGlobalInfo(zipfile, &global_info) != UNZ_OK) { - unzClose(zipfile); - return false; - } - char read_buffer[READ_SIZE]; - ulong i; - for(i = 0; i < global_info.number_entry; i++) { - unz_file_info file_info; - if(unzGetCurrentFileInfo(zipfile, &file_info, read_buffer, READ_SIZE, NULL, 0, NULL, 0) != UNZ_OK) { - unzClose(zipfile); - return false; - } - string fname = basedir + string(read_buffer); - size_t pos = fname.find_last_of(delim); - if(pos != string::npos) { - string path = fname.substr(0, pos); - filesystem::create_directories(path); - } - if(unzOpenCurrentFile(zipfile) != UNZ_OK) { - unzCloseCurrentFile(zipfile); - unzClose(zipfile); - return false; - } - FILE *out = fopen(fname.c_str(), "wb"); - if(out == NULL) { - unzCloseCurrentFile(zipfile); - unzClose(zipfile); - return false; - } - int bytesRead; - do { - bytesRead = unzReadCurrentFile(zipfile, read_buffer, READ_SIZE); - if(bytesRead < 0) { - printf("error %d\n", bytesRead); - unzCloseCurrentFile(zipfile); - unzClose(zipfile); - return false; - } - if(bytesRead > 0) { - fwrite(read_buffer, bytesRead, 1, out); - } - } while(bytesRead > 0); - fclose(out); - unzCloseCurrentFile(zipfile); - unzGoToNextFile(zipfile); - } - unzClose(zipfile); - return true; -} - -void libbible::uninstallMod(string modname) { - sword::SWMgr mgr(basedir.c_str()); - sword::ModMap::iterator it = mgr.Modules.find(modname.c_str()); - if(it != mgr.Modules.end()) { - installMgr->removeModule(&mgr, it->second->getName()); - } -} diff --git a/modules/JPS.zip b/modules/JPS.zip deleted file mode 100644 index 4f09ff8..0000000 Binary files a/modules/JPS.zip and /dev/null differ diff --git a/modules/KJV.zip b/modules/KJV.zip deleted file mode 100644 index 27c161d..0000000 Binary files a/modules/KJV.zip and /dev/null differ diff --git a/settings.cc b/settings.cc deleted file mode 100644 index 848e22f..0000000 --- a/settings.cc +++ /dev/null @@ -1,23 +0,0 @@ -#include "libbible.h" -#include - -std::string path = (std::getenv("HOME")) + std::string("/.sword/libbible.conf"); -sword::SWConfig config(path.c_str()); - -void libbible::settingsWrite(std::string key, std::string value) { - config["General"][key.c_str()] = sword::SWBuf(value.c_str()); - config.save(); -} - -std::string libbible::settingsRead(std::string key) { - return config["General"][key.c_str()].c_str(); -} - -void libbible::settingsWriteInt(std::string key, int value) { - config["General"][key.c_str()] = sword::SWBuf(std::to_string(value).c_str()); - config.save(); -} - -int libbible::settingsReadInt(std::string key) { - return atoi(config["General"][key.c_str()].c_str()); -} diff --git a/src/bible.cc b/src/bible.cc new file mode 100644 index 0000000..a09c0c0 --- /dev/null +++ b/src/bible.cc @@ -0,0 +1,329 @@ +#include "lib/libbible.h" +#include +#include +#include +#include +#include +#include +#include +#include "utf8.h" + +using namespace std; + +void usage() { + printf("\nUsage:\n bible [options] [reference]\n\n"); + printf("Print bible passages.\n\n"); + printf("Options:\n"); + printf(" -h, --help display this help message\n"); + printf(" --list-modules list all installed modules\n"); + printf(" -m, --module use specified module\n"); + printf(" --set-default-module use specified module by default in future runs\n"); + printf(" --list-books list books available in the current module\n"); + printf(" --list-chapters list chapters available in book in the current module\n"); + printf(" -o, --omit-verse-numbers when printing verse text, skip printing verse and chapter numbers\n"); + printf(" --list-installable= list bible versions available for download and install. Default lists for all languages.\n"); + printf(" --install-network install module from the network where is LANG:NAME as provided by --list-installable\n"); + printf(" --install-zip install module from a zip file\n"); + printf(" --remove-module delete a module from the system\n"); + printf("\n\nExamples:\n bible Gal 5:22-23\n"); + printf(" bible John 3:16\n bible Romans 12\n bible Matt 5:3-7:27\n"); + printf(" bible Genesis 1-3\n"); +} + +string getDefaultModule() { + return libbible::settingsRead("module"); +} + +void listModules() { + map> mods = libbible::getModules(); + string defaultMod = getDefaultModule(); + printf("Modules Installed:\n"); + for(auto pair : mods) { + if(pair.first == defaultMod) { + printf(" %s (default)\n", pair.first.c_str()); + } else { + printf(" %s\n", pair.first.c_str()); + } + } +} + +void setDefaultModule(string modname) { + libbible::settingsWrite("module", modname); +} + +void listBooks(string modname) { + map> mods = libbible::getModules(); + if(mods.find(modname) == mods.end()) { + printf("ERROR: Module \"%s\" not installed!\n", modname.c_str()); + } else { + printf("Books in Module %s:\n", modname.c_str()); + for(string book : mods[modname]) { + printf(" %s\n", book.c_str()); + } + } +} + +void listChapters(string modname, string book) { + printf("Valid chapters for book %s in module %s:\n", book.c_str(), modname.c_str()); + for(auto pass : libbible::getPassages(modname, book)) { + printf(" Chapter %d, Verses %d-%d\n", pass.chapterStart, pass.verseStart, pass.verseEnd); + } +} + +void listInstallable(string language) { + map> installable = libbible::downloadModsAvailable(); + map languages = libbible::getLanguageNames(); + for(auto pair : installable) { + if(!language.empty() && language != pair.first) { + continue; + } + printf("For language %s:", pair.first.c_str()); + if(!languages[pair.first].empty()) { + printf(" (%s)", languages[pair.first].c_str()); + } + printf("\n"); + for(string name : pair.second) { + printf(" %s\n", name.c_str()); + } + } +} + +void installNetwork(string mod) { + //Split on : + if(mod.find(':') == string::npos) { + printf("Unable to process module \"%s\": Must contain colon separated language:name\n", mod.c_str()); + return; + } + string lang = mod.substr(0, mod.find(':')); + string name = mod.substr(mod.find(':')+1); + if(libbible::installModFromInternet(lang, name)) { + printf("Module installed.\n"); + } else { + printf("Error installing module!\n"); + } +} + +void installZip(string path) { + libbible::installModFromZip(path); +} + +void removeMod(string mod) { + libbible::uninstallMod(mod); +} + +void textWrap(istream& in, ostream& out, size_t width) { + string word; + string line; + char cur = '\0'; + size_t i = 0; + + while(in.get(cur)) { + if(isspace(cur)) { + word.clear(); + } + if(cur == '\n') { + out << line << '\n'; + line.clear(); + word.clear(); + continue; + } + word += cur; + line += cur; + // Anything matching \033.*?m doesn't count + size_t credits = 0; + size_t found = -1; + while((found = line.find("\033", found+1)) != string::npos) { + size_t first = line.find_first_of("m", found); + if(first != string::npos) { + credits += first - found + 1; + } else { + credits += line.size() - found; + } + } + string::iterator end_it = utf8::find_invalid(line.begin(), line.end()); + i = utf8::distance(line.begin(), end_it) - credits; + //printf("Word: %s, i: %ld\n", word.c_str(), i); + if(i > width) { + word.erase(0, word.find_first_not_of(" ")); + if(line.find_last_of(" ") != string::npos) { + line.erase(line.find_last_of(" ")); + out << line << '\n'; + } + line = word; + } + } + out << line; +} + +int main(int argc, char* argv[]) { + static struct option long_options[] = { + {"help", no_argument, 0, 'h'}, + {"list-modules", no_argument, 0, 0}, + {"module", required_argument, 0, 'm'}, + {"set-default-module", required_argument, 0, 0}, + {"list-books", no_argument, 0, 0}, + {"list-chapters", required_argument, 0, 0}, + {"omit-verse-numbers", no_argument, 0, 'o'}, + {"list-installable", optional_argument, 0, 0}, + {"install-network", required_argument, 0, 0}, + {"install-zip", required_argument, 0, 0}, + {"remove-module", required_argument, 0, 0} + }; + int opt, option_index; + string modname; + bool omitVerseNums = false; + bool doListBooks = false; + string listChaptersBook; + string option; + while ((opt = getopt_long(argc, argv, "hm:o", long_options, &option_index)) != -1) { + switch(opt) { + case 'h': + usage(); + return 0; + case 'm': + modname = string(optarg); + break; + case 'o': + omitVerseNums = true; + break; + case 0: + option = string(long_options[option_index].name); + if(option == "list-modules") { + listModules(); + return 0; + } else if(option == "set-default-module") { + setDefaultModule(string(optarg)); + } else if(option == "list-books") { + doListBooks = true; + } else if(option == "list-chapters") { + listChaptersBook = string(optarg); + } else if(option == "list-installable") { + if(optarg == nullptr) { + listInstallable(string()); + } else { + listInstallable(string(optarg)); + } + } else if(option == "install-network") { + installNetwork(string(optarg)); + } else if(option == "install-zip") { + installZip(string(optarg)); + } else if(option == "remove-module") { + removeMod(string(optarg)); + } + break; + default: + usage(); + return 1; + } + } + if(modname.empty()) { + modname = getDefaultModule(); + } + if(doListBooks) { + listBooks(modname); + } + if(! listChaptersBook.empty()) { + listChapters(modname, listChaptersBook); + } + string reference; + while(optind < argc) { + reference += argv[optind++]; + reference += " "; + } + if(reference.empty()) { + // That's all. + return 0; + } + + auto text = libbible::getText(libbible::getPassage(modname, reference)); + int chapter = 0; + int verse = 0; + const char* indent = " "; + bool isNewline = true; + stringstream out; + for(auto tex : text) { + if(!omitVerseNums && tex.chapter != chapter) { + out << tex.book << " Chapter " << tex.chapter << ":\n"; + } + bool isParagraph = false; + bool isIndent = false; + bool isDivineName = false; + bool isJesus = false; + bool isTitle = false; + bool isParallel = false; + bool isPreverse = false; + for(string modifier : tex.modifiers) { + if(modifier == "paragraph") { + isParagraph = true; + } else if (modifier == "line indent0") { + isIndent = true; + } else if (modifier == "divineName") { + isDivineName = true; + } else if (modifier == "wordsOfJesus") { + isJesus = true; + } else if (modifier == "title") { + isTitle = true; + } else if (modifier == "parallel") { + isParallel = true; + } else if (modifier == "preverse") { + isPreverse = true; + } + } + if(isPreverse or isTitle or isParallel) { + // Someday maybe we add this, but for now, omit + tex.text = ""; + } + if(isIndent) { + isParagraph = false; + if(isNewline) { + out << indent; + } + } + if(isParagraph) { + out << indent; + } + if(isDivineName) { + transform(tex.text.begin(), tex.text.end(), tex.text.begin(), ::toupper); + } + if(isJesus) { + out << "\033[;31m"; + } + if(omitVerseNums && tex.verse != verse) { + out << " "; + } else if(!omitVerseNums && tex.verse != verse) { + out << " (" << tex.verse << ") "; + } + chapter = tex.chapter; + verse = tex.verse; + out << tex.text; + if(tex.text.back() == '\n') { + isNewline = true; + } else { + isNewline = false; + } + if(isJesus) { + out << "\033[0m"; + } + } + out << "\n"; + + // Get window size + struct winsize size; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &size); + // size.ws_col is number of columns, or 0 if it's a pipe + int cols = size.ws_col; + // If terminal is too small, treat it like a pipe + if(cols < 10) { + cols = 0; + } + + // Now print + if(cols == 0) { + cout << out.str(); + } else { + stringstream out2; + textWrap(out, out2, cols); + cout << out2.str(); + } + return 0; +} diff --git a/src/lib/libbible.cc b/src/lib/libbible.cc new file mode 100644 index 0000000..c9acb7d --- /dev/null +++ b/src/lib/libbible.cc @@ -0,0 +1,262 @@ +#include "libbible.h" +#include +#include +#include +#include +#include +#include + +using namespace sword; +using namespace std; + +SWMgr library(new MarkupFilterMgr(FMT_XHTML)); +OSISFootnotes filter; + +vector getBooks(SWModule *target) { + vector books; + VerseKey *key = (VerseKey *) target->getKey(); + for(char t = 1; t <= key->getTestamentMax(); t++) { + key->setTestament(t); + for(char b = 1; b <= key->getBookMax(); b++) { + key->setBook(b); + // Bug (whose fault??) in JPS; they CLAIM to have two testaments, + // but they only have one, which causes repeats. + if(std::find(books.begin(), books.end(), key->getBookName()) != books.end()) { + continue; + } + // Another issue (maybe bug?) Some translations are NT only, + // but still report OT books/chapters. + if(string(target->renderText()).empty()) { + continue; + } + books.push_back(key->getBookName()); + } + } + return books; +} + +map> libbible::getModules() { + library.load(); + map> mods; + ModMap::iterator it; + for (it = library.getModules().begin(); it != library.getModules().end(); it++) { + string modName = (*it).second->getName(); + SWModule *target = library.getModule(modName.c_str()); + mods[modName] = getBooks(target); + } + return mods; +} + +vector libbible::getPassages(string modName, string book) { + vector passages; + SWModule *target = library.getModule(modName.c_str()); + if(target == nullptr) { + // Module doesn't exist + return passages; + } + target->setKey((book + " " + "1").c_str()); + VerseKey *key = (VerseKey *) target->getKey(); + int maxChapter = key->getChapterMax(); + for(int chapter = 1; chapter <= maxChapter; chapter++) { + string ref = book + ' ' + to_string(chapter); + target->setKey(ref.c_str()); + VerseKey *key = (VerseKey *) target->getKey(); + libbible::passage pass; + pass.modName = modName; + pass.book = string(key->getBookName()); + pass.bookShort = string(key->getBookAbbrev()); + pass.chapterStart = chapter; + pass.chapterEnd = chapter; + pass.verseStart = 1; + pass.verseEnd = key->getVerseMax(); + passages.push_back(pass); + } + return passages; +} + +libbible::text getEmptyText(VerseKey *key) { + libbible::text t; + t.chapter = key->getChapter(); + t.verse = key->getVerse(); + t.book = key->getBookName(); + t.bookShort = key->getBookAbbrev(); + return t; +} + +libbible::passage libbible::getPassage(string modName, string reference) { + libbible::passage pass; + pass.modName = modName; + SWModule *target = library.getModule(pass.modName.c_str()); + if(target == nullptr || reference.empty()) { + // Bad input + return pass; + } + vector validBooks = getBooks(target); + //printf("Hey, I'm inferring missing parts!\n"); + // Let's use the target to help us + target->setKey(reference.c_str()); + VerseKey *key = (VerseKey *) target->getKey(); + pass.book = string(key->getBookName()); + // Hold on a moment, is this book even legal? + if(find(validBooks.begin(), validBooks.end(), pass.book) == validBooks.end()) { + key->setBookName(validBooks[0].c_str()); + pass.book = string(key->getBookName()); + } + pass.bookShort = string(key->getBookAbbrev()); + pass.chapterStart = key->getChapter(); + pass.verseStart = key->getVerse(); + //printf("Results so far: book: %s; chapterStart: %d; verseStart: %d\n", pass.book.c_str(), pass.chapterStart, pass.verseStart); + // And now we just need chapterEnd and verseEnd. Yippee. + string ref = string(reference); + ref.erase(remove(ref.begin(), ref.end(), ' '), ref.end()); + if(ref.find('-') == string::npos) { + // There's no range! + if(ref.find(':') == string::npos) { + // It's a full chapter reference + pass.chapterEnd = pass.chapterStart; + pass.verseEnd = key->getVerseMax(); + } else { + // It's a single verse reference + pass.chapterEnd = pass.chapterStart; + pass.verseEnd = pass.verseStart; + //printf("Hey, it's a single verse reference!\n"); + } + } else { + if(ref.find(':') == string::npos) { + // It's a multi-full-chapter reference + pass.chapterEnd = stoi(ref.substr(ref.find_last_of('-')+1)); + key->setChapter(pass.chapterEnd); + pass.verseEnd = key->getVerseMax(); + } else { + // It falls in categories c:v-v or c:v-c:v (or, technically, c-c:v) + string rangeEnd = ref.substr(ref.find_last_of('-')+1); + if(rangeEnd.find(':') == string::npos) { + // It's c:v-v + pass.verseEnd = stoi(rangeEnd); + pass.chapterEnd = pass.chapterStart; + } else { + // It's c:v-c:v (or c-c:v, but code is the same) + pass.chapterEnd = stoi(rangeEnd.substr(0, rangeEnd.find(':'))); + pass.verseEnd = stoi(rangeEnd.substr(rangeEnd.find(':')+1)); + } + } + } + return pass; +} + +vector libbible::getText(libbible::passage pass) { + vector texts; + SWModule *target = library.getModule(pass.modName.c_str()); + filter.setOptionValue("Off"); + target->addOptionFilter(&filter); + if(target == nullptr) { + // Module doesn't exist + return texts; + } + if(pass.book.empty()) { + pass.book = pass.bookShort; + } + target->setKey((pass.book + + " " + to_string(pass.chapterStart) + + ":" + to_string(pass.verseStart)).c_str()); + VerseKey *key = (VerseKey *) target->getKey(); + + bool endOfParagraph = false; + + string book = string(key->getBookName()); + + for(; string(key->getBookName()) == book && + (key->getChapter() < pass.chapterEnd + || (key->getChapter() == pass.chapterEnd && key->getVerse() <= pass.verseEnd)); + (*key)++) { + + string text = string(target->renderText()); + //printf("Working with: %s\n", text.c_str()); + + texts.push_back(getEmptyText(key)); + + if(key->getVerse() == 1 || endOfParagraph) { + if(find(texts.back().modifiers.begin(), texts.back().modifiers.end(), "paragraph") == texts.back().modifiers.end()) { + texts.back().modifiers.push_back("paragraph"); + } + endOfParagraph = false; + } + + // Variable to accumulate unterminated spans + std::vector> spans; + bool spansChanged = false; + bool hasAddedText = false; + // Iterate over text + for(auto i = text.begin(); i != text.end(); i++) { + if(*i != '<') { + if(spansChanged) { + spansChanged = false; + if(!texts.back().text.empty()) { + texts.push_back(getEmptyText(key)); + } + for(auto& [tag, modifier] : spans) { + if(find(texts.back().modifiers.begin(), texts.back().modifiers.end(), modifier) == texts.back().modifiers.end()) { + texts.back().modifiers.push_back(modifier); + } + } + } + if(*i == '\n') { + continue; // We add newlines with
+ } + if(! hasAddedText && (*i == ' ' || *i == '\t')) { + continue; + } + if(*i == "¶"[0] && i+1 != text.end() && *(i+1) == "¶"[1]) { + i++; + if(hasAddedText) { + texts.back().text += '\n'; + } else { + // Append \n to text in previous texts (if applicable) + if(texts.size() > 1) { + texts[texts.size()-2].text += '\n'; + } + texts.back().modifiers.push_back("paragraph"); + continue; + } + } + texts.back().text += *i; + hasAddedText = true; + } + else { + string span; + for(; i != text.end(); i++) { + span.push_back(*i); + if(*i == '>') { + // The end of the span will be "
". + if(span[1] == '/') { + string tag = span.substr(2, span.size()-3); + for(auto rit = spans.rbegin(); rit != spans.rend(); rit++) { + if(rit->first == tag) { + spans.erase(rit.base()-1); + spansChanged = true; + break; + } + } + } else if(span.find("class=\"") != string::npos) { + // The span will be formatted "" + // We want just the NAME + string tag = span.substr(1, span.find(" ")-1); + size_t start = span.find("class=\"")+7; + size_t end = span.find("\"", start); + spans.push_back(std::pair(tag, span.substr(start, end-start))); + spansChanged = true; + } else if(span.find("preverse") != string::npos) { + string tag = span.substr(1, span.find(" ")-1); + spans.push_back(std::pair(tag, "preverse")); + } else if(span == "
" || span == "
") { + texts.back().text += '\n'; + } + break; + } + } + } + } + endOfParagraph = (text[text.length()-1] == '\n'); + } + return texts; +} diff --git a/src/lib/libbible.h b/src/lib/libbible.h new file mode 100644 index 0000000..f77dc8c --- /dev/null +++ b/src/lib/libbible.h @@ -0,0 +1,115 @@ +#include +#include +#include + +namespace libbible { + + struct text { + int chapter; + int verse; + std::string book; + std::string bookShort; + std::string text; + std::vector modifiers; // e.g., paragraph, line indent0, divineName, wordsOfJesus + }; + + struct passage { + std::string modName; + std::string book; + std::string bookShort; + int chapterStart; + int verseStart; + int chapterEnd; + int verseEnd; + }; + + /* + * @return Map of modName to supported books + */ + std::map> getModules(void); + + /* + * @return Vector of valid single full-chapter passages for a book + */ + std::vector getPassages(std::string modName, std::string book); + + /* + * @param modName the module to use for determining the passage + * @param reference a human-readable reference, e.g., "gen 1:26-27" + * @return the passage matching the reference + */ + passage getPassage(std::string modName, std::string reference); + + /* + * @return Text for a passage + */ + std::vector getText(struct passage pass); + + /************************** + * Methods dealing with mods + ***************************/ + + class Status { + public: + virtual void update(unsigned long totalBytes, unsigned long completedBytes, std::string message) {} + }; + + /** + * @param status Status update method is called asynchronously as download progresses + */ + void setStatusReporter(Status& status); + + /** + * @return A mapping from language to bible version names + */ + std::map> downloadModsAvailable(); + + /** + * @return A mapping from language abbreviations to full language names + */ + std::map getLanguageNames(); + + /** + * Cancel an in-progress download + */ + void terminateDownload(void); + + /** + * @param language The language of the mod to install as provided from downloadModsAvailable + * @param name The name of the bible version as provided from downloadModsAvailable + * @see downloadModsAvailable() + * @return true on success, false otherwise + */ + bool installModFromInternet(std::string language, std::string name); + + /** + * @param filename Path to the .zip compressed module to be installed + * @return true on success, false otherwise + */ + bool installModFromZip(std::string filename); + + /** + * @param modname The name of the module to be removed + */ + void uninstallMod(std::string modname); + + /****************************** + * Methods dealing with settings + *******************************/ + + /* + * From already established code, valid and useful values are: + * int fontsize: the last used size of the font + * string passage: the last looked-up passage + * string module: the last used module + */ + + void settingsWrite(std::string key, std::string value); + + std::string settingsRead(std::string key); + + void settingsWriteInt(std::string key, int value); + + int settingsReadInt(std::string key); + +} diff --git a/src/lib/mods.cc b/src/lib/mods.cc new file mode 100644 index 0000000..ab54e48 --- /dev/null +++ b/src/lib/mods.cc @@ -0,0 +1,233 @@ +#include "libbible.h" +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +class myStatusReporter : public sword::StatusReporter { + public: + myStatusReporter(libbible::Status *status); + ~myStatusReporter(); + void preStatus(long totalBytes, long completedBytes, const char *message); + void update(unsigned long totalBytes, unsigned long completedBytes); + protected: + libbible::Status *status; + string message; +}; + +myStatusReporter::myStatusReporter(libbible::Status *s) { + status = s; +} + +myStatusReporter::~myStatusReporter() {}; + +//virtual void libbible::Status::update(unsigned long totalBytes, unsigned long completedBytes, string message) {} + +void myStatusReporter::preStatus(long totalBytes, long completedBytes, const char *msg) { + message = string(msg); + status->update((unsigned long) totalBytes, (unsigned long) completedBytes, message); + //printf("Got a status update: %ld / %ld, \"%s\"\n", completedBytes, totalBytes, message.c_str()); +} + +void myStatusReporter::update(unsigned long totalBytes, unsigned long completedBytes) { + status->update(totalBytes, completedBytes, message); + //printf("Got a status update: %ld / %ld, \"%s\"\n", completedBytes, totalBytes, message.c_str()); +} + +string basedir = (getenv("HOME")) + string("/.sword/"); +sword::InstallMgr *installMgr = new sword::InstallMgr((basedir + std::string("InstallMgr")).c_str(), nullptr); +map>> installSources; +map languageNames; // maps abbreviation to full name + +void libbible::setStatusReporter(libbible::Status& status) { + myStatusReporter *msr = new myStatusReporter(&status); + free(installMgr); + installMgr = new sword::InstallMgr((basedir + std::string("InstallMgr")).c_str(), msr); + installMgr->setUserDisclaimerConfirmed(true); +} + +map> libbible::downloadModsAvailable() { + installSources.clear(); + languageNames.clear(); + mkdir((basedir + std::string("mods.d/")).c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + mkdir((basedir + std::string("modules/")).c_str(), S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH); + installMgr->setUserDisclaimerConfirmed(true); + string confpath = basedir + string("InstallMgr/InstallMgr.conf"); + if(! sword::FileMgr::existsFile(confpath.c_str())) { + // Lifted directly from xiphos + sword::FileMgr::createParent(confpath.c_str()); + sword::SWConfig config(confpath.c_str()); + sword::InstallSource is("FTP"); + is.caption = "CrossWire"; + is.source = "ftp.crosswire.org"; + is.directory = "/pub/sword/raw"; + config["General"]["PassiveFTP"] = "true"; + config["Sources"]["FTPSource"] = is.getConfEnt(); + config.save(); + installMgr->refreshRemoteSourceConfiguration(); + } + installMgr->readInstallConf(); + map> modsAvailable; + map> languagesToFull; + //printf("Getting langs...\n"); + for(auto src : installMgr->sources) { + if(src.second->getMgr()->Modules.empty()) { + //printf("Refreshing remote source: %s\n", src.second->getConfEnt().c_str()); + installMgr->refreshRemoteSource(src.second); + } + for(auto mod : src.second->getMgr()->Modules) { + auto *curMod = mod.second; + string type(curMod->getType()); + if(type == "Biblical Texts") { + string language(curMod->getLanguage()); + string fullLang; + if(curMod->getConfigEntry("LCSH")) { + // Split on periods, last field, strip + fullLang = string(curMod->getConfigEntry("LCSH")); + // If ends with ., remove + if(fullLang.ends_with('.')) fullLang = fullLang.substr(0, fullLang.size()-1); + if(fullLang.find('.') != string::npos) fullLang = fullLang.substr(fullLang.find_last_of('.')+1); + while(fullLang.starts_with(' ')) fullLang = fullLang.substr(1); + while(fullLang.ends_with(' ')) fullLang = fullLang.substr(0, fullLang.size()-1); + } + vector newLangs; + languagesToFull.emplace(language, newLangs); + languagesToFull[language].push_back(fullLang); + vector newMods; + vector> newSources; + // emplace only adds if key is unique + modsAvailable.emplace(language, newMods); + installSources.emplace(language, newSources); + modsAvailable[language].push_back(string(curMod->getName())); + pair p(string(curMod->getName()), src.second); + installSources[language].push_back(p); + } + } + } + // Now use majority voting to move languagesToFull -> languageNames + for(const auto& [abbrev, fulls] : languagesToFull) { + std::map majVote; + for(auto full : fulls) { + majVote.try_emplace(full, 0); + majVote[full]++; + } + string selected = fulls[0]; + for(auto full : fulls) { + if(majVote[full] > majVote[selected] or (majVote[full] == majVote[selected] and !full.empty() and full.size() < selected.size())) { + selected = full; + } + } + if(selected.empty()) languageNames[abbrev] = abbrev; + else languageNames[abbrev] = selected; + } + return modsAvailable; +} + +std::map libbible::getLanguageNames() { + if(languageNames.empty()) { + downloadModsAvailable(); + } + return languageNames; +} + +void libbible::terminateDownload() { + installMgr->terminate(); +} + +bool libbible::installModFromInternet(string language, string name) { + // Searching through map>> installSources; + if(installSources.empty()) { + downloadModsAvailable(); + } + for (pair p : installSources[language]) { + if(p.first == name) { + sword::SWMgr mgr(basedir.c_str()); + if(installMgr->installModule(&mgr, 0, name.c_str(), p.second) == 0) { + printf("Installed from %s\n", p.second->getConfEnt().c_str()); + return true; + } + return false; + } + } + return false; +} + +#define READ_SIZE 8192 +#define delim '/' + +bool libbible::installModFromZip(string filename) { + // So... turns out it's a mite unsupported to install from a .zip + // Here's the deal. We do a syscall to unzip. We fancy like that. + // TODO: Use the ZipCompress module from SWORD instead. + /*string command = "unzip -o " + filename + " -d " + basedir + "&> /dev/null"; + if(system(command.c_str())) { + //Uh oh... + printf("Something bad happened when unpacking %s\n. Is unzip installed?", filename.c_str()); + }*/ + unzFile zipfile = unzOpen(filename.c_str()); + if(zipfile == NULL) { + return false; + } + unz_global_info global_info; + if(unzGetGlobalInfo(zipfile, &global_info) != UNZ_OK) { + unzClose(zipfile); + return false; + } + char read_buffer[READ_SIZE]; + ulong i; + for(i = 0; i < global_info.number_entry; i++) { + unz_file_info file_info; + if(unzGetCurrentFileInfo(zipfile, &file_info, read_buffer, READ_SIZE, NULL, 0, NULL, 0) != UNZ_OK) { + unzClose(zipfile); + return false; + } + string fname = basedir + string(read_buffer); + size_t pos = fname.find_last_of(delim); + if(pos != string::npos) { + string path = fname.substr(0, pos); + filesystem::create_directories(path); + } + if(unzOpenCurrentFile(zipfile) != UNZ_OK) { + unzCloseCurrentFile(zipfile); + unzClose(zipfile); + return false; + } + FILE *out = fopen(fname.c_str(), "wb"); + if(out == NULL) { + unzCloseCurrentFile(zipfile); + unzClose(zipfile); + return false; + } + int bytesRead; + do { + bytesRead = unzReadCurrentFile(zipfile, read_buffer, READ_SIZE); + if(bytesRead < 0) { + printf("error %d\n", bytesRead); + unzCloseCurrentFile(zipfile); + unzClose(zipfile); + return false; + } + if(bytesRead > 0) { + fwrite(read_buffer, bytesRead, 1, out); + } + } while(bytesRead > 0); + fclose(out); + unzCloseCurrentFile(zipfile); + unzGoToNextFile(zipfile); + } + unzClose(zipfile); + return true; +} + +void libbible::uninstallMod(string modname) { + sword::SWMgr mgr(basedir.c_str()); + sword::ModMap::iterator it = mgr.Modules.find(modname.c_str()); + if(it != mgr.Modules.end()) { + installMgr->removeModule(&mgr, it->second->getName()); + } +} diff --git a/src/lib/settings.cc b/src/lib/settings.cc new file mode 100644 index 0000000..848e22f --- /dev/null +++ b/src/lib/settings.cc @@ -0,0 +1,23 @@ +#include "libbible.h" +#include + +std::string path = (std::getenv("HOME")) + std::string("/.sword/libbible.conf"); +sword::SWConfig config(path.c_str()); + +void libbible::settingsWrite(std::string key, std::string value) { + config["General"][key.c_str()] = sword::SWBuf(value.c_str()); + config.save(); +} + +std::string libbible::settingsRead(std::string key) { + return config["General"][key.c_str()].c_str(); +} + +void libbible::settingsWriteInt(std::string key, int value) { + config["General"][key.c_str()] = sword::SWBuf(std::to_string(value).c_str()); + config.save(); +} + +int libbible::settingsReadInt(std::string key) { + return atoi(config["General"][key.c_str()].c_str()); +} diff --git a/src/test/Makefile b/src/test/Makefile new file mode 100644 index 0000000..1f8bc8b --- /dev/null +++ b/src/test/Makefile @@ -0,0 +1,20 @@ +LIBS = sword minizip +override CXXFLAGS += -MMD -Wall -fPIC -std=c++20 `pkg-config $(LIBS) --cflags` +override LDFLAGS += -lstdc++fs `pkg-config $(LIBS) --libs` -lcppunit ../../libbible.so +SOURCES = $(wildcard *.cc) +OBJECTS = $(SOURCES:.cc=.o) +DEPS = $(OBJECTS:.o=.d) +TEST = testLibbible + +$(TEST): $(OBJECTS) + $(CXX) $(OBJECTS) -o $@ $(LDFLAGS) + +-include $(DEPS) + +.PHONY: test +test: $(TEST) + ./$(TEST) + +.PHONY: clean +clean: + $(RM) $(OBJECTS) $(DEPS) $(TEST) diff --git a/src/test/modules/JPS.zip b/src/test/modules/JPS.zip new file mode 100644 index 0000000..4f09ff8 Binary files /dev/null and b/src/test/modules/JPS.zip differ diff --git a/src/test/modules/KJV.zip b/src/test/modules/KJV.zip new file mode 100644 index 0000000..27c161d Binary files /dev/null and b/src/test/modules/KJV.zip differ diff --git a/src/test/testLibbible.cc b/src/test/testLibbible.cc new file mode 100644 index 0000000..d3a265a --- /dev/null +++ b/src/test/testLibbible.cc @@ -0,0 +1,265 @@ +//#include +#include "libbible.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace CppUnit; +using namespace std; + +//----------------------------------------------------------------------------- + +class TestLibbible : public CppUnit::TestFixture +{ + CPPUNIT_TEST_SUITE(TestLibbible); + CPPUNIT_TEST(testGetModules); + CPPUNIT_TEST(testGetPassages); + CPPUNIT_TEST(testGetText); + CPPUNIT_TEST(testSettings); + CPPUNIT_TEST(testDownload); + CPPUNIT_TEST_SUITE_END(); + + //public: + //void setUp(void); + //void tearDown(void); + + protected: + void testGetModules(void); + void testGetPassages(void); + void testGetText(void); + void testSettings(void); + void testDownload(void); + +}; + +//----------------------------------------------------------------------------- + +class StatusTester : public libbible::Status +{ + public: + virtual void update(unsigned long totalBytes, unsigned long completedBytes, string message); + bool hasBeenUpdated = false; +}; + +void StatusTester::update(unsigned long totalBytes, unsigned long completedBytes, string message) { + hasBeenUpdated = true; +} + +//----------------------------------------------------------------------------- + +class CancelTester : public libbible::Status +{ + public: + virtual void update(unsigned long totalBytes, unsigned long completedBytes, string message); +}; + +void CancelTester::update(unsigned long totalBytes, unsigned long completedBytes, string message) { + libbible::terminateDownload(); +} + +//----------------------------------------------------------------------------- + +void TestLibbible::testGetModules(void) { + map> mods = libbible::getModules(); + for(auto pair : mods) { + libbible::uninstallMod(pair.first); + } + CPPUNIT_ASSERT(libbible::getModules().empty()); + CPPUNIT_ASSERT(libbible::installModFromZip("modules/KJV.zip")); + CPPUNIT_ASSERT(libbible::installModFromZip("modules/JPS.zip")); + mods = libbible::getModules(); + CPPUNIT_ASSERT(mods.find("KJV") != mods.end()); + CPPUNIT_ASSERT(mods["KJV"].size() == 66); + CPPUNIT_ASSERT(mods["KJV"][7] == "Ruth"); + CPPUNIT_ASSERT(mods["KJV"][42] == "John"); + CPPUNIT_ASSERT(mods.find("JPS") != mods.end()); + CPPUNIT_ASSERT(mods["JPS"].size() == 39); +} + +void TestLibbible::testGetPassages(void) { + auto passages = libbible::getPassages("KJV", "Romans"); + CPPUNIT_ASSERT(passages[0].modName == "KJV"); + CPPUNIT_ASSERT(passages[0].book == "Romans"); + CPPUNIT_ASSERT(passages[0].bookShort == "Rom"); + CPPUNIT_ASSERT(passages[0].chapterStart == 1); + CPPUNIT_ASSERT(passages[0].verseStart == 1); + CPPUNIT_ASSERT(passages[0].chapterEnd == 1); + CPPUNIT_ASSERT(passages[0].verseEnd == 32); + CPPUNIT_ASSERT(passages.size() == 16); +} + +vector> getChapVerses(std::vector text) { + vector> chapVerses; + for(auto tex : text) { + //printf("Text is: `%s`\n", tex.text.c_str()); + //for(auto modifier : tex.modifiers) { + // printf("\tModifiers include: %s\n", modifier.c_str()); + //} + if(chapVerses.empty() || + chapVerses.back().first != tex.chapter || + chapVerses.back().second != tex.verse) { + chapVerses.push_back(pair(tex.chapter, tex.verse)); + } + } + return chapVerses; +} + +void TestLibbible::testGetText(void) { + libbible::passage pass; + pass.modName = "KJV"; + pass.bookShort = "Matt"; + pass.chapterStart = 3; + pass.verseStart = 16; + pass.chapterEnd = 4; + pass.verseEnd = 7; + auto text = libbible::getText(pass); + // Verify that it includes every verse (3:16-17 + 4:1-7) + vector> chapVerses = getChapVerses(text); + vector> shouldContain = vector>({pair(3, 16), + pair(3, 17), + pair(4, 1), + pair(4, 2), + pair(4, 3), + pair(4, 4), + pair(4, 5), + pair(4, 6), + pair(4, 7)}); + CPPUNIT_ASSERT(chapVerses == shouldContain); + libbible::passage pass2; + pass2.modName = "KJV"; + pass2.book = "John"; + pass2.chapterStart = 3; + pass2.verseStart = 16; + pass2.chapterEnd = 3; + pass2.verseEnd = 16; + text = libbible::getText(pass2); + string allText; + for(auto tex : text) { + allText += tex.text; + } + //printf("Text is: `%s`\n", allText.c_str()); + CPPUNIT_ASSERT(allText == "For God so loved the world, that he gave his only begotten Son, that whosoever believeth in him should not perish, but have everlasting life. "); + + text = libbible::getText(libbible::getPassage("KJV", "John 3:3")); + allText.clear(); + for(auto tex : text) { + allText += tex.text; + } + //printf("Text is: `%s`\n", allText.c_str()); + CPPUNIT_ASSERT(allText == "Jesus answered and said unto him, Verily, verily, I say unto thee, Except a man be born again, he cannot see the kingdom of God. "); + + text = libbible::getText(libbible::getPassage("KJV", "Gal 5:22-23")); + chapVerses = getChapVerses(text); + shouldContain = vector>({pair(5, 22), pair(5, 23)}); + CPPUNIT_ASSERT(chapVerses == shouldContain); + + text = libbible::getText(libbible::getPassage("KJV", "1 cor 1:31-2:1")); + chapVerses = getChapVerses(text); + shouldContain = vector>({pair(1, 31), pair(2, 1)}); + CPPUNIT_ASSERT(chapVerses == shouldContain); + + text = libbible::getText(libbible::getPassage("KJV", "ps 14-15")); + chapVerses = getChapVerses(text); + shouldContain = vector>({pair(14, 1), + pair(14, 2), + pair(14, 3), + pair(14, 4), + pair(14, 5), + pair(14, 6), + pair(14, 7), + pair(15, 1), + pair(15, 2), + pair(15, 3), + pair(15, 4), + pair(15, 5)}); + CPPUNIT_ASSERT(chapVerses == shouldContain); + + text = libbible::getText(libbible::getPassage("KJV", "John 21")); + CPPUNIT_ASSERT(text.back().verse == 25); +} + +void TestLibbible::testSettings(void) { + libbible::settingsWrite("test", "foo"); + CPPUNIT_ASSERT(libbible::settingsRead("test") == "foo"); + libbible::settingsWrite("test", "bar"); + CPPUNIT_ASSERT(libbible::settingsRead("test") == "bar"); + libbible::settingsWriteInt("test", 5); + CPPUNIT_ASSERT(libbible::settingsReadInt("test") == 5); + libbible::settingsWrite("test", ""); + CPPUNIT_ASSERT(libbible::settingsRead("test") == ""); +} + +void TestLibbible::testDownload(void) { + map> modsAvailable = libbible::downloadModsAvailable(); + // We try installing the first available one + string language; + string name; + for(auto pair : modsAvailable) { + language = pair.first; + name = pair.second[0]; + break; + } + CPPUNIT_ASSERT(!language.empty() && !name.empty()); + // Try uninstalling it (shouldn't crash or have nasty side effects!) + libbible::uninstallMod(name); + // Try installing it with cancel. Shoudn't work because it gets cancelled! + CancelTester cancel; + libbible::setStatusReporter(cancel); + libbible::installModFromInternet(language, name); + auto mods = libbible::getModules(); + CPPUNIT_ASSERT(mods.find(name) == mods.end()); + // Now we try with normal status + StatusTester status; + libbible::setStatusReporter(status); + libbible::installModFromInternet(language, name); + mods = libbible::getModules(); + CPPUNIT_ASSERT(mods.find(name) != mods.end()); + CPPUNIT_ASSERT(status.hasBeenUpdated); + +} +//----------------------------------------------------------------------------- + +CPPUNIT_TEST_SUITE_REGISTRATION( TestLibbible ); + +int main(int argc, char* argv[]) { + // informs test-listener about testresults + CPPUNIT_NS::TestResult testresult; + + // register listener for collecting the test-results + CPPUNIT_NS::TestResultCollector collectedresults; + testresult.addListener (&collectedresults); + + // register listener for per-test progress output + CPPUNIT_NS::BriefTestProgressListener progress; + testresult.addListener (&progress); + + // insert test-suite at test-runner by registry + CPPUNIT_NS::TestRunner testrunner; + testrunner.addTest (CPPUNIT_NS::TestFactoryRegistry::getRegistry().makeTest ()); + testrunner.run(testresult); + + // output results in compiler-format + CPPUNIT_NS::CompilerOutputter compileroutputter(&collectedresults, std::cerr); + compileroutputter.write (); + + // Output XML for Jenkins CPPunit plugin + //ofstream xmlFileOut("testLibbibleResults.xml"); + //XmlOutputter xmlOut(&collectedresults, xmlFileOut); + //xmlOut.write(); + + // return 0 if tests were successful + return collectedresults.wasSuccessful() ? 0 : 1; +} diff --git a/src/utf8.h b/src/utf8.h new file mode 100644 index 0000000..4e44514 --- /dev/null +++ b/src/utf8.h @@ -0,0 +1,34 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "utf8/checked.h" +#include "utf8/unchecked.h" + +#endif // header guard diff --git a/src/utf8/checked.h b/src/utf8/checked.h new file mode 100644 index 0000000..1331155 --- /dev/null +++ b/src/utf8/checked.h @@ -0,0 +1,327 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "core.h" +#include + +namespace utf8 +{ + // Base for the exceptions that may be thrown from the library + class exception : public ::std::exception { + }; + + // Exceptions that may be thrown from the library functions. + class invalid_code_point : public exception { + uint32_t cp; + public: + invalid_code_point(uint32_t cp) : cp(cp) {} + virtual const char* what() const throw() { return "Invalid code point"; } + uint32_t code_point() const {return cp;} + }; + + class invalid_utf8 : public exception { + uint8_t u8; + public: + invalid_utf8 (uint8_t u) : u8(u) {} + virtual const char* what() const throw() { return "Invalid UTF-8"; } + uint8_t utf8_octet() const {return u8;} + }; + + class invalid_utf16 : public exception { + uint16_t u16; + public: + invalid_utf16 (uint16_t u) : u16(u) {} + virtual const char* what() const throw() { return "Invalid UTF-16"; } + uint16_t utf16_word() const {return u16;} + }; + + class not_enough_room : public exception { + public: + virtual const char* what() const throw() { return "Not enough space"; } + }; + + /// The library API - functions intended to be called by the users + + template + octet_iterator append(uint32_t cp, octet_iterator result) + { + if (!utf8::internal::is_code_point_valid(cp)) + throw invalid_code_point(cp); + + if (cp < 0x80) // one octet + *(result++) = static_cast(cp); + else if (cp < 0x800) { // two octets + *(result++) = static_cast((cp >> 6) | 0xc0); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else if (cp < 0x10000) { // three octets + *(result++) = static_cast((cp >> 12) | 0xe0); + *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else { // four octets + *(result++) = static_cast((cp >> 18) | 0xf0); + *(result++) = static_cast(((cp >> 12) & 0x3f) | 0x80); + *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + return result; + } + + template + output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) + { + while (start != end) { + octet_iterator sequence_start = start; + internal::utf_error err_code = utf8::internal::validate_next(start, end); + switch (err_code) { + case internal::UTF8_OK : + for (octet_iterator it = sequence_start; it != start; ++it) + *out++ = *it; + break; + case internal::NOT_ENOUGH_ROOM: + throw not_enough_room(); + case internal::INVALID_LEAD: + out = utf8::append (replacement, out); + ++start; + break; + case internal::INCOMPLETE_SEQUENCE: + case internal::OVERLONG_SEQUENCE: + case internal::INVALID_CODE_POINT: + out = utf8::append (replacement, out); + ++start; + // just one replacement mark for the sequence + while (start != end && utf8::internal::is_trail(*start)) + ++start; + break; + } + } + return out; + } + + template + inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) + { + static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd); + return utf8::replace_invalid(start, end, out, replacement_marker); + } + + template + uint32_t next(octet_iterator& it, octet_iterator end) + { + uint32_t cp = 0; + internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); + switch (err_code) { + case internal::UTF8_OK : + break; + case internal::NOT_ENOUGH_ROOM : + throw not_enough_room(); + case internal::INVALID_LEAD : + case internal::INCOMPLETE_SEQUENCE : + case internal::OVERLONG_SEQUENCE : + throw invalid_utf8(*it); + case internal::INVALID_CODE_POINT : + throw invalid_code_point(cp); + } + return cp; + } + + template + uint32_t peek_next(octet_iterator it, octet_iterator end) + { + return utf8::next(it, end); + } + + template + uint32_t prior(octet_iterator& it, octet_iterator start) + { + // can't do much if it == start + if (it == start) + throw not_enough_room(); + + octet_iterator end = it; + // Go back until we hit either a lead octet or start + while (utf8::internal::is_trail(*(--it))) + if (it == start) + throw invalid_utf8(*it); // error - no lead byte in the sequence + return utf8::peek_next(it, end); + } + + /// Deprecated in versions that include "prior" + template + uint32_t previous(octet_iterator& it, octet_iterator pass_start) + { + octet_iterator end = it; + while (utf8::internal::is_trail(*(--it))) + if (it == pass_start) + throw invalid_utf8(*it); // error - no lead byte in the sequence + octet_iterator temp = it; + return utf8::next(temp, end); + } + + template + void advance (octet_iterator& it, distance_type n, octet_iterator end) + { + for (distance_type i = 0; i < n; ++i) + utf8::next(it, end); + } + + template + typename std::iterator_traits::difference_type + distance (octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + utf8::next(first, last); + return dist; + } + + template + octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) + { + while (start != end) { + uint32_t cp = utf8::internal::mask16(*start++); + // Take care of surrogate pairs first + if (utf8::internal::is_lead_surrogate(cp)) { + if (start != end) { + uint32_t trail_surrogate = utf8::internal::mask16(*start++); + if (utf8::internal::is_trail_surrogate(trail_surrogate)) + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + else + throw invalid_utf16(static_cast(trail_surrogate)); + } + else + throw invalid_utf16(static_cast(cp)); + + } + // Lone trail surrogate + else if (utf8::internal::is_trail_surrogate(cp)) + throw invalid_utf16(static_cast(cp)); + + result = utf8::append(cp, result); + } + return result; + } + + template + u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) + { + while (start != end) { + uint32_t cp = utf8::next(start, end); + if (cp > 0xffff) { //make a surrogate pair + *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); + *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); + } + else + *result++ = static_cast(cp); + } + return result; + } + + template + octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) + { + while (start != end) + result = utf8::append(*(start++), result); + + return result; + } + + template + u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) + { + while (start != end) + (*result++) = utf8::next(start, end); + + return result; + } + + // The iterator class + template + class iterator : public std::iterator { + octet_iterator it; + octet_iterator range_start; + octet_iterator range_end; + public: + iterator () {} + explicit iterator (const octet_iterator& octet_it, + const octet_iterator& range_start, + const octet_iterator& range_end) : + it(octet_it), range_start(range_start), range_end(range_end) + { + if (it < range_start || it > range_end) + throw std::out_of_range("Invalid utf-8 iterator position"); + } + // the default "big three" are OK + octet_iterator base () const { return it; } + uint32_t operator * () const + { + octet_iterator temp = it; + return utf8::next(temp, range_end); + } + bool operator == (const iterator& rhs) const + { + if (range_start != rhs.range_start || range_end != rhs.range_end) + throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); + return (it == rhs.it); + } + bool operator != (const iterator& rhs) const + { + return !(operator == (rhs)); + } + iterator& operator ++ () + { + utf8::next(it, range_end); + return *this; + } + iterator operator ++ (int) + { + iterator temp = *this; + utf8::next(it, range_end); + return temp; + } + iterator& operator -- () + { + utf8::prior(it, range_start); + return *this; + } + iterator operator -- (int) + { + iterator temp = *this; + utf8::prior(it, range_start); + return temp; + } + }; // class iterator + +} // namespace utf8 + +#endif //header guard + + diff --git a/src/utf8/core.h b/src/utf8/core.h new file mode 100644 index 0000000..693d388 --- /dev/null +++ b/src/utf8/core.h @@ -0,0 +1,329 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include + +namespace utf8 +{ + // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers + // You may need to change them to match your system. + // These typedefs have the same names as ones from cstdint, or boost/cstdint + typedef unsigned char uint8_t; + typedef unsigned short uint16_t; + typedef unsigned int uint32_t; + +// Helper code - not intended to be directly called by the library users. May be changed at any time +namespace internal +{ + // Unicode constants + // Leading (high) surrogates: 0xd800 - 0xdbff + // Trailing (low) surrogates: 0xdc00 - 0xdfff + const uint16_t LEAD_SURROGATE_MIN = 0xd800u; + const uint16_t LEAD_SURROGATE_MAX = 0xdbffu; + const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u; + const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu; + const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); + const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; + + // Maximum valid value for a Unicode code point + const uint32_t CODE_POINT_MAX = 0x0010ffffu; + + template + inline uint8_t mask8(octet_type oc) + { + return static_cast(0xff & oc); + } + template + inline uint16_t mask16(u16_type oc) + { + return static_cast(0xffff & oc); + } + template + inline bool is_trail(octet_type oc) + { + return ((utf8::internal::mask8(oc) >> 6) == 0x2); + } + + template + inline bool is_lead_surrogate(u16 cp) + { + return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); + } + + template + inline bool is_trail_surrogate(u16 cp) + { + return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); + } + + template + inline bool is_surrogate(u16 cp) + { + return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); + } + + template + inline bool is_code_point_valid(u32 cp) + { + return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp)); + } + + template + inline typename std::iterator_traits::difference_type + sequence_length(octet_iterator lead_it) + { + uint8_t lead = utf8::internal::mask8(*lead_it); + if (lead < 0x80) + return 1; + else if ((lead >> 5) == 0x6) + return 2; + else if ((lead >> 4) == 0xe) + return 3; + else if ((lead >> 3) == 0x1e) + return 4; + else + return 0; + } + + template + inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length) + { + if (cp < 0x80) { + if (length != 1) + return true; + } + else if (cp < 0x800) { + if (length != 2) + return true; + } + else if (cp < 0x10000) { + if (length != 3) + return true; + } + + return false; + } + + enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; + + /// Helper for get_sequence_x + template + utf_error increase_safely(octet_iterator& it, octet_iterator end) + { + if (++it == end) + return NOT_ENOUGH_ROOM; + + if (!utf8::internal::is_trail(*it)) + return INCOMPLETE_SEQUENCE; + + return UTF8_OK; + } + + #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;} + + /// get_sequence_x functions decode utf-8 sequences of the length x + template + utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + return UTF8_OK; + } + + template + utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f); + + return UTF8_OK; + } + + template + utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (*it) & 0x3f; + + return UTF8_OK; + } + + template + utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + if (it == end) + return NOT_ENOUGH_ROOM; + + code_point = utf8::internal::mask8(*it); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (utf8::internal::mask8(*it) << 6) & 0xfff; + + UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) + + code_point += (*it) & 0x3f; + + return UTF8_OK; + } + + #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR + + template + utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) + { + // Save the original value of it so we can go back in case of failure + // Of course, it does not make much sense with i.e. stream iterators + octet_iterator original_it = it; + + uint32_t cp = 0; + // Determine the sequence length based on the lead octet + typedef typename std::iterator_traits::difference_type octet_difference_type; + const octet_difference_type length = utf8::internal::sequence_length(it); + + // Get trail octets and calculate the code point + utf_error err = UTF8_OK; + switch (length) { + case 0: + return INVALID_LEAD; + case 1: + err = utf8::internal::get_sequence_1(it, end, cp); + break; + case 2: + err = utf8::internal::get_sequence_2(it, end, cp); + break; + case 3: + err = utf8::internal::get_sequence_3(it, end, cp); + break; + case 4: + err = utf8::internal::get_sequence_4(it, end, cp); + break; + } + + if (err == UTF8_OK) { + // Decoding succeeded. Now, security checks... + if (utf8::internal::is_code_point_valid(cp)) { + if (!utf8::internal::is_overlong_sequence(cp, length)){ + // Passed! Return here. + code_point = cp; + ++it; + return UTF8_OK; + } + else + err = OVERLONG_SEQUENCE; + } + else + err = INVALID_CODE_POINT; + } + + // Failure branch - restore the original value of the iterator + it = original_it; + return err; + } + + template + inline utf_error validate_next(octet_iterator& it, octet_iterator end) { + uint32_t ignored; + return utf8::internal::validate_next(it, end, ignored); + } + +} // namespace internal + + /// The library API - functions intended to be called by the users + + // Byte order mark + const uint8_t bom[] = {0xef, 0xbb, 0xbf}; + + template + octet_iterator find_invalid(octet_iterator start, octet_iterator end) + { + octet_iterator result = start; + while (result != end) { + utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end); + if (err_code != internal::UTF8_OK) + return result; + } + return result; + } + + template + inline bool is_valid(octet_iterator start, octet_iterator end) + { + return (utf8::find_invalid(start, end) == end); + } + + template + inline bool starts_with_bom (octet_iterator it, octet_iterator end) + { + return ( + ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) && + ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) && + ((it != end) && (utf8::internal::mask8(*it)) == bom[2]) + ); + } + + //Deprecated in release 2.3 + template + inline bool is_bom (octet_iterator it) + { + return ( + (utf8::internal::mask8(*it++)) == bom[0] && + (utf8::internal::mask8(*it++)) == bom[1] && + (utf8::internal::mask8(*it)) == bom[2] + ); + } +} // namespace utf8 + +#endif // header guard + + diff --git a/src/utf8/unchecked.h b/src/utf8/unchecked.h new file mode 100644 index 0000000..cb24271 --- /dev/null +++ b/src/utf8/unchecked.h @@ -0,0 +1,228 @@ +// Copyright 2006 Nemanja Trifunovic + +/* +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. +*/ + + +#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 +#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 + +#include "core.h" + +namespace utf8 +{ + namespace unchecked + { + template + octet_iterator append(uint32_t cp, octet_iterator result) + { + if (cp < 0x80) // one octet + *(result++) = static_cast(cp); + else if (cp < 0x800) { // two octets + *(result++) = static_cast((cp >> 6) | 0xc0); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else if (cp < 0x10000) { // three octets + *(result++) = static_cast((cp >> 12) | 0xe0); + *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + else { // four octets + *(result++) = static_cast((cp >> 18) | 0xf0); + *(result++) = static_cast(((cp >> 12) & 0x3f)| 0x80); + *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(result++) = static_cast((cp & 0x3f) | 0x80); + } + return result; + } + + template + uint32_t next(octet_iterator& it) + { + uint32_t cp = utf8::internal::mask8(*it); + typename std::iterator_traits::difference_type length = utf8::internal::sequence_length(it); + switch (length) { + case 1: + break; + case 2: + it++; + cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); + break; + case 3: + ++it; + cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); + ++it; + cp += (*it) & 0x3f; + break; + case 4: + ++it; + cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); + ++it; + cp += (utf8::internal::mask8(*it) << 6) & 0xfff; + ++it; + cp += (*it) & 0x3f; + break; + } + ++it; + return cp; + } + + template + uint32_t peek_next(octet_iterator it) + { + return utf8::unchecked::next(it); + } + + template + uint32_t prior(octet_iterator& it) + { + while (utf8::internal::is_trail(*(--it))) ; + octet_iterator temp = it; + return utf8::unchecked::next(temp); + } + + // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) + template + inline uint32_t previous(octet_iterator& it) + { + return utf8::unchecked::prior(it); + } + + template + void advance (octet_iterator& it, distance_type n) + { + for (distance_type i = 0; i < n; ++i) + utf8::unchecked::next(it); + } + + template + typename std::iterator_traits::difference_type + distance (octet_iterator first, octet_iterator last) + { + typename std::iterator_traits::difference_type dist; + for (dist = 0; first < last; ++dist) + utf8::unchecked::next(first); + return dist; + } + + template + octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) + { + while (start != end) { + uint32_t cp = utf8::internal::mask16(*start++); + // Take care of surrogate pairs first + if (utf8::internal::is_lead_surrogate(cp)) { + uint32_t trail_surrogate = utf8::internal::mask16(*start++); + cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; + } + result = utf8::unchecked::append(cp, result); + } + return result; + } + + template + u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) + { + while (start < end) { + uint32_t cp = utf8::unchecked::next(start); + if (cp > 0xffff) { //make a surrogate pair + *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); + *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); + } + else + *result++ = static_cast(cp); + } + return result; + } + + template + octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) + { + while (start != end) + result = utf8::unchecked::append(*(start++), result); + + return result; + } + + template + u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) + { + while (start < end) + (*result++) = utf8::unchecked::next(start); + + return result; + } + + // The iterator class + template + class iterator : public std::iterator { + octet_iterator it; + public: + iterator () {} + explicit iterator (const octet_iterator& octet_it): it(octet_it) {} + // the default "big three" are OK + octet_iterator base () const { return it; } + uint32_t operator * () const + { + octet_iterator temp = it; + return utf8::unchecked::next(temp); + } + bool operator == (const iterator& rhs) const + { + return (it == rhs.it); + } + bool operator != (const iterator& rhs) const + { + return !(operator == (rhs)); + } + iterator& operator ++ () + { + ::std::advance(it, utf8::internal::sequence_length(it)); + return *this; + } + iterator operator ++ (int) + { + iterator temp = *this; + ::std::advance(it, utf8::internal::sequence_length(it)); + return temp; + } + iterator& operator -- () + { + utf8::unchecked::prior(it); + return *this; + } + iterator operator -- (int) + { + iterator temp = *this; + utf8::unchecked::prior(it); + return temp; + } + }; // class iterator + + } // namespace utf8::unchecked +} // namespace utf8 + + +#endif // header guard + diff --git a/testLibbible.cc b/testLibbible.cc deleted file mode 100644 index d3a265a..0000000 --- a/testLibbible.cc +++ /dev/null @@ -1,265 +0,0 @@ -//#include -#include "libbible.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace CppUnit; -using namespace std; - -//----------------------------------------------------------------------------- - -class TestLibbible : public CppUnit::TestFixture -{ - CPPUNIT_TEST_SUITE(TestLibbible); - CPPUNIT_TEST(testGetModules); - CPPUNIT_TEST(testGetPassages); - CPPUNIT_TEST(testGetText); - CPPUNIT_TEST(testSettings); - CPPUNIT_TEST(testDownload); - CPPUNIT_TEST_SUITE_END(); - - //public: - //void setUp(void); - //void tearDown(void); - - protected: - void testGetModules(void); - void testGetPassages(void); - void testGetText(void); - void testSettings(void); - void testDownload(void); - -}; - -//----------------------------------------------------------------------------- - -class StatusTester : public libbible::Status -{ - public: - virtual void update(unsigned long totalBytes, unsigned long completedBytes, string message); - bool hasBeenUpdated = false; -}; - -void StatusTester::update(unsigned long totalBytes, unsigned long completedBytes, string message) { - hasBeenUpdated = true; -} - -//----------------------------------------------------------------------------- - -class CancelTester : public libbible::Status -{ - public: - virtual void update(unsigned long totalBytes, unsigned long completedBytes, string message); -}; - -void CancelTester::update(unsigned long totalBytes, unsigned long completedBytes, string message) { - libbible::terminateDownload(); -} - -//----------------------------------------------------------------------------- - -void TestLibbible::testGetModules(void) { - map> mods = libbible::getModules(); - for(auto pair : mods) { - libbible::uninstallMod(pair.first); - } - CPPUNIT_ASSERT(libbible::getModules().empty()); - CPPUNIT_ASSERT(libbible::installModFromZip("modules/KJV.zip")); - CPPUNIT_ASSERT(libbible::installModFromZip("modules/JPS.zip")); - mods = libbible::getModules(); - CPPUNIT_ASSERT(mods.find("KJV") != mods.end()); - CPPUNIT_ASSERT(mods["KJV"].size() == 66); - CPPUNIT_ASSERT(mods["KJV"][7] == "Ruth"); - CPPUNIT_ASSERT(mods["KJV"][42] == "John"); - CPPUNIT_ASSERT(mods.find("JPS") != mods.end()); - CPPUNIT_ASSERT(mods["JPS"].size() == 39); -} - -void TestLibbible::testGetPassages(void) { - auto passages = libbible::getPassages("KJV", "Romans"); - CPPUNIT_ASSERT(passages[0].modName == "KJV"); - CPPUNIT_ASSERT(passages[0].book == "Romans"); - CPPUNIT_ASSERT(passages[0].bookShort == "Rom"); - CPPUNIT_ASSERT(passages[0].chapterStart == 1); - CPPUNIT_ASSERT(passages[0].verseStart == 1); - CPPUNIT_ASSERT(passages[0].chapterEnd == 1); - CPPUNIT_ASSERT(passages[0].verseEnd == 32); - CPPUNIT_ASSERT(passages.size() == 16); -} - -vector> getChapVerses(std::vector text) { - vector> chapVerses; - for(auto tex : text) { - //printf("Text is: `%s`\n", tex.text.c_str()); - //for(auto modifier : tex.modifiers) { - // printf("\tModifiers include: %s\n", modifier.c_str()); - //} - if(chapVerses.empty() || - chapVerses.back().first != tex.chapter || - chapVerses.back().second != tex.verse) { - chapVerses.push_back(pair(tex.chapter, tex.verse)); - } - } - return chapVerses; -} - -void TestLibbible::testGetText(void) { - libbible::passage pass; - pass.modName = "KJV"; - pass.bookShort = "Matt"; - pass.chapterStart = 3; - pass.verseStart = 16; - pass.chapterEnd = 4; - pass.verseEnd = 7; - auto text = libbible::getText(pass); - // Verify that it includes every verse (3:16-17 + 4:1-7) - vector> chapVerses = getChapVerses(text); - vector> shouldContain = vector>({pair(3, 16), - pair(3, 17), - pair(4, 1), - pair(4, 2), - pair(4, 3), - pair(4, 4), - pair(4, 5), - pair(4, 6), - pair(4, 7)}); - CPPUNIT_ASSERT(chapVerses == shouldContain); - libbible::passage pass2; - pass2.modName = "KJV"; - pass2.book = "John"; - pass2.chapterStart = 3; - pass2.verseStart = 16; - pass2.chapterEnd = 3; - pass2.verseEnd = 16; - text = libbible::getText(pass2); - string allText; - for(auto tex : text) { - allText += tex.text; - } - //printf("Text is: `%s`\n", allText.c_str()); - CPPUNIT_ASSERT(allText == "For God so loved the world, that he gave his only begotten Son, that whosoever believeth in him should not perish, but have everlasting life. "); - - text = libbible::getText(libbible::getPassage("KJV", "John 3:3")); - allText.clear(); - for(auto tex : text) { - allText += tex.text; - } - //printf("Text is: `%s`\n", allText.c_str()); - CPPUNIT_ASSERT(allText == "Jesus answered and said unto him, Verily, verily, I say unto thee, Except a man be born again, he cannot see the kingdom of God. "); - - text = libbible::getText(libbible::getPassage("KJV", "Gal 5:22-23")); - chapVerses = getChapVerses(text); - shouldContain = vector>({pair(5, 22), pair(5, 23)}); - CPPUNIT_ASSERT(chapVerses == shouldContain); - - text = libbible::getText(libbible::getPassage("KJV", "1 cor 1:31-2:1")); - chapVerses = getChapVerses(text); - shouldContain = vector>({pair(1, 31), pair(2, 1)}); - CPPUNIT_ASSERT(chapVerses == shouldContain); - - text = libbible::getText(libbible::getPassage("KJV", "ps 14-15")); - chapVerses = getChapVerses(text); - shouldContain = vector>({pair(14, 1), - pair(14, 2), - pair(14, 3), - pair(14, 4), - pair(14, 5), - pair(14, 6), - pair(14, 7), - pair(15, 1), - pair(15, 2), - pair(15, 3), - pair(15, 4), - pair(15, 5)}); - CPPUNIT_ASSERT(chapVerses == shouldContain); - - text = libbible::getText(libbible::getPassage("KJV", "John 21")); - CPPUNIT_ASSERT(text.back().verse == 25); -} - -void TestLibbible::testSettings(void) { - libbible::settingsWrite("test", "foo"); - CPPUNIT_ASSERT(libbible::settingsRead("test") == "foo"); - libbible::settingsWrite("test", "bar"); - CPPUNIT_ASSERT(libbible::settingsRead("test") == "bar"); - libbible::settingsWriteInt("test", 5); - CPPUNIT_ASSERT(libbible::settingsReadInt("test") == 5); - libbible::settingsWrite("test", ""); - CPPUNIT_ASSERT(libbible::settingsRead("test") == ""); -} - -void TestLibbible::testDownload(void) { - map> modsAvailable = libbible::downloadModsAvailable(); - // We try installing the first available one - string language; - string name; - for(auto pair : modsAvailable) { - language = pair.first; - name = pair.second[0]; - break; - } - CPPUNIT_ASSERT(!language.empty() && !name.empty()); - // Try uninstalling it (shouldn't crash or have nasty side effects!) - libbible::uninstallMod(name); - // Try installing it with cancel. Shoudn't work because it gets cancelled! - CancelTester cancel; - libbible::setStatusReporter(cancel); - libbible::installModFromInternet(language, name); - auto mods = libbible::getModules(); - CPPUNIT_ASSERT(mods.find(name) == mods.end()); - // Now we try with normal status - StatusTester status; - libbible::setStatusReporter(status); - libbible::installModFromInternet(language, name); - mods = libbible::getModules(); - CPPUNIT_ASSERT(mods.find(name) != mods.end()); - CPPUNIT_ASSERT(status.hasBeenUpdated); - -} -//----------------------------------------------------------------------------- - -CPPUNIT_TEST_SUITE_REGISTRATION( TestLibbible ); - -int main(int argc, char* argv[]) { - // informs test-listener about testresults - CPPUNIT_NS::TestResult testresult; - - // register listener for collecting the test-results - CPPUNIT_NS::TestResultCollector collectedresults; - testresult.addListener (&collectedresults); - - // register listener for per-test progress output - CPPUNIT_NS::BriefTestProgressListener progress; - testresult.addListener (&progress); - - // insert test-suite at test-runner by registry - CPPUNIT_NS::TestRunner testrunner; - testrunner.addTest (CPPUNIT_NS::TestFactoryRegistry::getRegistry().makeTest ()); - testrunner.run(testresult); - - // output results in compiler-format - CPPUNIT_NS::CompilerOutputter compileroutputter(&collectedresults, std::cerr); - compileroutputter.write (); - - // Output XML for Jenkins CPPunit plugin - //ofstream xmlFileOut("testLibbibleResults.xml"); - //XmlOutputter xmlOut(&collectedresults, xmlFileOut); - //xmlOut.write(); - - // return 0 if tests were successful - return collectedresults.wasSuccessful() ? 0 : 1; -} diff --git a/utf8.h b/utf8.h deleted file mode 100644 index 4e44514..0000000 --- a/utf8.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2006 Nemanja Trifunovic - -/* -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. -*/ - - -#ifndef UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 -#define UTF8_FOR_CPP_2675DCD0_9480_4c0c_B92A_CC14C027B731 - -#include "utf8/checked.h" -#include "utf8/unchecked.h" - -#endif // header guard diff --git a/utf8/checked.h b/utf8/checked.h deleted file mode 100644 index 1331155..0000000 --- a/utf8/checked.h +++ /dev/null @@ -1,327 +0,0 @@ -// Copyright 2006 Nemanja Trifunovic - -/* -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. -*/ - - -#ifndef UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 -#define UTF8_FOR_CPP_CHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 - -#include "core.h" -#include - -namespace utf8 -{ - // Base for the exceptions that may be thrown from the library - class exception : public ::std::exception { - }; - - // Exceptions that may be thrown from the library functions. - class invalid_code_point : public exception { - uint32_t cp; - public: - invalid_code_point(uint32_t cp) : cp(cp) {} - virtual const char* what() const throw() { return "Invalid code point"; } - uint32_t code_point() const {return cp;} - }; - - class invalid_utf8 : public exception { - uint8_t u8; - public: - invalid_utf8 (uint8_t u) : u8(u) {} - virtual const char* what() const throw() { return "Invalid UTF-8"; } - uint8_t utf8_octet() const {return u8;} - }; - - class invalid_utf16 : public exception { - uint16_t u16; - public: - invalid_utf16 (uint16_t u) : u16(u) {} - virtual const char* what() const throw() { return "Invalid UTF-16"; } - uint16_t utf16_word() const {return u16;} - }; - - class not_enough_room : public exception { - public: - virtual const char* what() const throw() { return "Not enough space"; } - }; - - /// The library API - functions intended to be called by the users - - template - octet_iterator append(uint32_t cp, octet_iterator result) - { - if (!utf8::internal::is_code_point_valid(cp)) - throw invalid_code_point(cp); - - if (cp < 0x80) // one octet - *(result++) = static_cast(cp); - else if (cp < 0x800) { // two octets - *(result++) = static_cast((cp >> 6) | 0xc0); - *(result++) = static_cast((cp & 0x3f) | 0x80); - } - else if (cp < 0x10000) { // three octets - *(result++) = static_cast((cp >> 12) | 0xe0); - *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); - *(result++) = static_cast((cp & 0x3f) | 0x80); - } - else { // four octets - *(result++) = static_cast((cp >> 18) | 0xf0); - *(result++) = static_cast(((cp >> 12) & 0x3f) | 0x80); - *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); - *(result++) = static_cast((cp & 0x3f) | 0x80); - } - return result; - } - - template - output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out, uint32_t replacement) - { - while (start != end) { - octet_iterator sequence_start = start; - internal::utf_error err_code = utf8::internal::validate_next(start, end); - switch (err_code) { - case internal::UTF8_OK : - for (octet_iterator it = sequence_start; it != start; ++it) - *out++ = *it; - break; - case internal::NOT_ENOUGH_ROOM: - throw not_enough_room(); - case internal::INVALID_LEAD: - out = utf8::append (replacement, out); - ++start; - break; - case internal::INCOMPLETE_SEQUENCE: - case internal::OVERLONG_SEQUENCE: - case internal::INVALID_CODE_POINT: - out = utf8::append (replacement, out); - ++start; - // just one replacement mark for the sequence - while (start != end && utf8::internal::is_trail(*start)) - ++start; - break; - } - } - return out; - } - - template - inline output_iterator replace_invalid(octet_iterator start, octet_iterator end, output_iterator out) - { - static const uint32_t replacement_marker = utf8::internal::mask16(0xfffd); - return utf8::replace_invalid(start, end, out, replacement_marker); - } - - template - uint32_t next(octet_iterator& it, octet_iterator end) - { - uint32_t cp = 0; - internal::utf_error err_code = utf8::internal::validate_next(it, end, cp); - switch (err_code) { - case internal::UTF8_OK : - break; - case internal::NOT_ENOUGH_ROOM : - throw not_enough_room(); - case internal::INVALID_LEAD : - case internal::INCOMPLETE_SEQUENCE : - case internal::OVERLONG_SEQUENCE : - throw invalid_utf8(*it); - case internal::INVALID_CODE_POINT : - throw invalid_code_point(cp); - } - return cp; - } - - template - uint32_t peek_next(octet_iterator it, octet_iterator end) - { - return utf8::next(it, end); - } - - template - uint32_t prior(octet_iterator& it, octet_iterator start) - { - // can't do much if it == start - if (it == start) - throw not_enough_room(); - - octet_iterator end = it; - // Go back until we hit either a lead octet or start - while (utf8::internal::is_trail(*(--it))) - if (it == start) - throw invalid_utf8(*it); // error - no lead byte in the sequence - return utf8::peek_next(it, end); - } - - /// Deprecated in versions that include "prior" - template - uint32_t previous(octet_iterator& it, octet_iterator pass_start) - { - octet_iterator end = it; - while (utf8::internal::is_trail(*(--it))) - if (it == pass_start) - throw invalid_utf8(*it); // error - no lead byte in the sequence - octet_iterator temp = it; - return utf8::next(temp, end); - } - - template - void advance (octet_iterator& it, distance_type n, octet_iterator end) - { - for (distance_type i = 0; i < n; ++i) - utf8::next(it, end); - } - - template - typename std::iterator_traits::difference_type - distance (octet_iterator first, octet_iterator last) - { - typename std::iterator_traits::difference_type dist; - for (dist = 0; first < last; ++dist) - utf8::next(first, last); - return dist; - } - - template - octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) - { - while (start != end) { - uint32_t cp = utf8::internal::mask16(*start++); - // Take care of surrogate pairs first - if (utf8::internal::is_lead_surrogate(cp)) { - if (start != end) { - uint32_t trail_surrogate = utf8::internal::mask16(*start++); - if (utf8::internal::is_trail_surrogate(trail_surrogate)) - cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; - else - throw invalid_utf16(static_cast(trail_surrogate)); - } - else - throw invalid_utf16(static_cast(cp)); - - } - // Lone trail surrogate - else if (utf8::internal::is_trail_surrogate(cp)) - throw invalid_utf16(static_cast(cp)); - - result = utf8::append(cp, result); - } - return result; - } - - template - u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) - { - while (start != end) { - uint32_t cp = utf8::next(start, end); - if (cp > 0xffff) { //make a surrogate pair - *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); - *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); - } - else - *result++ = static_cast(cp); - } - return result; - } - - template - octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) - { - while (start != end) - result = utf8::append(*(start++), result); - - return result; - } - - template - u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) - { - while (start != end) - (*result++) = utf8::next(start, end); - - return result; - } - - // The iterator class - template - class iterator : public std::iterator { - octet_iterator it; - octet_iterator range_start; - octet_iterator range_end; - public: - iterator () {} - explicit iterator (const octet_iterator& octet_it, - const octet_iterator& range_start, - const octet_iterator& range_end) : - it(octet_it), range_start(range_start), range_end(range_end) - { - if (it < range_start || it > range_end) - throw std::out_of_range("Invalid utf-8 iterator position"); - } - // the default "big three" are OK - octet_iterator base () const { return it; } - uint32_t operator * () const - { - octet_iterator temp = it; - return utf8::next(temp, range_end); - } - bool operator == (const iterator& rhs) const - { - if (range_start != rhs.range_start || range_end != rhs.range_end) - throw std::logic_error("Comparing utf-8 iterators defined with different ranges"); - return (it == rhs.it); - } - bool operator != (const iterator& rhs) const - { - return !(operator == (rhs)); - } - iterator& operator ++ () - { - utf8::next(it, range_end); - return *this; - } - iterator operator ++ (int) - { - iterator temp = *this; - utf8::next(it, range_end); - return temp; - } - iterator& operator -- () - { - utf8::prior(it, range_start); - return *this; - } - iterator operator -- (int) - { - iterator temp = *this; - utf8::prior(it, range_start); - return temp; - } - }; // class iterator - -} // namespace utf8 - -#endif //header guard - - diff --git a/utf8/core.h b/utf8/core.h deleted file mode 100644 index 693d388..0000000 --- a/utf8/core.h +++ /dev/null @@ -1,329 +0,0 @@ -// Copyright 2006 Nemanja Trifunovic - -/* -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. -*/ - - -#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 -#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 - -#include - -namespace utf8 -{ - // The typedefs for 8-bit, 16-bit and 32-bit unsigned integers - // You may need to change them to match your system. - // These typedefs have the same names as ones from cstdint, or boost/cstdint - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - -// Helper code - not intended to be directly called by the library users. May be changed at any time -namespace internal -{ - // Unicode constants - // Leading (high) surrogates: 0xd800 - 0xdbff - // Trailing (low) surrogates: 0xdc00 - 0xdfff - const uint16_t LEAD_SURROGATE_MIN = 0xd800u; - const uint16_t LEAD_SURROGATE_MAX = 0xdbffu; - const uint16_t TRAIL_SURROGATE_MIN = 0xdc00u; - const uint16_t TRAIL_SURROGATE_MAX = 0xdfffu; - const uint16_t LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); - const uint32_t SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; - - // Maximum valid value for a Unicode code point - const uint32_t CODE_POINT_MAX = 0x0010ffffu; - - template - inline uint8_t mask8(octet_type oc) - { - return static_cast(0xff & oc); - } - template - inline uint16_t mask16(u16_type oc) - { - return static_cast(0xffff & oc); - } - template - inline bool is_trail(octet_type oc) - { - return ((utf8::internal::mask8(oc) >> 6) == 0x2); - } - - template - inline bool is_lead_surrogate(u16 cp) - { - return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); - } - - template - inline bool is_trail_surrogate(u16 cp) - { - return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); - } - - template - inline bool is_surrogate(u16 cp) - { - return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); - } - - template - inline bool is_code_point_valid(u32 cp) - { - return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp)); - } - - template - inline typename std::iterator_traits::difference_type - sequence_length(octet_iterator lead_it) - { - uint8_t lead = utf8::internal::mask8(*lead_it); - if (lead < 0x80) - return 1; - else if ((lead >> 5) == 0x6) - return 2; - else if ((lead >> 4) == 0xe) - return 3; - else if ((lead >> 3) == 0x1e) - return 4; - else - return 0; - } - - template - inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length) - { - if (cp < 0x80) { - if (length != 1) - return true; - } - else if (cp < 0x800) { - if (length != 2) - return true; - } - else if (cp < 0x10000) { - if (length != 3) - return true; - } - - return false; - } - - enum utf_error {UTF8_OK, NOT_ENOUGH_ROOM, INVALID_LEAD, INCOMPLETE_SEQUENCE, OVERLONG_SEQUENCE, INVALID_CODE_POINT}; - - /// Helper for get_sequence_x - template - utf_error increase_safely(octet_iterator& it, octet_iterator end) - { - if (++it == end) - return NOT_ENOUGH_ROOM; - - if (!utf8::internal::is_trail(*it)) - return INCOMPLETE_SEQUENCE; - - return UTF8_OK; - } - - #define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) {utf_error ret = increase_safely(IT, END); if (ret != UTF8_OK) return ret;} - - /// get_sequence_x functions decode utf-8 sequences of the length x - template - utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point) - { - if (it == end) - return NOT_ENOUGH_ROOM; - - code_point = utf8::internal::mask8(*it); - - return UTF8_OK; - } - - template - utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point) - { - if (it == end) - return NOT_ENOUGH_ROOM; - - code_point = utf8::internal::mask8(*it); - - UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) - - code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f); - - return UTF8_OK; - } - - template - utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point) - { - if (it == end) - return NOT_ENOUGH_ROOM; - - code_point = utf8::internal::mask8(*it); - - UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) - - code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); - - UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) - - code_point += (*it) & 0x3f; - - return UTF8_OK; - } - - template - utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point) - { - if (it == end) - return NOT_ENOUGH_ROOM; - - code_point = utf8::internal::mask8(*it); - - UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) - - code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); - - UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) - - code_point += (utf8::internal::mask8(*it) << 6) & 0xfff; - - UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end) - - code_point += (*it) & 0x3f; - - return UTF8_OK; - } - - #undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR - - template - utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point) - { - // Save the original value of it so we can go back in case of failure - // Of course, it does not make much sense with i.e. stream iterators - octet_iterator original_it = it; - - uint32_t cp = 0; - // Determine the sequence length based on the lead octet - typedef typename std::iterator_traits::difference_type octet_difference_type; - const octet_difference_type length = utf8::internal::sequence_length(it); - - // Get trail octets and calculate the code point - utf_error err = UTF8_OK; - switch (length) { - case 0: - return INVALID_LEAD; - case 1: - err = utf8::internal::get_sequence_1(it, end, cp); - break; - case 2: - err = utf8::internal::get_sequence_2(it, end, cp); - break; - case 3: - err = utf8::internal::get_sequence_3(it, end, cp); - break; - case 4: - err = utf8::internal::get_sequence_4(it, end, cp); - break; - } - - if (err == UTF8_OK) { - // Decoding succeeded. Now, security checks... - if (utf8::internal::is_code_point_valid(cp)) { - if (!utf8::internal::is_overlong_sequence(cp, length)){ - // Passed! Return here. - code_point = cp; - ++it; - return UTF8_OK; - } - else - err = OVERLONG_SEQUENCE; - } - else - err = INVALID_CODE_POINT; - } - - // Failure branch - restore the original value of the iterator - it = original_it; - return err; - } - - template - inline utf_error validate_next(octet_iterator& it, octet_iterator end) { - uint32_t ignored; - return utf8::internal::validate_next(it, end, ignored); - } - -} // namespace internal - - /// The library API - functions intended to be called by the users - - // Byte order mark - const uint8_t bom[] = {0xef, 0xbb, 0xbf}; - - template - octet_iterator find_invalid(octet_iterator start, octet_iterator end) - { - octet_iterator result = start; - while (result != end) { - utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end); - if (err_code != internal::UTF8_OK) - return result; - } - return result; - } - - template - inline bool is_valid(octet_iterator start, octet_iterator end) - { - return (utf8::find_invalid(start, end) == end); - } - - template - inline bool starts_with_bom (octet_iterator it, octet_iterator end) - { - return ( - ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) && - ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) && - ((it != end) && (utf8::internal::mask8(*it)) == bom[2]) - ); - } - - //Deprecated in release 2.3 - template - inline bool is_bom (octet_iterator it) - { - return ( - (utf8::internal::mask8(*it++)) == bom[0] && - (utf8::internal::mask8(*it++)) == bom[1] && - (utf8::internal::mask8(*it)) == bom[2] - ); - } -} // namespace utf8 - -#endif // header guard - - diff --git a/utf8/unchecked.h b/utf8/unchecked.h deleted file mode 100644 index cb24271..0000000 --- a/utf8/unchecked.h +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2006 Nemanja Trifunovic - -/* -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. -*/ - - -#ifndef UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 -#define UTF8_FOR_CPP_UNCHECKED_H_2675DCD0_9480_4c0c_B92A_CC14C027B731 - -#include "core.h" - -namespace utf8 -{ - namespace unchecked - { - template - octet_iterator append(uint32_t cp, octet_iterator result) - { - if (cp < 0x80) // one octet - *(result++) = static_cast(cp); - else if (cp < 0x800) { // two octets - *(result++) = static_cast((cp >> 6) | 0xc0); - *(result++) = static_cast((cp & 0x3f) | 0x80); - } - else if (cp < 0x10000) { // three octets - *(result++) = static_cast((cp >> 12) | 0xe0); - *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); - *(result++) = static_cast((cp & 0x3f) | 0x80); - } - else { // four octets - *(result++) = static_cast((cp >> 18) | 0xf0); - *(result++) = static_cast(((cp >> 12) & 0x3f)| 0x80); - *(result++) = static_cast(((cp >> 6) & 0x3f) | 0x80); - *(result++) = static_cast((cp & 0x3f) | 0x80); - } - return result; - } - - template - uint32_t next(octet_iterator& it) - { - uint32_t cp = utf8::internal::mask8(*it); - typename std::iterator_traits::difference_type length = utf8::internal::sequence_length(it); - switch (length) { - case 1: - break; - case 2: - it++; - cp = ((cp << 6) & 0x7ff) + ((*it) & 0x3f); - break; - case 3: - ++it; - cp = ((cp << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff); - ++it; - cp += (*it) & 0x3f; - break; - case 4: - ++it; - cp = ((cp << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff); - ++it; - cp += (utf8::internal::mask8(*it) << 6) & 0xfff; - ++it; - cp += (*it) & 0x3f; - break; - } - ++it; - return cp; - } - - template - uint32_t peek_next(octet_iterator it) - { - return utf8::unchecked::next(it); - } - - template - uint32_t prior(octet_iterator& it) - { - while (utf8::internal::is_trail(*(--it))) ; - octet_iterator temp = it; - return utf8::unchecked::next(temp); - } - - // Deprecated in versions that include prior, but only for the sake of consistency (see utf8::previous) - template - inline uint32_t previous(octet_iterator& it) - { - return utf8::unchecked::prior(it); - } - - template - void advance (octet_iterator& it, distance_type n) - { - for (distance_type i = 0; i < n; ++i) - utf8::unchecked::next(it); - } - - template - typename std::iterator_traits::difference_type - distance (octet_iterator first, octet_iterator last) - { - typename std::iterator_traits::difference_type dist; - for (dist = 0; first < last; ++dist) - utf8::unchecked::next(first); - return dist; - } - - template - octet_iterator utf16to8 (u16bit_iterator start, u16bit_iterator end, octet_iterator result) - { - while (start != end) { - uint32_t cp = utf8::internal::mask16(*start++); - // Take care of surrogate pairs first - if (utf8::internal::is_lead_surrogate(cp)) { - uint32_t trail_surrogate = utf8::internal::mask16(*start++); - cp = (cp << 10) + trail_surrogate + internal::SURROGATE_OFFSET; - } - result = utf8::unchecked::append(cp, result); - } - return result; - } - - template - u16bit_iterator utf8to16 (octet_iterator start, octet_iterator end, u16bit_iterator result) - { - while (start < end) { - uint32_t cp = utf8::unchecked::next(start); - if (cp > 0xffff) { //make a surrogate pair - *result++ = static_cast((cp >> 10) + internal::LEAD_OFFSET); - *result++ = static_cast((cp & 0x3ff) + internal::TRAIL_SURROGATE_MIN); - } - else - *result++ = static_cast(cp); - } - return result; - } - - template - octet_iterator utf32to8 (u32bit_iterator start, u32bit_iterator end, octet_iterator result) - { - while (start != end) - result = utf8::unchecked::append(*(start++), result); - - return result; - } - - template - u32bit_iterator utf8to32 (octet_iterator start, octet_iterator end, u32bit_iterator result) - { - while (start < end) - (*result++) = utf8::unchecked::next(start); - - return result; - } - - // The iterator class - template - class iterator : public std::iterator { - octet_iterator it; - public: - iterator () {} - explicit iterator (const octet_iterator& octet_it): it(octet_it) {} - // the default "big three" are OK - octet_iterator base () const { return it; } - uint32_t operator * () const - { - octet_iterator temp = it; - return utf8::unchecked::next(temp); - } - bool operator == (const iterator& rhs) const - { - return (it == rhs.it); - } - bool operator != (const iterator& rhs) const - { - return !(operator == (rhs)); - } - iterator& operator ++ () - { - ::std::advance(it, utf8::internal::sequence_length(it)); - return *this; - } - iterator operator ++ (int) - { - iterator temp = *this; - ::std::advance(it, utf8::internal::sequence_length(it)); - return temp; - } - iterator& operator -- () - { - utf8::unchecked::prior(it); - return *this; - } - iterator operator -- (int) - { - iterator temp = *this; - utf8::unchecked::prior(it); - return temp; - } - }; // class iterator - - } // namespace utf8::unchecked -} // namespace utf8 - - -#endif // header guard - -- cgit v1.2.3