#!/usr/bin/env python3
# Provenance: parser/scrapeToJson.py as added by commit
# 2ab51e507d620c4479e07ca0ec47d22c8c66bc90 ("Initial commit",
# Tue, 13 Apr 2021 15:14:34 -0400); recovered from a cgit patch view.
"""Convert 5thSRD monster markdown pages into JSON files.

Running this module writes one JSON file per weapon and armor returned by
``utils`` into ``items/weapons/`` and ``items/armor/``, then parses every
``*.md`` file under ``../../5thSRD/docs/gamemaster_rules/monsters/`` with
``processMonster`` and writes the result to ``monsters/<stem>.json``.
"""

import json
import re
from pathlib import Path

import utils

# Stat-block key -> bold label (a regex fragment) used in the markdown.
_STAT_LABELS = {
    'ac': 'Armor Class',
    'hp': 'Hit Points',
    'speed': 'Speed',
    'saves': 'Saving Throws',
    'd_resistances': 'Damage Resistances?',
    'd_vulnerabilities': 'Damage Vulnerabilities',
    'd_immunities': 'Damage Immunities',
    'c_immunities': 'Condition Immunities',
    'senses': 'Senses',
    'langs': 'Languages',
    'skills': 'Skills',
}

# Feature type -> regex fragment of the heading that precedes its section.
_SECTION_HEADS = {
    'feature': r'\*\*Challenge\*\*',
    'action': '### Actions',
    'legendary_action': '### Legendary Actions',
    'reaction': '### Reactions',
}

_ABILITIES = ['str', 'dex', 'con', 'int', 'wis', 'cha']

# One damage clause of a "_Hit:_" sentence, e.g. "7 (1d8 + 3) slashing damage",
# "plus 3 (1d6) fire damage" or a flat "1 piercing damage".
_DAMAGE_RE = re.compile(
    r'(?P<joiner>plus |or )?(?P<flat>\d+)'
    r'(?: \((?P<count>\d+)d(?P<sides>\d+)[\+− \d]*\))?'
    r' (?P<type>[a-z]*) damage'
)


def _parseDamageList(text):
    """Split a resistance/vulnerability/immunity line into typed entries.

    The line is formatted ``a, b, c[; d, e, and f from nonmagical attacks
    [ that aren't g]]``, possibly without the leading ``a, b, c`` part.
    Returns a list of ``{'type': ..., 'qualifiers': [...]}`` dicts; entries
    that come from the same ``;``-separated part share one qualifiers list.
    """
    entries = []
    for part in text.split('; '):
        part = part.strip()
        qualifiers = []
        if 'nonmagical' in part:
            qualifiers.append('nonmagical')
        if "that aren't" in part:
            exception = re.search(r"(?<=that aren't ).*$", part)
            # Guard: the phrase may end the part with nothing after it.
            if exception:
                qualifiers.append('non-' + exception.group(0))
        for typ in re.findall(
                r'([a-z]+(?=,)|^[a-z]+$|(?<=, )[a-z]+$|(?<=and )[a-z]+(?= from))',
                part):
            entries.append({'type': typ, 'qualifiers': qualifiers})
    return entries


def _parseAttackDistances(text):
    """Return ``{'range': [short, long], 'reach': feet}`` for an attack text.

    Missing values default to ``[0, 0]`` and ``0``.  A single range value is
    used for both the short and the long range.
    """
    distances = {'range': [0, 0], 'reach': 0}
    for kind in ('range', 'reach'):
        found = re.search('{} ([^,]*),'.format(kind), text)
        if not found:
            continue
        raw = found.group(1)
        if '/' in raw and kind == 'range':
            value = [int(piece.split('ft')[0].strip()) for piece in raw.split('/')]
        else:
            value = int(raw.split('ft')[0].strip())
            if kind == 'range':
                value = [value, value]
        distances[kind] = value
    return distances


def _parseAttackDamage(hitText, monsterName):
    """Parse the damage clauses of a ``_Hit:_`` sentence into a list of dicts.

    Each dict has ``dmg_die_count``, ``dmg_die_sides``, ``dmg_type`` and
    ``is_or``.  Flat damage (no dice in parentheses, e.g. "1 piercing damage")
    is stored as N dice with one side.  An "or" clause of the same type whose
    die is two sides larger than the previous clause is treated as the
    two-handed (versatile) variant of that clause and is not recorded.
    """
    damage = []
    for found in _DAMAGE_RE.finditer(hitText):
        isOr = found.group('joiner') == 'or '
        dmgType = found.group('type')
        if found.group('count') is None:
            # Flat damage: N points is N d1, not N dN.
            damage.append({
                'dmg_die_count': int(found.group('flat')),
                'dmg_die_sides': 1,
                'dmg_type': dmgType,
                'is_or': isOr,
            })
            continue
        entry = {
            'dmg_die_count': int(found.group('count')),
            'dmg_die_sides': int(found.group('sides')),
            'dmg_type': dmgType,
            'is_or': isOr,
        }
        previous = damage[-1] if damage else None
        isVersatile = (
            isOr
            and previous is not None
            and entry['dmg_type'] == previous['dmg_type']
            and entry['dmg_die_sides'] == previous['dmg_die_sides'] + 2
        )
        if isVersatile:
            continue
        if isOr:
            # An "or" alternative that is not a versatile variant: flag it.
            print('We got here for {}!!!!!!!!!!!!!!'.format(monsterName))
        damage.append(entry)
    return damage


def processMonster(data, weapons, armors):
    """Parse one monster markdown page into a JSON-serializable dict.

    Args:
        data: Full text of the monster's markdown file.
        weapons: Unused here; kept so existing callers keep working.
        armors: Iterable of armor dicts providing ``name``, ``ac`` and
            ``type`` ('light', 'medium', 'heavy', 'misc' or 'shield').

    Returns:
        A dict with the keys ``name``, ``type``, ``cr``, ``size``,
        ``alignment``, ``stats``, ``hit_die_count``, ``hit_die_sides``,
        ``speed``, ``langs``, ``senses``, ``skills``, ``saves``, ``prof``,
        ``d_resistances``, ``d_vulnerabilities``, ``d_immunities``,
        ``c_immunities``, ``natural_armor``, ``inventory``, ``description``
        and ``features``.

    Raises:
        AttributeError: If a mandatory pattern (hit dice, ``name:``,
            ``type:``, ``cr:``, the italic size/alignment line, or the
            ``_Hit:_`` sentence of an attack) is missing from ``data``.

    Inconsistencies between the printed numbers and the recomputed ones
    (AC, skills, saves, passive Perception) are reported with ``print`` and
    do not abort the conversion.
    """
    desc = {}
    for name, label in _STAT_LABELS.items():
        m = re.search(r'(\*\*{}\.?\*\*)(.*)'.format(label), data)
        desc[name] = m.group(2).strip() if m else ""

    for name in ['d_resistances', 'd_vulnerabilities', 'd_immunities', 'c_immunities']:
        desc[name] = _parseDamageList(desc[name])

    # Calc things about hp
    hitdieMatch = re.search(r'(\d+)d(\d+)', desc['hp'])
    desc['hit_die_count'] = int(hitdieMatch.group(1))
    desc['hit_die_sides'] = int(hitdieMatch.group(2))
    del desc['hp']

    desc['name'] = re.search('(?<=name: ).*', data).group(0).strip()
    desc['type'] = re.search('(?<=type: ).*', data).group(0).strip()
    desc['cr'] = float(re.search('(?<=cr: ).*', data).group(0).strip())
    # First italic span on a line: "<size> <kind>, <alignment>".
    description = re.search('(?<=_).*(?=_)', data).group(0).strip()
    desc['size'] = description.split(' ')[0]
    desc['alignment'] = description.split(', ')[1]
    # Ability scores are the numeric cells of the markdown table, in order.
    desc['stats'] = {
        ability: int(score.strip().split(' ')[0])
        for ability, score in zip(_ABILITIES, re.findall(r'(?<=\|) *\d.*?(?=\|)', data))
    }

    def getBonus(ability):
        """Return the ability modifier for one of the six ability keys."""
        return (desc['stats'][ability] - 10) // 2

    desc['inventory'] = []  # Fill with weapons and armor

    # Modify ac stuff: replace the printed AC by worn armor + natural armor.
    desc['natural_armor'] = []
    correctAC = int(desc['ac'].split(' ')[0])
    natural = ''
    armorBonus = 0
    armor = re.search(r'(?<=\().*(?=\))', desc['ac'])
    if armor:
        for a in armor.group(0).lower().split(','):
            a = a.strip()
            # If it has "armor" in it, remove that
            a = re.search('^(.*?)(?: armor)?$', a).group(1)
            if a == 'natural' or a == 'patchwork' or 'scraps' in a:
                natural = a
                continue
            if 'with' in a:
                continue
            # Search for it in armors
            armorDict = next((entry for entry in armors if entry['name'] == a), None)
            if armorDict is None:
                print('Cound not identify armor: {}'.format(a))
                continue
            desc['inventory'].append(armorDict)
            bonus = armorDict['ac']
            typ = armorDict['type']
            if typ == 'light':
                armorBonus = bonus + getBonus('dex')
            elif typ == 'medium':
                armorBonus = bonus + min(2, getBonus('dex'))
            elif typ == 'heavy':
                armorBonus = bonus
            elif typ == 'misc' or typ == 'shield':
                armorBonus += bonus
    if armorBonus == 0 and not natural:  # Got through all that and came up dry
        armorBonus = 10 + getBonus('dex')
    if natural:
        # Whatever the worn armor does not explain is attributed to natural armor.
        desc['natural_armor'].append({'name': natural, 'bonus': correctAC - armorBonus})
    elif armorBonus != correctAC:
        print('Got a bad result for {}: armor string is {}, but we calculated {}'.format(
            desc['name'], desc['ac'], armorBonus))
    del desc['ac']

    # Search for a description section
    desc['description'] = ''
    description = re.search('(?s)(?<={}).*?(?=###|$)'.format('### Description'), data)
    if description:
        desc['description'] = description.group(0).strip()

    # Next do sections; every "**Name.** text" entry lands in one "features"
    # list, tagged with the section it came from.
    desc['features'] = []
    for name, head in _SECTION_HEADS.items():
        section = re.search('(?s)(?<={}).*?(?=###|$)'.format(head), data)
        if section:
            for m in re.findall(r'(?s)\n\*\*(.*?)\.?\*\*(.*?)(?=\n\*\*|$)', section.group(0)):
                desc['features'].append({'name': m[0].lower(), 'text': m[1].strip(), 'type': name})

    # Next, simplify and codify a few things
    # Guess the proficiency bonus
    desc['prof'] = int(max(0, (desc['cr'] - 1) // 4) + 2)

    # Now convert skills and saves to be based on proficiency and abilities
    # rather than raw numbers.
    skillStr = desc['skills']
    desc['skills'] = {}  # Map skill to plurality of proficiency
    if skillStr:
        for skill in skillStr.split(','):
            # NOTE(review): utils.procSkill appears to return
            # (skill name, numeric bonus, ability key) -- confirm in utils.
            skillName, skillBonus, ability = utils.procSkill(skill)
            abilityBonus = getBonus(ability)
            profTimes = (skillBonus - abilityBonus) / desc['prof']
            if round(profTimes) != profTimes:
                print('Things came out funny for {}; skill {} has bonus {}, but proficiency is {} and the relevant ability ({}) gets {}'.format(
                    desc['name'], skillName, skillBonus, desc['prof'], ability, getBonus(ability)))
            desc['skills'][skillName] = round(profTimes)

    savesStr = desc['saves']
    desc['saves'] = []
    if savesStr:
        for save in savesStr.split(', '):
            ability = save.split(' ')[0].lower()
            if int(save.split('+')[1]) != getBonus(ability) + desc['prof']:
                print('Things came out funny for {}; {} save has bonus {}, but proficiency is {} and the relevant ability ({}) gets {}'.format(
                    desc['name'], ability, int(save.split('+')[1]), desc['prof'], ability, getBonus(ability)))
            desc['saves'].append(ability)

    # Turn attack actions into weapon entries.
    for action in desc['features']:
        if not re.match('.*Attack:', action['text']):
            continue
        distances = _parseAttackDistances(action['text'])
        # It could be something like "1 piercing damage" (see sprite).
        dmgSection = re.search(r'_Hit:_ .*?\.', action['text']).group(0)
        damage = _parseAttackDamage(dmgSection, desc['name'])
        if not damage:
            damage.append({'dmg_die_count': 0, 'dmg_die_sides': 0, 'dmg_type': '-'})
        # Only the first damage clause is handed to the weapon formatter.
        primary = damage[0]
        action['attack'] = dict(utils.formatWeapon(
            action['name'],
            distances['range'][0],
            distances['range'][1],
            distances['reach'],
            primary['dmg_type'],
            primary['dmg_die_count'],
            primary['dmg_die_sides'],
            action['text'],
        ).items())
        if action['attack']['type'] != 'unknown':
            desc['inventory'].append(action['attack'])
    # Remove weapon actions from features (they were just added to inventory)
    desc['features'] = [a for a in desc['features']
                        if 'attack' not in a or a['attack']['type'] == 'unknown']

    # Get rid of precalculated passive perception
    # It's always the last item in senses
    passivePercep = int(desc['senses'].split(' ')[-1])
    shouldBe = 10 + getBonus('wis')
    if 'Perception' in desc['skills']:
        shouldBe += desc['skills']['Perception'] * desc['prof']
    if passivePercep != shouldBe:
        print("Passive perception didn't come out right for {}: is {}, but should be {}".format(
            desc['name'], passivePercep, shouldBe))
    desc['senses'] = desc['senses'].split(', ')[:-1]
    return desc


weapons = utils.getWeapons()
armors = utils.getArmor()
Path('items/weapons/').mkdir(parents=True, exist_ok=True)
for weapon in weapons:
    with open('items/weapons/' + weapon['name'].replace(' ', '_') + '.json', 'w') as f:
        json.dump(weapon, f, indent=2)
# Also do armors
Path('items/armor/').mkdir(parents=True, exist_ok=True)
for armor in armors:
    with open('items/armor/' + armor['name'].replace(' ', '_') + '.json', 'w') as f:
        json.dump(armor, f, indent=2)

for monster in Path('../../5thSRD/docs/gamemaster_rules/monsters/').glob('*.md'):
    # The parser matches a literal U+2212 minus sign, so read the markdown
    # as UTF-8 rather than with the platform default encoding.
    with monster.open(encoding='utf-8') as f:
        data = f.read()
    # Parse before opening the output so a failure does not leave an empty file.
    parsed = processMonster(data, weapons, armors)
    Path('monsters/').mkdir(exist_ok=True)
    with open('monsters/' + monster.stem + '.json', 'w') as f:
        json.dump(parsed, f, indent=2)