aboutsummaryrefslogtreecommitdiff
path: root/parser/scrapeToJson.py
diff options
context:
space:
mode:
Diffstat (limited to 'parser/scrapeToJson.py')
-rwxr-xr-xparser/scrapeToJson.py234
1 files changed, 234 insertions, 0 deletions
diff --git a/parser/scrapeToJson.py b/parser/scrapeToJson.py
new file mode 100755
index 0000000..57fcbbc
--- /dev/null
+++ b/parser/scrapeToJson.py
@@ -0,0 +1,234 @@
+#!/usr/bin/env python3
+
+import json
+import re
+import utils
+
def _frontMatter(data, key):
    """Return the stripped value of the first ``<key>: <value>`` line in *data*.

    Raises:
        ValueError: if no such line exists.
    """
    match = re.search(r'(?<={}: ).*'.format(key), data)
    if not match:
        raise ValueError('Monster data has no "{}: " line'.format(key))
    return match.group(0).strip()


def _parseTypedList(text):
    """Parse one resistance/vulnerability/immunity line into typed entries.

    The line is formatted ``a, b, c[; d, e, and f from nonmagical attacks
    [that aren't g]]``, possibly without the ``a, b, c`` part.  Each
    ``;``-separated part yields one ``{'type', 'qualifiers'}`` dict per type
    word found in it.
    """
    entries = []
    for part in text.split('; '):
        part = part.strip()
        # Look for "nonmagical", and "that aren't x"
        qualifiers = []
        if 'nonmagical' in part:
            qualifiers.append('nonmagical')
        exception = re.search(r"(?<=that aren't ).*$", part)
        if exception:
            qualifiers.append('non-' + exception.group(0))
        for typ in re.findall(r'([a-z]+(?=,)|^[a-z]+$|(?<=, )[a-z]+$|(?<=and )[a-z]+(?= from))', part):
            # Copy so the entries do not all alias one list object.
            entries.append({'type': typ, 'qualifiers': list(qualifiers)})
    return entries


def _signedInt(text):
    """Return the first explicitly signed integer in *text*, or None.

    Accepts ``+``, ASCII ``-`` and the Unicode minus sign as the sign.
    """
    match = re.search(r'([+\-−])\s*(\d+)', text)
    if not match:
        return None
    value = int(match.group(2))
    return value if match.group(1) == '+' else -value


def _parseAttackDetails(text, monsterName):
    """Extract range, reach and damage entries from an attack's text.

    Returns a dict with keys ``range`` (list of ints, ``[0, 0]`` when absent),
    ``reach`` (int, ``0`` when absent), ``properties`` (contains
    ``'versatile'`` when an "or" clause is the same damage type with a die two
    sides larger) and ``damage`` (list of damage dicts, never empty).
    *monsterName* is only used in diagnostics.
    """
    details = {'range': [0, 0], 'reach': 0, 'properties': [], 'damage': []}
    for rangereach in ('range', 'reach'):
        rangeMatch = re.search('{} ([^,]*),'.format(rangereach), text)
        if not rangeMatch:
            continue
        distance = rangeMatch.group(1)
        if '/' in distance and rangereach == 'range':
            distance = [int(piece.split('ft')[0].strip()) for piece in distance.split('/')]
        else:
            distance = int(distance.split('ft')[0].strip())
            if rangereach == 'range':
                # A single range value is used for both entries.
                distance = [distance, distance]
        details[rangereach] = distance

    # Only the first sentence after "_Hit:_" is scanned for damage.
    hitMatch = re.search(r'_Hit:_ .*?\.', text)
    dmgSection = hitMatch.group(0) if hitMatch else ''
    dmgPattern = r'(plus |or )?(\d+)(?: \((\d+)d(\d+)[\+− \d]*\))? ([a-z]*) damage'
    for dmgMatch in re.finditer(dmgPattern, dmgSection):
        prefix, flatAmount, dieCount, dieSides, dmgType = dmgMatch.groups()
        isOr = prefix == 'or '
        if dieCount is None:
            # It could be something like "1 piercing damage" (see sprite):
            # a flat amount N is encoded as N one-sided dice.
            details['damage'].append({
                'dmg_die_count': int(flatAmount),
                'dmg_die_sides': 1,
                'dmg_type': dmgType,
                'is_or': isOr
            })
            continue
        toAppend = {
            'dmg_die_count': int(dieCount),
            'dmg_die_sides': int(dieSides),
            'dmg_type': dmgType,
            'is_or': isOr
        }
        previous = details['damage'][-1] if details['damage'] else None
        if (isOr and previous is not None
                and toAppend['dmg_type'] == previous['dmg_type']
                and toAppend['dmg_die_sides'] == previous['dmg_die_sides'] + 2):
            details['properties'].append('versatile')
            continue
        if isOr:
            print('We got here for {}!!!!!!!!!!!!!!'.format(monsterName))
        details['damage'].append(toAppend)
    if not details['damage']:
        details['damage'].append({'dmg_die_count': 0, 'dmg_die_sides': 0, 'dmg_type': '-'})
    return details


def processMonster(data, weapons, armors):
    """Convert one monster markdown document into a JSON-serialisable dict.

    Args:
        data: Full text of the monster page.  It must contain ``name: ``,
            ``type: `` and ``cr: `` lines, an ``**Armor Class**`` line and a
            ``|``-delimited ability score row.
        weapons: Unused by this function; kept for interface compatibility.
        armors: Iterable of armor dicts with ``name``, ``ac`` and ``type``
            keys, matched against the armor named in the AC parentheses.

    Returns:
        A dict describing the monster.  Raw numbers are replaced where
        possible by derived data: hit dice instead of hit points, inventory
        and natural armor instead of armor class, proficiency multiples
        instead of skill bonuses, and proficient abilities instead of saving
        throw bonuses.  Attacks for which ``utils.formatWeapon`` reports a
        type other than ``'unknown'`` are moved from ``features`` to
        ``inventory``.

    Raises:
        ValueError: if a required ``name``/``type``/``cr`` line is missing or
            the armor class / challenge rating is not numeric.

    Inconsistencies between the page's numbers and the derived ones are
    reported with ``print`` and do not abort processing.
    """
    names2names = {'ac': 'Armor Class', 'hp': 'Hit Points', 'speed': 'Speed', 'saves': 'Saving Throws', 'd_resistances': 'Damage Resistances?', 'd_vulnerabilities': 'Damage Vulnerabilities', 'd_immunities': 'Damage Immunities', 'c_immunities': 'Condition Immunities', 'senses': 'Senses', 'langs': 'Languages', 'skills': 'Skills'}
    desc = {}
    for name, label in names2names.items():
        m = re.search(r'(\*\*{}\.?\*\*)(.*)'.format(label), data)
        desc[name] = m.group(2).strip() if m else ""
    for name in ['d_resistances', 'd_vulnerabilities', 'd_immunities', 'c_immunities']:
        desc[name] = _parseTypedList(desc[name])

    # Read the name early so diagnostics can use it; it is stored in desc
    # further down so the key order of the output stays the same.
    monsterName = _frontMatter(data, 'name')

    # Calc things about hp
    hitdieMatch = re.search(r'(\d+)d(\d+)', desc['hp'])
    if hitdieMatch:
        desc['hit_die_count'] = int(hitdieMatch.group(1))
        desc['hit_die_sides'] = int(hitdieMatch.group(2))
    else:
        print('Could not find hit dice for {}: hit points string is {}'.format(monsterName, desc['hp']))
        desc['hit_die_count'] = 0
        desc['hit_die_sides'] = 0
    del desc['hp']
    desc['name'] = monsterName
    desc['type'] = _frontMatter(data, 'type')
    desc['cr'] = float(_frontMatter(data, 'cr'))
    # The first underscore-delimited span is read as "<size> <kind>, <alignment>".
    descriptionMatch = re.search(r'(?<=_).*(?=_)', data)
    description = descriptionMatch.group(0).strip() if descriptionMatch else ''
    desc['size'] = description.split(' ')[0]
    descriptionParts = description.split(', ')
    desc['alignment'] = descriptionParts[1] if len(descriptionParts) > 1 else ''
    desc['stats'] = {ability: int(score.strip().split(' ')[0]) for ability, score in zip(['str', 'dex', 'con', 'int', 'wis', 'cha'], re.findall(r'(?<=\|) *\d.*?(?=\|)', data))}

    def getBonus(ability):
        return (desc['stats'][ability] - 10) // 2

    desc['inventory'] = []  # Fill with weapons and armor
    # Modify ac stuff
    desc['natural_armor'] = []
    correctAC = int(desc['ac'].split(' ')[0])
    natural = ''
    armorBonus = 0
    armor = re.search(r'(?<=\().*(?=\))', desc['ac'])
    if armor:
        for a in armor.group(0).lower().split(','):
            a = a.strip()
            # If it has "armor" in it, remove that
            a = re.search('^(.*?)(?: armor)?$', a).group(1)
            if a == 'natural' or a == 'patchwork' or 'scraps' in a:
                natural = a
                continue
            if 'with' in a:
                continue
            # Search for it in armors
            armorDict = next((candidate for candidate in armors if candidate['name'] == a), None)
            if armorDict is None:
                print('Cound not identify armor: {}'.format(a))
                continue
            desc['inventory'].append(armorDict)
            bonus = armorDict['ac']
            typ = armorDict['type']
            if typ == 'light':
                armorBonus = bonus + getBonus('dex')
            elif typ == 'medium':
                armorBonus = bonus + min(2, getBonus('dex'))
            elif typ == 'heavy':
                armorBonus = bonus
            elif typ == 'misc' or typ == 'shield':
                armorBonus += bonus
    if armorBonus == 0 and not natural:  # Got through all that and came up dry
        armorBonus = 10 + getBonus('dex')
    if natural:
        # Whatever the worn items do not explain is attributed to natural armor.
        desc['natural_armor'].append({'name': natural, 'bonus': correctAC - armorBonus})
    elif armorBonus != correctAC:
        print('Got a bad result for {}: armor string is {}, but we calculated {}'.format(desc['name'], desc['ac'], armorBonus))
    del desc['ac']

    # Search for a description section
    desc['description'] = ''
    description = re.search(r'(?s)(?<={}).*?(?=###|$)'.format('### Description'), data)
    if description:
        desc['description'] = description.group(0).strip()

    # Next do sections
    names2sectHeads = {'feature': r'\*\*Challenge\*\*', 'action': '### Actions', 'legendary_action': '### Legendary Actions', 'reaction': '### Reactions'}
    # We put them all in "features" list
    desc['features'] = []
    for name, sectHead in names2sectHeads.items():
        section = re.search(r'(?s)(?<={}).*?(?=###|$)'.format(sectHead), data)
        if not section:
            continue
        for m in re.findall(r'(?s)\n\*\*(.*?)\.?\*\*(.*?)(?=\n\*\*|$)', section.group(0)):
            desc['features'].append({'name': m[0].lower(), 'text': m[1].strip(), 'type': name})

    # Next, simplify and codify a few things
    # Guess the proficiency bonus
    desc['prof'] = int(max(0, (desc['cr'] - 1) // 4) + 2)
    # Now convert skills, saves, and attacks to be based on proficiency and abilities rather than raw numbers
    skillStr = desc['skills']
    desc['skills'] = {}  # Map skill to plurality of proficiency
    if skillStr:
        for skill in skillStr.split(','):
            skillName, skillBonus, ability = utils.procSkill(skill)
            abilityBonus = getBonus(ability)
            profTimes = (skillBonus - abilityBonus) / desc['prof']
            if round(profTimes) != profTimes:
                print('Things came out funny for {}; skill {} has bonus {}, but proficiency is {} and the relevant ability ({}) gets {}'.format(desc['name'], skillName, skillBonus, desc['prof'], ability, abilityBonus))
            desc['skills'][skillName] = round(profTimes)
    savesStr = desc['saves']
    desc['saves'] = []
    if savesStr:
        for save in savesStr.split(', '):
            ability = save.split(' ')[0].lower()
            saveBonus = _signedInt(save)
            if saveBonus is None:
                print('Could not read the bonus of save {} for {}'.format(save, desc['name']))
            elif saveBonus != getBonus(ability) + desc['prof']:
                print('Things came out funny for {}; {} save has bonus {}, but proficiency is {} and the relevant ability ({}) gets {}'.format(desc['name'], ability, saveBonus, desc['prof'], ability, getBonus(ability)))
            desc['saves'].append(ability)

    for action in desc['features']:
        if not re.match('.*Attack:', action['text']):
            continue
        details = _parseAttackDetails(action['text'], desc['name'])
        firstDamage = details['damage'][0]
        action['attack'] = dict(utils.formatWeapon(action['name'], details['range'][0], details['range'][1], details['reach'], firstDamage['dmg_type'], firstDamage['dmg_die_count'], firstDamage['dmg_die_sides'], action['text']))
        if action['attack']['type'] != 'unknown':
            desc['inventory'].append(action['attack'])
    # Remove weapon actions from features (they were just added to inventory)
    desc['features'] = [a for a in desc['features'] if 'attack' not in a or a['attack']['type'] == 'unknown']

    # Get rid of precalculated passive perception
    # It's expected to be the last item in senses
    senseParts = desc['senses'].split(', ') if desc['senses'] else []
    lastToken = desc['senses'].split(' ')[-1]
    if lastToken.isdigit():
        passivePercep = int(lastToken)
        shouldBe = 10 + getBonus('wis')
        if 'Perception' in desc['skills']:
            shouldBe += desc['skills']['Perception'] * desc['prof']
        if passivePercep != shouldBe:
            print('Passive perception didn\'t come out right for {}: is {}, but should be {}'.format(desc['name'], passivePercep, shouldBe))
        senseParts = senseParts[:-1]
    else:
        # No trailing number, so there is nothing to strip; keep every sense.
        print('Could not find passive perception for {}: senses string is {}'.format(desc['name'], desc['senses']))
    desc['senses'] = senseParts
    return desc
+
+from pathlib import Path
# Load the item tables once: they are exported below and also handed to
# processMonster for every monster.
weapons = utils.getWeapons()
armors = utils.getArmor()

# Write one JSON file per weapon, then one per armor, creating each target
# directory just before it is filled.
for outDir, items in (('items/weapons/', weapons), ('items/armor/', armors)):
    Path(outDir).mkdir(parents=True, exist_ok=True)
    for item in items:
        jsonName = item['name'].replace(' ', '_') + '.json'
        with open(outDir + jsonName, 'w') as f:
            json.dump(item, f, indent=2)

# Convert every monster markdown page into monsters/<stem>.json.
monsterPages = Path('../../5thSRD/docs/gamemaster_rules/monsters/')
for monster in monsterPages.glob('*.md'):
    data = monster.read_text()
    Path('monsters/').mkdir(exist_ok=True)
    # The output file is opened before the page is processed.
    with open('monsters/' + monster.stem + '.json', 'w') as f:
        json.dump(processMonster(data, weapons, armors), f, indent=2)