Spaces:
Runtime error
Runtime error
"""Strip bulky keys from a scraped JSON dump and export record text to files.

Reads the JSON array in ``name``, removes the keys listed in ``deleteKeys``
from every record, appends the text of each record whose "type" appears in
``typeScrape`` to its own file inside ``outputFolder``, and finally rewrites
the JSON file with the slimmed-down records.
"""
import json
import os
import sys

# Configuration
name = "chs.json"
outputFolder = "database"
# Keys removed from every record before the JSON file is rewritten.
deleteKeys = [
    "images",
    "tags",
    "html",
]
# Maps a record's "type" to the key that holds the content to export.
typeScrape = {
    "article": "text",
    "event": "description",
    "list": "items",
}

# Optional text fields of a list entry, in the order they are exported.
_ENTRY_FIELDS = (
    "summary",
    "fsElementContent",
    "fsElementFooterContent",
    "fsElementHeaderContent",
)


def _as_text(value):
    """Return *value* when it is a string, otherwise an empty string."""
    return value if isinstance(value, str) else ""


def _squash(text):
    """Return *text* with all spaces removed, for whitespace-blind compares."""
    return text.replace(" ", "")


def _entry_text(entry):
    """Build the exported text for one entry of a "list" record.

    The title (when present) comes first, followed by every field in
    ``_ENTRY_FIELDS`` that is a string and is not just a repeat of the title
    (compared with spaces ignored). Each part is prefixed with a newline.
    Entries that are not dicts contribute nothing.
    """
    if not isinstance(entry, dict):
        return ""

    raw_title = entry.get("title")
    has_title = isinstance(raw_title, str)
    # A missing title is treated as empty so the remaining fields are still
    # exported instead of aborting the whole record.
    title = raw_title if has_title else ""

    parts = ["\n" + title] if has_title else []
    for field in _ENTRY_FIELDS:
        value = entry.get(field)
        if not isinstance(value, str) or _squash(value) == _squash(title):
            continue
        if field == "summary" and title:
            # Summaries often embed the title; drop the repeated text.
            value = value.replace(title, "")
        parts.append("\n" + value)
    return "".join(parts)


def _record_text(item, field):
    """Return the text to export for *item*, whose content lives in *field*.

    "list" records yield their title followed by the text of every entry;
    all other records yield the string stored under *field*. Missing or
    non-string content yields an empty string.
    """
    if item.get("type") != "list":
        return _as_text(item.get(field))

    entries = item.get(field)
    if not isinstance(entries, list):
        entries = []
    return _as_text(item.get("title")) + "".join(
        _entry_text(entry) for entry in entries
    )


def main(source=name, output_dir=outputFolder):
    """Clean the JSON file at *source* and export record text to *output_dir*.

    Output files are named ``chs-<type>-<n>.txt`` where ``n`` is a single
    counter shared by all exported records, and are opened in append mode,
    so running the script again adds to existing files. A record with a
    known type but no usable content still consumes a number and produces
    an (empty) file.

    Raises:
        OSError: if *source* cannot be read or rewritten, or *output_dir*
            cannot be created.
        json.JSONDecodeError: if *source* does not contain valid JSON.
    """
    with open(source, "r", encoding="utf-8") as handle:
        data = json.load(handle)

    os.makedirs(output_dir, exist_ok=True)

    exported = 0
    for item in data:
        if not isinstance(item, dict):
            continue

        # Records are mutated in place, so ``data`` reflects the removal.
        for key in deleteKeys:
            item.pop(key, None)

        record_type = item.get("type")
        field = (
            typeScrape.get(record_type) if isinstance(record_type, str) else None
        )
        if field is None:
            continue

        exported += 1
        path = os.path.join(output_dir, f"chs-{record_type}-{exported}.txt")
        try:
            with open(path, "a", encoding="utf-8") as out:
                out.write(_record_text(item, field))
        except OSError as err:
            # Keep the export best-effort, but never fail silently.
            print(f"warning: could not write {path}: {err}", file=sys.stderr)

    # Serialise before opening for writing so a serialisation failure cannot
    # leave the source file truncated.
    payload = json.dumps(data, indent=6)
    with open(source, "w", encoding="utf-8") as handle:
        handle.write(payload)


if __name__ == "__main__":
    main()