JoelWester's picture
Upload folder using huggingface_hub
d2dba9e
import re
import zlib
import json
import base64
def load_texts(path='data.txt'):
    """Parse tagged instruction texts and system prompts from a text file.

    Lines that start with ``#`` are discarded before parsing. The remaining
    text is scanned for entries of the form ``[kind,domain,topic]text`` or
    ``[kind,domain,topic,style]text``. Each tag field is a run of word
    characters, and ``text`` runs up to the next ``[`` or ``+`` character
    (or the end of the input). Newlines inside ``text`` are replaced by
    spaces and surrounding whitespace is stripped.

    Args:
        path: File to read, decoded as UTF-8. Defaults to ``'data.txt'``
            (relative to the current working directory), which is the
            location this function always used before the parameter existed.

    Returns:
        A tuple ``(instruction_texts, system_prompts)``.

        ``instruction_texts`` maps ``(domain, topic)`` to the text of each
        entry whose kind is ``instruction``; a fourth tag field, if present,
        is ignored for these entries.

        ``system_prompts`` maps ``(domain, topic, style)`` to the text of
        each entry whose kind is ``prompt``; ``style`` is ``None`` when the
        entry has only three tag fields.

        When two entries share a key, the later one replaces the earlier.
        Entries of any other kind are reported with ``print`` and skipped.
    """
    with open(path, 'r', encoding='utf8') as f:
        content = f.read()

    # Comment lines are removed before matching, so nothing on them is parsed.
    content = '\n'.join(
        line for line in content.splitlines() if not line.startswith('#')
    )

    instruction_texts = {}
    system_prompts = {}

    # NOTE(review): the text group excludes '+' as well as '[', so an entry's
    # text is cut at the first '+'. This may be a typo for [^\[]+ -- confirm
    # against the data file before changing it.
    pattern = r'\[(\w+),(\w+),(\w+)(?:,(\w+))?\]([^\[+]+)'

    for match in re.finditer(pattern, content):
        kind, domain, topic, style, text = match.groups()
        # Entries may span several lines; flatten them to a single line.
        text = text.replace('\n', ' ').strip()
        if kind == 'instruction':
            instruction_texts[(domain, topic)] = text
        elif kind == 'prompt':
            system_prompts[(domain, topic, style)] = text
        else:
            print('Error:', kind)

    return (instruction_texts, system_prompts)
if __name__ == '__main__':
    # Script mode: read data.txt, drop '#' comment lines, and print the
    # zlib-compressed, base64-encoded form of what remains.
    with open('data.txt', 'r', encoding='utf8') as f:
        raw_lines = f.read().splitlines()

    kept_lines = []
    for line in raw_lines:
        if line.startswith('#'):
            continue
        kept_lines.append(line)
    content = '\n'.join(kept_lines)

    compressed = zlib.compress(content.encode('utf8'))
    data = base64.b64encode(compressed)
    # data is a bytes object, so this prints its repr (b'...').
    print(data)

    # Run the pipeline in reverse; the decoded text is not used further
    # in the visible code.
    decoded = base64.b64decode(data)
    back = zlib.decompress(decoded).decode('utf8')