import re
import zlib
import json
import base64

# One entry looks like "[kind,domain,topic]text" or "[kind,domain,topic,style]text".
# NOTE(review): the text class [^\[+] excludes '+' as well as '[', so an entry's
# text is cut off at the first '+' character -- confirm this is intended and not
# a typo for [^\[].
_ENTRY_RE = re.compile(r'\[(\w+),(\w+),(\w+)(?:,(\w+))?\]([^\[+]+)')


def _read_data(path):
    """Return the contents of *path* with every line starting with '#' removed."""
    with open(path, 'r', encoding='utf8') as f:
        content = f.read()
    return '\n'.join(
        line for line in content.splitlines() if not line.startswith('#')
    )


def load_texts(path='data.txt'):
    """Parse instruction texts and system prompts from a tagged text file.

    Lines starting with '#' are dropped before parsing. Each remaining entry
    has a bracketed header ``[kind,domain,topic]`` or
    ``[kind,domain,topic,style]`` followed by its text; newlines inside the
    text are replaced by spaces and the result is stripped.

    Args:
        path: File to read. Defaults to 'data.txt' in the current directory.

    Returns:
        A tuple ``(instruction_texts, system_prompts)`` where
        ``instruction_texts`` maps ``(domain, topic)`` to text for entries of
        kind 'instruction', and ``system_prompts`` maps
        ``(domain, topic, style)`` to text for entries of kind 'prompt'
        (``style`` is None when the header has only three fields).

    Entries with any other kind are reported with ``print('Error:', kind)``
    and otherwise ignored. Later entries overwrite earlier ones with the
    same key.
    """
    content = _read_data(path)
    instruction_texts = {}
    system_prompts = {}
    for match in _ENTRY_RE.finditer(content):
        kind, domain, topic, style, text = match.groups()
        cleaned = text.replace('\n', ' ').strip()
        if kind == 'instruction':
            # Any style field on an instruction header is ignored.
            instruction_texts[(domain, topic)] = cleaned
        elif kind == 'prompt':
            system_prompts[(domain, topic, style)] = cleaned
        else:
            print('Error:', kind)
    return (instruction_texts, system_prompts)


if __name__ == '__main__':
    # Emit the comment-stripped data file as a base64-encoded zlib blob.
    content = _read_data('data.txt')
    data = base64.b64encode(zlib.compress(content.encode('utf8')))
    print(data)
    # Decode the blob again to make sure it round-trips to the same text.
    back = zlib.decompress(base64.b64decode(data)).decode('utf8')
    if back != content:
        raise RuntimeError('compressed data did not round-trip')