import re
import zlib
import json
import base64

def load_texts(path='data.txt'):
    """Parse tagged instruction and prompt texts from a data file.

    Lines starting with ``#`` are dropped before parsing. The remaining
    content is scanned for entries of the form
    ``[kind,domain,topic]text`` or ``[kind,domain,topic,style]text``, where
    the text runs up to the next ``[`` or ``+`` character (or end of input).
    Newlines inside a text are replaced by spaces and the result is stripped.

    Args:
        path: File to read, decoded as UTF-8. Defaults to ``'data.txt'``.

    Returns:
        A tuple ``(instruction_texts, system_prompts)``:
        ``instruction_texts`` maps ``(domain, topic)`` to the text of each
        ``instruction`` entry (a style field, if present, is ignored);
        ``system_prompts`` maps ``(domain, topic, style)`` to the text of
        each ``prompt`` entry (``style`` is ``None`` when the entry has no
        fourth field). A later entry with the same key replaces an earlier
        one.

    Entries with any other kind are reported on stdout and skipped.
    """
    with open(path, 'r', encoding='utf8') as f:
        content = f.read()
    # Comment lines are removed up front so the regex never sees them.
    content = '\n'.join(line for line in content.splitlines() if not line.startswith('#'))

    instruction_texts = {}
    system_prompts = {}

    # NOTE(review): the text character class excludes '+' as well as '[',
    # so a literal '+' inside a text ends that entry early -- confirm this
    # is intended and not a misplaced quantifier.
    for match in re.finditer(r'\[(\w+),(\w+),(\w+)(?:,(\w+))?\]([^\[+]+)', content):
        kind, domain, topic, style, text = match.groups()
        # Texts may span several lines in the file; store them on one line.
        text = text.replace('\n', ' ').strip()
        if kind == 'instruction':
            instruction_texts[(domain, topic)] = text
        elif kind == 'prompt':
            system_prompts[(domain, topic, style)] = text
        else:
            print('Error:', kind)
    return (instruction_texts, system_prompts)

if __name__ == '__main__':
    # Script mode: print data.txt (minus comment lines) as a
    # zlib-compressed, base64-encoded bytes value.
    with open('data.txt', 'r', encoding='utf8') as f:
        content = f.read()
    # Strip '#' comment lines, matching the filtering done in load_texts().
    content = '\n'.join(line for line in content.splitlines() if not line.startswith('#'))
    data = base64.b64encode(zlib.compress(content.encode('utf8')))
    # Printed as a bytes repr (b'...').
    print(data)
    # Decode the blob again and verify the round trip actually reproduces
    # the input, instead of discarding the result.
    back = zlib.decompress(base64.b64decode(data)).decode('utf8')
    if back != content:
        raise SystemExit('Error: decoded data does not match the original content')