File size: 444 Bytes
767f47f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
import json

# Build `corpus`: the full text of data.txt followed by one "User/AI" record
# for every usable line of data.jsonl.

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# locale encoding (open() would otherwise fall back to it).
with open('data.txt', 'r', encoding='utf-8') as file:
    corpus = file.read()

lines = []
with open('data.jsonl', 'r', encoding='utf-8') as file:
    for line in file:
        # Keep the try body to the single call that can raise JSONDecodeError.
        try:
            data = json.loads(line.strip())
        except json.JSONDecodeError:
            # Best-effort load: blank or malformed lines are skipped on purpose.
            continue
        # A line can be valid JSON without being an object (a list, number,
        # string or null); those have no .get() and would raise AttributeError,
        # so they are skipped like malformed lines.
        if not isinstance(data, dict):
            continue
        # Missing keys fall back to an empty string.
        ai_response = data.get('text', '')
        url = data.get('url', '')
        lines.append(f"User: {url}\nAI: {ai_response}\n<|endoftext|>\n")

# Join once instead of growing `corpus` inside the loop.
corpus += ''.join(lines)