hari15prasad committed on
Commit
eec0e7c
·
1 Parent(s): f1ecb4e

Data Prep: Successfully converted training data to JSONL format for AutoTrain

Browse files
convert_to_jsonl.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Convert the raw training samples into the one-object-per-line JSONL layout.

The input file is expected to hold a JSON array of sample objects, possibly
damaged: the surrounding brackets may be missing and a trailing comma may
follow the last element. Each sample is flattened into a single ``text``
field and written as one JSON object per line.
"""
import json
import os

input_path = 'src/environment/training_samples.jsonl'
output_path = 'src/environment/training_samples_final.jsonl'


def parse_entries(raw):
    """Parse *raw* as a JSON array, repairing common truncation damage.

    Repairs applied, in order:
      * a missing leading ``[`` and/or trailing ``]`` is restored;
      * if parsing still fails, a single trailing comma before the closing
        bracket is dropped (with or without whitespace between the two).

    Args:
        raw: Text read from the input file.

    Returns:
        The decoded JSON value (a list for well-formed input).

    Raises:
        json.JSONDecodeError: If the text cannot be parsed even after the
            repairs above.
    """
    content = raw.strip()

    # Restore brackets if missing.
    if not content.startswith('['):
        content = '[' + content
    if not content.endswith(']'):
        content = content + ']'

    try:
        return json.loads(content)
    except json.JSONDecodeError:
        # Look at what precedes the closing bracket, ignoring whitespace, so
        # that ",]", ", ]" and ",\n]" are all recognised as a trailing comma.
        inner = content[:-1].rstrip()
        if not inner.endswith(','):
            raise
        return json.loads(inner[:-1] + ']')


def format_entry(entry):
    """Render one sample mapping as the training text block.

    Missing keys are rendered as ``None`` because ``dict.get`` is used.
    """
    return (
        "### State\n"
        f"Confusion: {entry.get('confusion')}\n"
        f"Action: {entry.get('action')}\n"
        f"Reward: {entry.get('reward')}\n"
        f"Next Confusion: {entry.get('next_confusion')}"
    )


def main(src=None, dst=None):
    """Read samples from *src* and write the JSONL conversion to *dst*.

    Args:
        src: Input path; defaults to the module-level ``input_path``.
        dst: Output path; defaults to the module-level ``output_path``.
    """
    src = input_path if src is None else src
    dst = output_path if dst is None else dst

    print(f"Reading {src}...")
    with open(src, 'r', encoding='utf-8') as f:
        data = parse_entries(f.read())

    print(f"Converting {len(data)} entries...")
    with open(dst, 'w', encoding='utf-8') as f:
        for entry in data:
            json.dump({"text": format_entry(entry)}, f, ensure_ascii=False)
            f.write('\n')

    print(f"Done! Successfully created {dst}")


if __name__ == "__main__":
    main()
src/environment/training_samples.jsonl ADDED
The diff for this file is too large to render. See raw diff