p1k0 committed on
Commit
379170c
·
verified ·
1 Parent(s): 025c2ea

Upload folder using huggingface_hub

Browse files
eval/llama3_normal/test_result_v2.json ADDED
The diff for this file is too large to render. See raw diff
 
eval/llama3_normal/test_result_v4_lora.json ADDED
The diff for this file is too large to render. See raw diff
 
eval/llama3_r1/test_result_v1.json ADDED
The diff for this file is too large to render. See raw diff
 
eval/llama3_r1/test_result_v1_parsed.json ADDED
The diff for this file is too large to render. See raw diff
 
eval/llama3_r1/test_result_v2.json ADDED
The diff for this file is too large to render. See raw diff
 
eval/llama3_r1/test_result_v2_parsed.json ADDED
The diff for this file is too large to render. See raw diff
 
eval/parse.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Split raw model outputs into a reasoning part and an answer part.

Reads a JSON list of result items, extracts the text inside
``<think>...</think>`` and ``<answer>...</answer>`` from each item's
``"output"`` field, and writes the list back with ``"output"`` replaced by
the answer and a new ``"reasoning"`` field holding the think text.

Usage: ``python parse.py [INPUT_JSON [OUTPUT_JSON]]`` -- both paths default
to the constants below.
"""
import json
import re
import sys

INPUT_PATH = "/mnt/data/users/liamding/data/sft_zh_tox/eval/qwen3_r1/test_result_v2.json"
OUTPUT_PATH = "/mnt/data/users/liamding/data/sft_zh_tox/eval/qwen3_r1/test_result_v2_parsed.json"

# Compiled once; DOTALL lets the non-greedy body span multiple lines.
_THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)
_ANSWER_RE = re.compile(r"<answer>(.*?)</answer>", re.DOTALL)


def split_output(output, fallback_answer):
    """Return ``(answer, reasoning, parsed)`` for one raw model output.

    When both a ``<think>`` and an ``<answer>`` section are found, the
    stripped contents of the first occurrence of each are returned and
    ``parsed`` is True. Otherwise the whole ``output`` is returned as the
    reasoning, ``fallback_answer`` as the answer, and ``parsed`` is False.
    """
    think_match = _THINK_RE.search(output)
    answer_match = _ANSWER_RE.search(output)
    if think_match is None or answer_match is None:
        return fallback_answer, output, False
    return answer_match.group(1).strip(), think_match.group(1).strip(), True


def parse_items(data):
    """Rewrite ``"output"`` and add ``"reasoning"`` on every item, in place.

    Each item must provide the keys ``"output"``, ``"neutral"`` and
    ``"idx"``. Returns the same list object for convenience.
    """
    for item in data:
        # NOTE(review): "neutral" is presumably the reference text for the
        # item; if so, this fallback scores unparseable generations as
        # perfect answers -- confirm this is intended for the evaluation.
        answer, reasoning, parsed = split_output(item["output"], item["neutral"])
        if not parsed:
            print(f"Error in item {item['idx']}: think or answer content not found")
        item["output"] = answer
        item["reasoning"] = reasoning
    return data


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Load ``input_path``, parse every item, and write ``output_path``."""
    with open(input_path, "r", encoding="utf-8") as src:
        data = json.load(src)

    parse_items(data)

    # The context manager guarantees the file is flushed and closed.
    with open(output_path, "w", encoding="utf-8") as dst:
        json.dump(data, dst, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    # Up to two optional positional arguments override the default paths.
    main(*sys.argv[1:3])
eval/qwen3_normal/test_result_v1.json ADDED
The diff for this file is too large to render. See raw diff
 
eval/qwen3_r1/test_result_v2.json ADDED
The diff for this file is too large to render. See raw diff
 
eval/qwen3_r1/test_result_v2_parsed.json ADDED
The diff for this file is too large to render. See raw diff