Upload folder using huggingface_hub
Browse files- eval/llama3_normal/test_result_v2.json +0 -0
- eval/llama3_normal/test_result_v4_lora.json +0 -0
- eval/llama3_r1/test_result_v1.json +0 -0
- eval/llama3_r1/test_result_v1_parsed.json +0 -0
- eval/llama3_r1/test_result_v2.json +0 -0
- eval/llama3_r1/test_result_v2_parsed.json +0 -0
- eval/parse.py +20 -0
- eval/qwen3_normal/test_result_v1.json +0 -0
- eval/qwen3_r1/test_result_v2.json +0 -0
- eval/qwen3_r1/test_result_v2_parsed.json +0 -0
eval/llama3_normal/test_result_v2.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval/llama3_normal/test_result_v4_lora.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval/llama3_r1/test_result_v1.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval/llama3_r1/test_result_v1_parsed.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval/llama3_r1/test_result_v2.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval/llama3_r1/test_result_v2_parsed.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval/parse.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
# Default locations of the raw model outputs and of the parsed result file.
INPUT_PATH = "/mnt/data/users/liamding/data/sft_zh_tox/eval/qwen3_r1/test_result_v2.json"
OUTPUT_PATH = "/mnt/data/users/liamding/data/sft_zh_tox/eval/qwen3_r1/test_result_v2_parsed.json"

# Compiled once; DOTALL lets the tagged sections span several lines.
_THINK_RE = re.compile(r"<think>(.*?)</think>", re.DOTALL)
_ANSWER_RE = re.compile(r"<answer>(.*?)</answer>", re.DOTALL)


def split_reasoning(item):
    """Split ``item["output"]`` into its reasoning and answer parts, in place.

    The first ``<think>...</think>`` section becomes ``item["reasoning"]`` and
    the first ``<answer>...</answer>`` section replaces ``item["output"]``;
    both are stripped of surrounding whitespace.

    If either section is missing, a message naming ``item["idx"]`` is printed,
    the whole raw output is kept as the reasoning, and ``item["neutral"]`` is
    used as the output.

    Args:
        item: A dict with an "output" string. "idx" and "neutral" are read
            only when a section is missing.

    Returns:
        The same ``item`` object, after modification.
    """
    raw_output = item["output"]
    think_match = _THINK_RE.search(raw_output)
    answer_match = _ANSWER_RE.search(raw_output)

    if think_match is None or answer_match is None:
        print(f"Error in item {item['idx']}: think or answer content not found")
        reasoning = raw_output
        # NOTE(review): falls back to the item's own "neutral" field; if that
        # is the reference text, it will be scored as the model output --
        # confirm this is intended.
        answer = item["neutral"]
    else:
        reasoning = think_match.group(1).strip()
        answer = answer_match.group(1).strip()

    item["output"] = answer
    item["reasoning"] = reasoning
    return item


def parse_file(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Parse every item of a JSON result file and write the parsed copy.

    Args:
        input_path: JSON file holding a list of result items.
        output_path: Destination for the parsed items (UTF-8, indent of 4,
            non-ASCII characters written as-is).

    Returns:
        The list of parsed items that was written.
    """
    # Context managers guarantee both handles are closed, so the output is
    # fully flushed to disk before the function returns.
    with open(input_path, "r", encoding="utf-8") as src:
        data = json.load(src)

    for item in data:
        split_reasoning(item)

    with open(output_path, "w", encoding="utf-8") as dst:
        json.dump(data, dst, ensure_ascii=False, indent=4)
    return data


if __name__ == "__main__":
    parse_file()
|
eval/qwen3_normal/test_result_v1.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval/qwen3_r1/test_result_v2.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
eval/qwen3_r1/test_result_v2_parsed.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|