Delete convert_wo_docs_into_json.py
Browse files- convert_wo_docs_into_json.py +0 -134
convert_wo_docs_into_json.py
DELETED
|
@@ -1,134 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import json
|
| 3 |
-
|
| 4 |
-
# Maps each score file name to the metadata used when building its report row.
# Each value is a 4-item list, read positionally by the conversion loop:
#   [0] display name of the model written to the "Model" field
#   [1] category written to the "Category" field
#   [2] input-token price used in the cost formula
#   [3] output-token price used in the cost formula
# NOTE(review): prices are presumably USD per million tokens (the cost formula
# divides by 1,000,000) — confirm against the provider price lists.
# NOTE(review): the file names appear to encode "<model>_reasoning_<effort>_search_0";
# a few entries below pair a "reasoning_False" file with a "(Thinking)" label or a
# "reasoning_high" file with an Instruct category — confirm these are intended.
filename_to_args = {
    "gpt-5_reasoning_high_search_0.jsonl": ["OpenAI/GPT-5 (high)", "Closed-source Reasoning", 1.25, 10],
    "gpt-5-mini_reasoning_high_search_0.jsonl": ["OpenAI/GPT-5-mini (high)","Closed-source Reasoning", 0.25, 2],
    "gpt-5-nano_reasoning_high_search_0.jsonl": ["OpenAI/GPT-5-nano (high)","Closed-source Reasoning", 0.05, 0.4],
    "o3_reasoning_medium_search_0.jsonl": ["OpenAI/o3","Closed-source Reasoning", 2, 8],
    "o4-mini_reasoning_medium_search_0.jsonl": ["OpenAI/o4-mini","Closed-source Reasoning", 1.1, 4.4],
    "gemini-2.5-pro_reasoning_high_search_0.jsonl": ["Google/Gemini-2.5-Pro","Closed-source Reasoning", 1.25, 10],
    "gemini-2.5-flash_reasoning_high_search_0.jsonl": ["Google/Gemini-2.5-Flash (Thinking)","Closed-source Reasoning", 0.3, 2.5],
    "gemini-2.5-flash-lite_reasoning_high_search_0.jsonl": ["Google/Gemini-2.5-Flash-Lite (Thinking)","Closed-source Reasoning", 0.1, 0.4],
    "x-ai_grok-4_reasoning_high_search_0.jsonl": ["xAI/grok-4-0709","Closed-source Reasoning", 3, 15],
    "anthropic_claude-sonnet-4_reasoning_high_search_0.jsonl": ["Anthropic/claude-sonnet-4 (Thinking)","Closed-source Reasoning", 3, 15],
    "openai_gpt-oss-120b_reasoning_high_search_0.jsonl": ["OpenAI/gpt-oss-120b (high)", "Open-weight Reasoning", 0.04, 0.4],
    "openai_gpt-oss-20b_reasoning_high_search_0.jsonl": ["OpenAI/gpt-oss-20b (high)", "Open-weight Reasoning", 0.03, 0.14],
    "deepseek_deepseek-chat-v3.1_reasoning_high_search_0.jsonl": ["DeepSeek-AI/DeepSeek-V3.1 (Thinking)", "Open-weight Reasoning", 0.2, 0.8],
    "qwen_qwen3-235b-a22b-thinking-2507_reasoning_high_search_0.jsonl": ["Qwen/Qwen3-235B-A22B-Thinking-2507", "Open-weight Reasoning", 0.11, 0.6],
    "qwen_qwen3-30b-a3b-thinking-2507_reasoning_high_search_0.jsonl": ["Qwen/Qwen3-30B-A3B-Thinking-2507", "Open-weight Reasoning", 0.08, 0.29],
    "gpt-4.1_reasoning_False_search_0.jsonl":["OpenAI/GPT-4.1", "Closed-source Instruct", 2, 8],
    "gpt-4.1-mini_reasoning_False_search_0.jsonl":["OpenAI/GPT-4.1-mini", "Closed-source Instruct", 0.4, 1.6],
    "gpt-4.1-nano_reasoning_False_search_0.jsonl":["OpenAI/GPT-4.1-nano", "Closed-source Instruct", 0.1, 0.4],
    "gemini-2.5-flash_reasoning_False_search_0.jsonl": ["Google/Gemini-2.5-Flash","Closed-source Instruct", 0.3, 2.5],
    "gemini-2.5-flash-lite_reasoning_False_search_0.jsonl": ["Google/Gemini-2.5-Flash-Lite","Closed-source Instruct", 0.1, 0.4],
    "anthropic_claude-sonnet-4_reasoning_False_search_0.jsonl": ["Anthropic/claude-sonnet-4","Closed-source Instruct", 3, 15],
    "anthropic_claude-3.5-haiku_reasoning_False_search_0.jsonl": ["Anthropic/claude-3.5-haiku", "Closed-source Instruct", 0.8, 4],
    "qwen_qwen3-235b-a22b-2507_reasoning_False_search_0.jsonl": ["Qwen/Qwen3-235B-A22B-Instruct-2507", "Open-weight Instruct", 0.08, 0.55],
    "qwen_qwen3-30b-a3b-instruct-2507_reasoning_False_search_0.jsonl": ["Qwen/Qwen3-30B-A3B-Instruct-2507", "Open-weight Instruct", 0.08, 0.33],
    "deepseek_deepseek-chat-v3.1_reasoning_False_search_0.jsonl": ["DeepSeek-AI/DeepSeek-V3.1", "Open-weight Instruct", 0.2, 0.8],
    # NOTE(review): "reasoning_high" file categorised as Instruct — confirm intended.
    "moonshotai_kimi-k2-0905_reasoning_high_search_0.jsonl": ["MoonshotAI/Kimi-K2-Instruct-0905", "Open-weight Instruct", 0.39, 1.9],
    "meta-llama_llama-4-maverick_reasoning_False_search_0.jsonl": ["Meta/llama-4-maverick", "Open-weight Instruct", 0.15, 0.6],
    "meta-llama_llama-4-scout_reasoning_False_search_0.jsonl": ["Meta/llama-4-scout", "Open-weight Instruct", 0.08, 0.3],
    "x-ai_grok-4-fast_reasoning_high_search_0.jsonl":["xAI/grok-4-fast (Thinking)", "Closed-source Reasoning", 0.2, 0.5],
    "x-ai_grok-4-fast_reasoning_False_search_0.jsonl":["xAI/grok-4-fast", "Closed-source Instruct", 0.2, 0.5],
    "anthropic_claude-haiku-4.5_reasoning_high_search_0.jsonl": ["Anthropic/claude-haiku-4.5 (Thinking)","Closed-source Reasoning", 1, 5],
    "anthropic_claude-haiku-4.5_reasoning_False_search_0.jsonl": ["Anthropic/claude-haiku-4.5","Closed-source Instruct", 1, 5],
    "anthropic_claude-sonnet-4.5_reasoning_high_search_0.jsonl": ["Anthropic/claude-sonnet-4.5 (Thinking)","Closed-source Reasoning", 3, 15],
    "anthropic_claude-sonnet-4.5_reasoning_False_search_0.jsonl": ["Anthropic/claude-sonnet-4.5","Closed-source Instruct", 3, 15],
    "minimax_minimax-m2_reasoning_high_search_0.jsonl": ["MiniMax/M2 (Thinking)","Open-weight Reasoning", 0.15, 0.45],
    "minimax_minimax-m2_reasoning_False_search_0.jsonl": ["MiniMax/M2","Open-weight Instruct", 0.15, 0.45],
    "moonshotai_kimi-k2-thinking_reasoning_high_search_0.jsonl": ["MoonshotAI/Kimi-K2-Thinking","Open-weight Reasoning", 0.55, 2.25],
    "deepseek_deepseek-v3.2-exp_reasoning_high_search_0.jsonl": ["DeepSeek-AI/DeepSeek-V3.2-Exp (Thinking)", "Open-weight Reasoning", 0.27, 0.4],
    "deepseek_deepseek-v3.2-exp_reasoning_False_search_0.jsonl": ["DeepSeek-AI/DeepSeek-V3.2-Exp", "Open-weight Instruct", 0.27, 0.4],
    "gemini-3-pro-preview_reasoning_high_search_0.jsonl": ["Google/Gemini-3-Pro-Preview","Closed-source Reasoning", 2, 12],
    "x-ai_grok-4.1-fast_reasoning_high_search_0.jsonl":["xAI/grok-4.1-fast (Thinking)", "Closed-source Reasoning", 0.2, 0.5],
    "x-ai_grok-4.1-fast_reasoning_False_search_0.jsonl":["xAI/grok-4.1-fast", "Closed-source Instruct", 0.2, 0.5],
    "gpt-5.1_reasoning_high_search_0.jsonl": ["OpenAI/GPT-5.1 (high)", "Closed-source Reasoning", 1.25, 10],
    "anthropic_claude-opus-4.5_reasoning_high_search_0.jsonl": ["Anthropic/claude-opus-4.5 (Thinking)","Closed-source Reasoning", 5, 25],
    "deepseek_deepseek-v3.2_reasoning_high_search_0.jsonl": ["DeepSeek-AI/DeepSeek-V3.2 (Thinking)", "Open-weight Reasoning", 0.27, 0.4],
    "deepseek_deepseek-v3.2_reasoning_False_search_0.jsonl": ["DeepSeek-AI/DeepSeek-V3.2", "Open-weight Instruct", 0.27, 0.4],
    "gpt-5.2_reasoning_xhigh_search_0.jsonl": ["OpenAI/GPT-5.2 (xhigh)", "Closed-source Reasoning", 1.75, 14],
    "gemini-3-flash-preview_reasoning_high_search_0.jsonl": ["Google/Gemini-3-Flash-Preview (Thinking)","Closed-source Reasoning", 0.5, 3],
    "gemini-3-flash-preview_reasoning_False_search_0.jsonl": ["Google/Gemini-3-Flash-Preview","Closed-source Instruct", 0.5, 3],
    "z-ai_glm-4.7_reasoning_high_search_0.jsonl":["Z-AI/GLM-4.7 (Thinking)", "Open-weight Reasoning", 0.4, 1.5],
    "z-ai_glm-4.7_reasoning_False_search_0.jsonl":["Z-AI/GLM-4.7", "Open-weight Instruct", 0.4, 1.5],
    "minimax_minimax-m2.1_reasoning_high_search_0.jsonl": ["MiniMax/MiniMax-M2.1 (Thinking)", "Open-weight Reasoning", 0.3, 1.2],
    "minimax_minimax-m2.1_reasoning_False_search_0.jsonl": ["MiniMax/MiniMax-M2.1", "Open-weight Instruct", 0.3, 1.2],
    # NOTE(review): "reasoning_False" file labelled "(Thinking)" / Reasoning — confirm intended.
    "anthropic_claude-opus-4.6_reasoning_False_search_0.jsonl": ["Anthropic/claude-opus-4.6 (Thinking)","Closed-source Reasoning", 5, 25],
    "moonshotai_kimi-k2.5_reasoning_high_search_0.jsonl": ["MoonshotAI/Kimi-K2.5 (Thinking)","Open-weight Reasoning", 0.45, 2.5],
    "moonshotai_kimi-k2.5_reasoning_False_search_0.jsonl": ["MoonshotAI/Kimi-K2.5","Open-weight Instruct", 0.45, 2.5],
    "z-ai_glm-5_reasoning_high_search_0.jsonl":["Z-AI/GLM-5 (Thinking)", "Open-weight Reasoning", 1.0, 3.2],
    "z-ai_glm-5_reasoning_False_search_0.jsonl":["Z-AI/GLM-5", "Open-weight Instruct", 1.0, 3.2],
    "qwen_qwen3.5-397b-a17b_reasoning_high_search_0.jsonl": ["Qwen/Qwen3.5-397B-A17B (Thinking)", "Open-weight Reasoning", 0.6, 3.6],
    "qwen_qwen3.5-397b-a17b_reasoning_False_search_0.jsonl": ["Qwen/Qwen3.5-397B-A17B", "Open-weight Instruct", 0.6, 3.6],
    "minimax_minimax-m2.5_reasoning_high_search_0.jsonl": ["MiniMax/MiniMax-M2.5 (Thinking)", "Open-weight Reasoning", 0.3, 1.2],
    "minimax_minimax-m2.5_reasoning_False_search_0.jsonl": ["MiniMax/MiniMax-M2.5", "Open-weight Instruct", 0.3, 1.2],
    # NOTE(review): "reasoning_False" file labelled "(Thinking)" / Reasoning — confirm intended.
    "anthropic_claude-sonnet-4.6_reasoning_False_search_0.jsonl": ["Anthropic/claude-sonnet-4.6 (Thinking)","Closed-source Reasoning", 3, 15],
    "gemini-3.1-pro-preview_reasoning_high_search_0.jsonl": ["Google/Gemini-3.1-Pro-Preview","Closed-source Reasoning", 2, 12],
    "gpt-5.4_reasoning_xhigh_search_0.jsonl": ["OpenAI/GPT-5.4 (xhigh)", "Closed-source Reasoning", 2.5, 15],
    "gpt-5.3-codex_reasoning_xhigh_search_0.jsonl":["OpenAI/GPT-5.3-Codex (xhigh)", "Closed-source Reasoning", 1.75, 14],
    "gpt-5.3-chat-latest_reasoning_False_search_0.jsonl":["OpenAI/GPT-5.3-Chat", "Closed-source Instruct", 1.75, 14],
    "x-ai_grok-4.20-beta_reasoning_high_search_0.jsonl":["xAI/grok-4.20 Beta (Thinking)", "Closed-source Reasoning", 2, 6],
    "minimax_minimax-m2.7_reasoning_high_search_0.jsonl": ["MiniMax/MiniMax-M2.7 (Thinking)", "Open-weight Reasoning", 0.3, 1.2],
    "minimax_minimax-m2.7_reasoning_False_search_0.jsonl": ["MiniMax/MiniMax-M2.7", "Open-weight Instruct", 0.3, 1.2],
    "google_gemma-4-31b-it_reasoning_high_search_0.jsonl": ["Google/Gemma-4-31B-It (Thinking)", "Open-weight Reasoning", 0.14, 0.4],
    "google_gemma-4-31b-it_reasoning_False_search_0.jsonl": ["Google/Gemma-4-31B-It", "Open-weight Instruct", 0.14, 0.4],
    # NOTE(review): "reasoning_False" file labelled "(Thinking)" / Reasoning — confirm intended.
    "anthropic_claude-opus-4.7_reasoning_False_search_0.jsonl": ["Anthropic/claude-opus-4.7 (Thinking)","Closed-source Reasoning", 5, 25],
    "z-ai_glm-5.1_reasoning_high_search_0.jsonl":["Z-AI/GLM-5.1 (Thinking)", "Open-weight Reasoning", 0.95, 3.15],
    "z-ai_glm-5.1_reasoning_False_search_0.jsonl":["Z-AI/GLM-5.1", "Open-weight Instruct", 0.95, 3.15],
    "moonshotai_kimi-k2.6_reasoning_high_search_0.jsonl": ["MoonshotAI/Kimi-K2.6 (Thinking)","Open-weight Reasoning", 0.6, 2.8],
    "moonshotai_kimi-k2.6_reasoning_False_search_0.jsonl": ["MoonshotAI/Kimi-K2.6","Open-weight Instruct", 0.6, 2.8],

}
|
| 84 |
-
|
| 85 |
-
# Report column titles in "&"-separated form, split into a list below.
# Neither `names` nor `columns` is used by the conversion itself; both are
# kept so any other code that reads them keeps working.
names = "Model & Physics & Chemistry & Finance & Consulting & Overall & Extraction & Reasoning & Style & Response Characters & Input Tokens & Output Tokens & Cost "

columns = [i.strip() for i in names.split("&")]

# Destination JSON-lines report, one object per model.
output_filename = "report_generation.jsonl"

# Directory holding the per-model score files named by `filename_to_args`.
folder = "../ProfBench/scores/"

# Multiplier applied to the token counts in the cost formula.
# NOTE(review): presumably the number of benchmark prompts/responses, with the
# token counts being per-response averages — confirm with the benchmark owner.
TOKEN_COUNT_MULTIPLIER = 160


def build_row(model, category, in_cost, out_cost, scores):
    """Build one report row from a model's metadata and its score record.

    Args:
        model: Display name written to the "Model" field.
        category: Category label written to the "Category" field.
        in_cost: Input-token price (divided by 1,000,000 in the cost formula).
        out_cost: Output-token price (divided by 1,000,000 in the cost formula).
        scores: Mapping loaded from the score file; must contain every key
            read below.

    Returns:
        A dict with the report fields, in the order they are serialised.

    Raises:
        KeyError: If `scores` lacks one of the expected keys.
    """
    prompt_tokens = scores["prompt_tokens"]
    completion_tokens = scores["completion_tokens"]
    return {
        "Model": model,
        "Category": category,
        "Overall": scores["Overall"],
        "Physics": scores["Physics PhD"],
        "Chemistry": scores["Chemistry PhD"],
        "Finance": scores["Finance MBA"],
        "Consulting": scores["Consulting MBA"],
        "Extraction": scores["Extraction (recall)"],
        "Reasoning": scores["Reasoning"],
        "Style": scores["Style"],
        "Response Characters": scores["response_len_chars"],
        "Input Tokens": prompt_tokens,
        "Output Tokens": completion_tokens,
        # Same arithmetic and evaluation order as the original expression so
        # the rounded value is unchanged.
        "Cost": round(
            TOKEN_COUNT_MULTIPLIER / 1000000
            * (in_cost * prompt_tokens + out_cost * completion_tokens),
            2,
        ),
    }


def convert_scores(model_table, scores_folder, output_path):
    """Convert every score file listed in `model_table` into one JSONL report.

    All inputs are validated and all rows are built before the output file is
    opened, so a missing input or malformed record never truncates or
    half-writes an existing report.

    Args:
        model_table: Mapping of score file name to a list whose first four
            items are [model, category, in_cost, out_cost].
        scores_folder: Directory containing the score files.
        output_path: Path of the JSON-lines report to write.

    Raises:
        ValueError: If a listed score file does not exist (message is
            "<filename> is not found", naming the first missing file).
        KeyError: If a score record lacks an expected key.
    """
    # Pass 1: fail fast on missing inputs, before touching the output file.
    paths = {}
    for filename in model_table:
        path = os.path.join(scores_folder, filename)
        if not os.path.exists(path):
            raise ValueError(filename + " is not found")
        paths[filename] = path

    # Pass 2: load each record and build its row in memory.
    lines = []
    for filename, args in model_table.items():
        # Despite the .jsonl extension the file is parsed as a single JSON
        # document, as in the original script.
        with open(paths[filename], "r", encoding="utf-8") as f:
            one_row = json.load(f)

        print(args)
        model, category, in_cost, out_cost = args[:4]
        print(model)

        lines.append(json.dumps(build_row(model, category, in_cost, out_cost, one_row)) + "\n")

    # Pass 3: everything succeeded, so (re)write the report in one go.
    with open(output_path, "w", encoding="utf-8") as fw:
        fw.writelines(lines)


def main():
    """Run the conversion with the module-level configuration."""
    convert_scores(filename_to_args, folder, output_filename)


if __name__ == "__main__":
    main()
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|