Cbphcr committed on
Commit
6201931
·
verified ·
1 Parent(s): df7b57a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +178 -177
app.py CHANGED
@@ -1,177 +1,178 @@
1
- import os
2
- import time
3
- import json
4
- import shutil
5
- import zipfile
6
- import gradio as gr
7
- from eval_exp import evaluate
8
- from datetime import datetime
9
- from apscheduler.schedulers.background import BackgroundScheduler
10
-
11
-
12
- def load_splits():
13
- splits_dir = "chinatravel/evaluation/default_splits"
14
- splits = []
15
- for filename in os.listdir(splits_dir):
16
- if filename.endswith(".txt"):
17
- splits.append(filename.replace(".txt", ""))
18
- return splits
19
-
20
-
21
- SPLITS_LIST = load_splits()
22
- # SUBMIT_DIR = "./submissions"
23
- # OUTPUT_DIR = "./outputs"
24
- SUBMIT_DIR = os.path.abspath("submissions")
25
- OUTPUT_DIR = os.path.abspath("outputs")
26
-
27
- shutil.rmtree(SUBMIT_DIR, ignore_errors=True)
28
- shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
29
- os.makedirs(SUBMIT_DIR, exist_ok=True)
30
- os.makedirs(OUTPUT_DIR, exist_ok=True)
31
- print(f"Submission directory: {SUBMIT_DIR}")
32
- print(f"Output directory: {OUTPUT_DIR}")
33
- # clear directories if they already exist
34
-
35
-
36
-
37
- def clean_old_outputs(folder, keep_hours=24):
38
- now = time.time()
39
- for fname in os.listdir(folder):
40
- fpath = os.path.join(folder, fname)
41
- if os.path.isfile(fpath) and now - os.path.getmtime(fpath) > keep_hours * 3600:
42
- os.remove(fpath)
43
-
44
-
45
- scheduler = BackgroundScheduler()
46
- scheduler.add_job(lambda: clean_old_outputs(OUTPUT_DIR), "interval", hours=6)
47
- scheduler.start()
48
-
49
-
50
- class Arguments:
51
- def __init__(self, splits, result_dir):
52
- self.splits = splits
53
- self.result_dir = result_dir
54
-
55
-
56
- def handle_submission(zip_file, dataset_choice):
57
- if zip_file is None:
58
- # yield "❌ 请上传 zip 文件!", 0, 0, 0, None
59
- yield "❌ Please upload a zip file!", 0, 0, 0, None
60
- return
61
-
62
- shutil.rmtree(SUBMIT_DIR, ignore_errors=True)
63
- os.makedirs(SUBMIT_DIR, exist_ok=True)
64
-
65
- with zipfile.ZipFile(zip_file.name, "r") as zip_ref:
66
- # print(f"正在解压缩 {zip_file.name} 到 {SUBMIT_DIR}...")
67
- print(f"Extracting {zip_file.name} to {SUBMIT_DIR}...")
68
- zip_ref.extractall(SUBMIT_DIR)
69
-
70
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
71
- print(f"Submission dir: {SUBMIT_DIR}")
72
- print(os.path.splitext(zip_file.name))
73
- unzipped_dir = os.path.join(
74
- SUBMIT_DIR, os.path.basename(zip_file.name).replace(".zip", "")
75
- )
76
- print(f"Unzipped directory: {unzipped_dir}")
77
- output_path = os.path.join(OUTPUT_DIR, f"result_main_{timestamp}.json")
78
- args = Arguments(splits=dataset_choice, result_dir=unzipped_dir)
79
-
80
- try:
81
- # yield "🚀 开始测评...", 0, 0, 0, None
82
- yield "🚀 Starting evaluation...", 0, 0, 0, None
83
-
84
- result = {}
85
- for progress in evaluate(args, result):
86
- stage = progress.get("stage", "")
87
- progress_value = progress.get("progress", 0)
88
-
89
- if stage == "schema":
90
- # yield "Schema 阶段测评中...", progress_value, 0, 0, None
91
- yield "Schema evaluation in progress...", 100, progress_value, 0, None
92
- elif stage == "commonsense":
93
- # yield "Commonsense 阶段测评中...", 100, progress_value, 0, None
94
- yield "Commonsense evaluation in progress...", 100, 100, progress_value, None
95
- elif stage == "logic":
96
- # yield "Logic 阶段测评中...", 100, 100, progress_value, None
97
- yield "Logic evaluation in progress...", 100, 100, 100, None
98
- elif stage == "final":
99
- result.update(progress.get("result", {}))
100
- # yield "测评完成,正在保存结果...", 100, 100, 100, None
101
- yield "Evaluation completed, saving results...", 100, 100, 100, None
102
-
103
- # 保存结果到文件
104
- with open(output_path, "w", encoding="utf-8") as f:
105
- json.dump(result, f, ensure_ascii=False, indent=4)
106
-
107
- # 在测评完成后更新结果文件的值和可见性
108
- result_file.value = output_path
109
- result_file.visible = True
110
- yield "✅ 测评完成!", 100, 100, 100, output_path
111
-
112
- except Exception as e:
113
- import traceback
114
-
115
- traceback.print_exc()
116
- # yield f"❌ 测评异常:{e}", 0, 0, 0, None
117
- yield f"❌ Evaluation error: {e}", 0, 0, 0, None
118
-
119
-
120
- with gr.Blocks() as demo:
121
- # gr.Markdown("# 📊 ChinaTravel 模型测评")
122
- gr.Markdown(
123
- "# 📊 ChinaTravel Benchmark Evaluation"
124
- )
125
-
126
- # with gr.Row():
127
- # zip_input = gr.File(label="上传模型预测 zip 文件", file_types=[".zip"])
128
- # dataset_choice = gr.Radio(
129
- # SPLITS_LIST, label="选择评估数据集", value="validation"
130
- # )
131
- zip_input = gr.File(label="Upload zip file of results", file_types=[".zip"])
132
- dataset_choice = gr.Radio(
133
- SPLITS_LIST, label="Select evaluation dataset", value="validation"
134
- )
135
-
136
-
137
- # submit_btn = gr.Button("开始测评")
138
- submit_btn = gr.Button("Start Evaluation")
139
-
140
-
141
- # 添加三个进度条
142
- # schema_progress = gr.Slider(
143
- # label="Schema 阶段进度", minimum=0, maximum=100, value=0, interactive=False
144
- # )
145
- # commonsense_progress = gr.Slider(
146
- # label="Commonsense 阶段进度", minimum=0, maximum=100, value=0, interactive=False
147
- # )
148
- # logic_progress = gr.Slider(
149
- # label="Logic 阶段进度", minimum=0, maximum=100, value=0, interactive=False
150
- # )
151
-
152
- schema_progress = gr.Slider(
153
- label="Schema Stage Progress", minimum=0, maximum=100, value=0, interactive=False
154
- )
155
- commonsense_progress = gr.Slider(
156
- label="Commonsense Stage Progress", minimum=0, maximum=100, value=0, interactive=False
157
- )
158
- logic_progress = gr.Slider(
159
- label="Logic Stage Progress", minimum=0, maximum=100, value=0, interactive=False
160
- )
161
- output_msg = gr.Markdown()
162
- # result_file = gr.File(label="结果文件下载") # , visible=False)
163
- result_file = gr.File(label="Result File Download")
164
-
165
- submit_btn.click(
166
- handle_submission,
167
- inputs=[zip_input, dataset_choice],
168
- outputs=[
169
- output_msg,
170
- schema_progress,
171
- commonsense_progress,
172
- logic_progress,
173
- result_file,
174
- ],
175
- )
176
-
177
- demo.launch(debug=True)
 
 
1
+ import os
2
+ import time
3
+ import json
4
+ import shutil
5
+ import zipfile
6
+ import gradio as gr
7
+ from eval_exp import evaluate
8
+ from datetime import datetime
9
+ from apscheduler.schedulers.background import BackgroundScheduler
10
+
11
+
12
def load_splits():
    """Return the names of the evaluation splits available on disk.

    Every ``*.txt`` file found in ``chinatravel/evaluation/default_splits``
    (resolved against the current working directory) defines one split; the
    split name is the file name with its trailing ``.txt`` removed.

    Returns:
        list[str]: Split names in sorted order, so the list is the same on
        every run regardless of the order the OS lists the directory in.

    Raises:
        OSError: Propagated from ``os.listdir`` when the splits directory
            cannot be read (e.g. it does not exist).
    """
    splits_dir = "chinatravel/evaluation/default_splits"
    suffix = ".txt"
    # Slice off only the trailing suffix: str.replace() would also remove a
    # ".txt" that happens to occur in the middle of a file name.
    return sorted(
        filename[: -len(suffix)]
        for filename in os.listdir(splits_dir)
        if filename.endswith(suffix)
    )
19
+
20
+
21
# Names of the evaluation splits offered in the UI, read from disk once at import time.
SPLITS_LIST = load_splits()

# Absolute paths of the two working directories, resolved against the
# current working directory.
SUBMIT_DIR = os.path.abspath("submissions")
OUTPUT_DIR = os.path.abspath("outputs")

# Start from empty directories: remove whatever already exists, then recreate.
shutil.rmtree(SUBMIT_DIR, ignore_errors=True)
os.makedirs(SUBMIT_DIR, exist_ok=True)
shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"Submission directory: {SUBMIT_DIR}")
print(f"Output directory: {OUTPUT_DIR}")
34
+
35
+
36
+
37
def clean_old_outputs(folder, keep_hours=24):
    """Delete regular files in *folder* whose modification time is too old.

    Only direct children of *folder* are examined; sub-directories are left
    in place.

    Args:
        folder: Directory to scan.
        keep_hours: Files last modified more than this many hours ago are
            removed.
    """
    scan_started = time.time()
    max_age_seconds = keep_hours * 3600
    for entry_name in os.listdir(folder):
        candidate = os.path.join(folder, entry_name)
        if not os.path.isfile(candidate):
            continue
        age_seconds = scan_started - os.path.getmtime(candidate)
        if age_seconds > max_age_seconds:
            os.remove(candidate)
43
+
44
+
45
def _purge_expired_outputs():
    """Scheduled job: run clean_old_outputs on OUTPUT_DIR with its default retention."""
    clean_old_outputs(OUTPUT_DIR)


# Background job that runs the clean-up every 6 hours.
scheduler = BackgroundScheduler()
scheduler.add_job(_purge_expired_outputs, "interval", hours=6)
scheduler.start()
48
+
49
+
50
class Arguments:
    """Plain attribute container handed to ``evaluate``.

    Attributes:
        splits: The split selection chosen for the evaluation.
        result_dir: Directory holding the results to evaluate.
    """

    def __init__(self, splits, result_dir):
        self.splits, self.result_dir = splits, result_dir
54
+
55
+
56
def handle_submission(zip_file, dataset_choice):
    """Evaluate an uploaded submission archive and stream progress updates.

    Generator used as the click handler of the submit button. Every yielded
    item is a 5-tuple
    ``(status_message, schema_value, commonsense_value, logic_value, result_path)``
    where the three numeric values feed the stage sliders and ``result_path``
    is ``None`` until the result JSON has been written.

    Args:
        zip_file: The uploaded archive, given either as a filesystem path or
            as an object exposing that path as ``.name``; ``None`` when
            nothing was uploaded.
        dataset_choice: Split selection, forwarded to ``evaluate`` as
            ``args.splits``.

    Yields:
        tuple: Progress tuples as described above. Failures while unpacking
        the archive or evaluating it are not raised: the traceback is printed
        and a final ``"❌ Evaluation error: ..."`` tuple is yielded instead.
    """
    if zip_file is None:
        yield "❌ Please upload a zip file!", 0, 0, 0, None
        return

    # Accept both a plain path and a file wrapper that carries the path in `.name`.
    zip_path = zip_file if isinstance(zip_file, (str, os.PathLike)) else zip_file.name

    try:
        # NOTE: SUBMIT_DIR is a single shared directory, so each submission
        # discards whatever the previous one unpacked.
        shutil.rmtree(SUBMIT_DIR, ignore_errors=True)
        os.makedirs(SUBMIT_DIR, exist_ok=True)

        # Unpacking happens inside the try block so that a corrupt or
        # non-zip upload is reported as an error message to the user.
        with zipfile.ZipFile(zip_path, "r") as zip_ref:
            print(f"Extracting {zip_path} to {SUBMIT_DIR}...")
            zip_ref.extractall(SUBMIT_DIR)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        print(f"Submission dir: {SUBMIT_DIR}")

        # The results are looked up in a folder named after the archive.
        # Strip only a trailing ".zip" so names containing ".zip" elsewhere
        # are left intact.
        archive_name = os.path.basename(zip_path)
        if archive_name.lower().endswith(".zip"):
            archive_name = archive_name[: -len(".zip")]
        unzipped_dir = os.path.join(SUBMIT_DIR, archive_name)
        print(f"Unzipped directory: {unzipped_dir}")

        output_path = os.path.join(OUTPUT_DIR, f"result_main_{timestamp}.json")
        args = Arguments(splits=dataset_choice, result_dir=unzipped_dir)

        yield "🚀 Starting evaluation...", 0, 0, 0, None

        result = {}
        for progress in evaluate(args, result):
            stage = progress.get("stage", "")
            progress_value = progress.get("progress", 0)

            # NOTE(review): the slider slots below are shifted by one stage
            # (the "schema" value lands on the commonsense slider, and the
            # "logic" value is not shown at all). Kept as-is because the
            # meaning of `stage` is defined by `evaluate` - confirm there.
            if stage == "schema":
                yield "Schema evaluation in progress...", 100, progress_value, 0, None
            elif stage == "commonsense":
                yield "Commonsense evaluation in progress...", 100, 100, progress_value, None
            elif stage == "logic":
                yield "Logic evaluation in progress...", 100, 100, 100, None
            elif stage == "final":
                result.update(progress.get("result", {}))
                yield "Evaluation completed, saving results...", 100, 100, 100, None

        # Persist the collected result so it can be offered for download.
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(result, f, ensure_ascii=False, indent=4)

        # The path is delivered through the last tuple slot; the shared
        # result component itself is deliberately not mutated here.
        yield "✅ Evaluation completed!", 100, 100, 100, output_path

    except Exception as e:
        import traceback

        traceback.print_exc()
        yield f"❌ Evaluation error: {e}", 0, 0, 0, None
119
+
120
+
121
def _stage_slider(label):
    """Create a non-interactive 0-100 slider used as the progress bar of one stage."""
    return gr.Slider(label=label, minimum=0, maximum=100, value=0, interactive=False)


with gr.Blocks() as demo:
    gr.Markdown("# 📊 ChinaTravel Benchmark Evaluation")

    # Inputs: the submission archive and the split to evaluate it on.
    zip_input = gr.File(label="Upload zip file of results", file_types=[".zip"])
    dataset_choice = gr.Radio(
        SPLITS_LIST, label="Select evaluation dataset", value="validation"
    )
    submit_btn = gr.Button("Start Evaluation")

    # One progress bar per evaluation stage.
    schema_progress = _stage_slider("Schema Stage Progress")
    commonsense_progress = _stage_slider("Commonsense Stage Progress")
    logic_progress = _stage_slider("Logic Stage Progress")

    # Status line and the download slot for the result file.
    output_msg = gr.Markdown()
    result_file = gr.File(label="Result File Download")

    # The order here must match the tuples yielded by handle_submission.
    handler_outputs = [
        output_msg,
        schema_progress,
        commonsense_progress,
        logic_progress,
        result_file,
    ]
    submit_btn.click(
        handle_submission,
        inputs=[zip_input, dataset_choice],
        outputs=handler_outputs,
    )

demo.launch(debug=True)