Spaces:
Sleeping
Sleeping
“pangjh3”
committed on
Commit
·
f652754
1
Parent(s):
4126a18
modified: src/about.py
Browse files
modified: src/oss/oss_submission_handler.py
modified: src/submission/submit.py
- src/about.py +2 -1
- src/oss/oss_submission_handler.py +12 -17
- src/submission/submit.py +11 -13
src/about.py
CHANGED
|
@@ -92,7 +92,8 @@ Results can be submitted as evaluation outputs in JSON format. Each submission s
|
|
| 92 |
|
| 93 |
### Required JSON Format:
|
| 94 |
```json
|
| 95 |
-
{
|
|
|
|
| 96 |
"submission_org": "Your Organization",
|
| 97 |
"submission_email": "contact@example.com",
|
| 98 |
"predictions": [
|
|
|
|
| 92 |
|
| 93 |
### Required JSON Format:
|
| 94 |
```json
|
| 95 |
+
{
|
| 96 |
+
"model_name": "Your Model Name",
|
| 97 |
"submission_org": "Your Organization",
|
| 98 |
"submission_email": "contact@example.com",
|
| 99 |
"predictions": [
|
src/oss/oss_submission_handler.py
CHANGED
|
@@ -90,8 +90,14 @@ class OSSSubmissionHandler:
|
|
| 90 |
def generate_submission_filename(self, submission_data: Dict[str, Any]) -> str:
|
| 91 |
"""生成提交文件名"""
|
| 92 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
org_name = submission_data["submission_org"].replace(" ", "_").replace("/", "_").replace("\\", "_")
|
| 94 |
-
|
|
|
|
|
|
|
| 95 |
|
| 96 |
def upload_to_oss(self, submission_data: Dict[str, Any], filename: str) -> Tuple[bool, str]:
|
| 97 |
"""上传提交文件到OSS"""
|
|
@@ -174,23 +180,12 @@ class OSSSubmissionHandler:
|
|
| 174 |
success_msg = self.format_success(f"""
|
| 175 |
🎉 <strong>Submission successful!</strong><br><br>
|
| 176 |
📋 <strong>Submission Information:</strong><br>
|
| 177 |
-
•
|
| 178 |
-
•
|
| 179 |
-
•
|
| 180 |
-
•
|
| 181 |
-
🚀 <strong>Storage Location:</strong><br>
|
| 182 |
-
{result}<br><br>
|
| 183 |
⚡ <strong>Evaluation Status:</strong><br>
|
| 184 |
-
Your submission has been successfully uploaded to cloud storage
|
| 185 |
-
⏳ <strong>Evaluation Process:</strong><br>
|
| 186 |
-
1. 🔍 System automatically detects new submission<br>
|
| 187 |
-
2. ⬇️ Downloads and validates submission format<br>
|
| 188 |
-
3. 🔬 Performs comprehensive evaluation using LLM-as-Judge<br>
|
| 189 |
-
4. 📊 Calculates accuracy for each subject and overall<br>
|
| 190 |
-
5. 🏆 Automatically updates to leaderboard<br><br>
|
| 191 |
-
🕐 <strong>Estimated Time:</strong><br>
|
| 192 |
-
Evaluation completion time is approximately 5-15 minutes, depending on current queue length.<br>
|
| 193 |
-
Please refresh the leaderboard later to view results.<br><br>
|
| 194 |
🧪 Thank you for participating in the ATLAS scientific reasoning benchmark!
|
| 195 |
""")
|
| 196 |
|
|
|
|
| 90 |
def generate_submission_filename(self, submission_data: Dict[str, Any]) -> str:
|
| 91 |
"""生成提交文件名"""
|
| 92 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 93 |
+
|
| 94 |
+
# 获取模型名和组织名
|
| 95 |
+
model_name = submission_data.get("model_name", "UnknownModel")
|
| 96 |
+
model_name = model_name.replace(" ", "_").replace("/", "_").replace("\\", "_").replace("-", "_")
|
| 97 |
org_name = submission_data["submission_org"].replace(" ", "_").replace("/", "_").replace("\\", "_")
|
| 98 |
+
|
| 99 |
+
# 格式: submission_模型名_组织_时间戳.json
|
| 100 |
+
return f"submission_{model_name}_{org_name}_{timestamp}.json"
|
| 101 |
|
| 102 |
def upload_to_oss(self, submission_data: Dict[str, Any], filename: str) -> Tuple[bool, str]:
|
| 103 |
"""上传提交文件到OSS"""
|
|
|
|
| 180 |
success_msg = self.format_success(f"""
|
| 181 |
🎉 <strong>Submission successful!</strong><br><br>
|
| 182 |
📋 <strong>Submission Information:</strong><br>
|
| 183 |
+
• Organization: {org}<br>
|
| 184 |
+
• Email: {email_addr}<br>
|
| 185 |
+
• Number of predictions: {num_predictions} questions<br>
|
| 186 |
+
• Filename: {filename}<br><br>
|
|
|
|
|
|
|
| 187 |
⚡ <strong>Evaluation Status:</strong><br>
|
| 188 |
+
Your submission has been successfully uploaded to cloud storage.<br><br>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 189 |
🧪 Thank you for participating in the ATLAS scientific reasoning benchmark!
|
| 190 |
""")
|
| 191 |
|
src/submission/submit.py
CHANGED
|
@@ -82,8 +82,14 @@ def save_submission_file(submission_data: Dict[str, Any], submissions_dir: str =
|
|
| 82 |
|
| 83 |
# 生成文件名
|
| 84 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
org_name = submission_data["submission_org"].replace(" ", "_").replace("/", "_").replace("\\", "_")
|
| 86 |
-
|
|
|
|
|
|
|
| 87 |
|
| 88 |
# 完整文件路径
|
| 89 |
file_path = os.path.join(submissions_dir, filename)
|
|
@@ -162,18 +168,10 @@ def process_sage_submission_simple(submission_file, model_name=None, org_name=No
|
|
| 162 |
success_msg = format_success(f"""
|
| 163 |
🎉 <strong>Submission successful!</strong><br><br>
|
| 164 |
📋 <strong>Submission Information:</strong><br>
|
| 165 |
-
•
|
| 166 |
-
•
|
| 167 |
-
•
|
| 168 |
-
•
|
| 169 |
-
🚀 <strong>Storage Status:</strong><br>
|
| 170 |
-
File saved to local storage, awaiting system sync to evaluation environment.<br><br>
|
| 171 |
-
⏳ <strong>Evaluation Process:</strong><br>
|
| 172 |
-
Your submission will be automatically evaluated using LLM-as-Judge, including comprehensive testing of scientific reasoning capabilities.<br>
|
| 173 |
-
Results will appear automatically on the leaderboard after evaluation is complete.<br><br>
|
| 174 |
-
🕐 <strong>Estimated Time:</strong><br>
|
| 175 |
-
• Normal case: 5-15 minutes<br>
|
| 176 |
-
• Sync delay: 15-60 minutes<br><br>
|
| 177 |
🧪 Thank you for participating in the ATLAS scientific reasoning benchmark!
|
| 178 |
""")
|
| 179 |
|
|
|
|
| 82 |
|
| 83 |
# 生成文件名
|
| 84 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 85 |
+
|
| 86 |
+
# 获取模型名和组织名
|
| 87 |
+
model_name = submission_data.get("model_name", "UnknownModel")
|
| 88 |
+
model_name = model_name.replace(" ", "_").replace("/", "_").replace("\\", "_").replace("-", "_")
|
| 89 |
org_name = submission_data["submission_org"].replace(" ", "_").replace("/", "_").replace("\\", "_")
|
| 90 |
+
|
| 91 |
+
# 格式: submission_模型名_组织_时间戳.json
|
| 92 |
+
filename = f"submission_{model_name}_{org_name}_{timestamp}.json"
|
| 93 |
|
| 94 |
# 完整文件路径
|
| 95 |
file_path = os.path.join(submissions_dir, filename)
|
|
|
|
| 168 |
success_msg = format_success(f"""
|
| 169 |
🎉 <strong>Submission successful!</strong><br><br>
|
| 170 |
📋 <strong>Submission Information:</strong><br>
|
| 171 |
+
• Organization: {org}<br>
|
| 172 |
+
• Email: {email_addr}<br>
|
| 173 |
+
• Number of predictions: {num_predictions} questions<br>
|
| 174 |
+
• Filename: {filename}<br><br>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
🧪 Thank you for participating in the ATLAS scientific reasoning benchmark!
|
| 176 |
""")
|
| 177 |
|