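Update app.py: add the missing colon to the `_submit_answers` signature, remove the `correct = 0` initializer, and log the accuracy percentage (or an error when no questions were processed)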
Browse files
app.py
CHANGED
|
@@ -2,7 +2,7 @@ import re
|
|
| 2 |
import requests
|
| 3 |
import pandas as pd
|
| 4 |
import torch
|
| 5 |
-
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 6 |
import json
|
| 7 |
import logging
|
| 8 |
import time
|
|
@@ -77,7 +77,6 @@ class GAIAEvaluator:
|
|
| 77 |
|
| 78 |
# Обработка вопросов
|
| 79 |
answers = []
|
| 80 |
-
correct = 0
|
| 81 |
|
| 82 |
for i, q in enumerate(questions):
|
| 83 |
task_id = q.get("task_id", f"task_{i}")
|
|
@@ -112,7 +111,7 @@ class GAIAEvaluator:
|
|
| 112 |
except Exception as e:
|
| 113 |
return f"Connection error: {str(e)}"
|
| 114 |
|
| 115 |
-
def _submit_answers(self, username: str, agent_code: str, answers: list)
|
| 116 |
"""Отправка ответов на сервер"""
|
| 117 |
try:
|
| 118 |
payload = {
|
|
@@ -150,4 +149,8 @@ if __name__ == "__main__":
|
|
| 150 |
elapsed = time.time() - start_time
|
| 151 |
logger.info(f"Оценка завершена за {elapsed:.1f} сек")
|
| 152 |
logger.info(f"Результат: {score}/{total} правильных ответов")
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import requests
|
| 3 |
import pandas as pd
|
| 4 |
import torch
|
| 5 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 6 |
import json
|
| 7 |
import logging
|
| 8 |
import time
|
|
|
|
| 77 |
|
| 78 |
# Обработка вопросов
|
| 79 |
answers = []
|
|
|
|
| 80 |
|
| 81 |
for i, q in enumerate(questions):
|
| 82 |
task_id = q.get("task_id", f"task_{i}")
|
|
|
|
| 111 |
except Exception as e:
|
| 112 |
return f"Connection error: {str(e)}"
|
| 113 |
|
| 114 |
+
def _submit_answers(self, username: str, agent_code: str, answers: list):
|
| 115 |
"""Отправка ответов на сервер"""
|
| 116 |
try:
|
| 117 |
payload = {
|
|
|
|
| 149 |
elapsed = time.time() - start_time
|
| 150 |
logger.info(f"Оценка завершена за {elapsed:.1f} сек")
|
| 151 |
logger.info(f"Результат: {score}/{total} правильных ответов")
|
| 152 |
+
|
| 153 |
+
if total > 0:
|
| 154 |
+
logger.info(f"Точность: {score/total*100:.1f}%")
|
| 155 |
+
else:
|
| 156 |
+
logger.error("Не удалось обработать ни одного вопроса")
|