Commit cec147a
Parent(s): f73020a
feat: enhance evaluation system and space management
- Add evaluator_runner for continuous evaluation processing
- Reduce evaluation cycle time from 3 to 1 minute
- Implement space auto-restart every 2 minutes
- Add threading for parallel execution
- Improve error handling and logging
- Add pydantic dependency for data validation (see the sketch below the changed-files list)
- Add result validation in evaluation process
- Clean up imports and remove unused code
- app.py +6 -5
- src/evaluator/evaluate.py +4 -0
- src/evaluator/run_evaluator.py +4 -4
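
The pydantic dependency noted above does not appear in any of the hunks below. As a hedged illustration only, queue-entry validation with pydantic could look like the following; EvalEntry is a hypothetical name, and its fields simply mirror the eval_entry keys ('model', 'weight_type') used in src/evaluator/evaluate.py:

    # Hypothetical sketch: validating an evaluation queue entry with pydantic.
    # EvalEntry is an assumed name; the fields mirror the eval_entry keys used
    # in src/evaluator/evaluate.py, but this commit does not show a real model.
    from pydantic import BaseModel, ValidationError

    class EvalEntry(BaseModel):
        model: str
        weight_type: str

    def parse_entry(raw: dict):
        try:
            return EvalEntry(**raw)
        except ValidationError as e:
            print(f"Invalid eval entry: {e}")
            return None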
app.py
CHANGED

@@ -36,7 +36,8 @@ from src.evaluator.run_evaluator import evaluator_runner
 def restart_space():
     try:
         print("Restarting space...")
-        API.restart_space(repo_id=REPO_ID,token=TOKEN)
+        space_runtime = API.restart_space(repo_id=REPO_ID,token=TOKEN)
+        print(f"Space restarted successfully: {space_runtime}")
     except Exception as e:
         print(f"Error restarting space: {str(e)}")
     try:

@@ -115,7 +116,7 @@ try:
 except Exception as e:
     print(f"\n=== Error during space initialization ===")
     print(f"Error: {str(e)}")
-
+    restart_space()
 
 
 LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)

@@ -238,10 +239,10 @@ with demo:
 
 
 
-    thread = threading.Thread(target=evaluator_runner)
-    thread.start()
 
     scheduler = BackgroundScheduler()
-    scheduler.add_job(restart_space, "interval", seconds=
+    scheduler.add_job(restart_space, "interval", seconds=120)
+    thread = threading.Thread(target=evaluator_runner)
     scheduler.start()
+    thread.start()
     demo.queue(default_concurrency_limit=40).launch()
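
The reordered startup in the hunk above registers the 2-minute restart job before either background task begins. A minimal self-contained sketch of the same pattern follows; the job and worker bodies are placeholders, and daemon=True is an addition not present in the diff:

    # Sketch of the startup pattern above; restart_space and evaluator_runner
    # bodies are stand-ins, not the Space's real code.
    import threading
    import time
    from apscheduler.schedulers.background import BackgroundScheduler

    def restart_space():
        print("Restarting space...")  # stand-in for API.restart_space(...)

    def evaluator_runner():
        while True:
            print("Processing evaluation queue...")  # stand-in
            time.sleep(60)

    scheduler = BackgroundScheduler()
    scheduler.add_job(restart_space, "interval", seconds=120)  # every 2 minutes
    # daemon=True is an addition here so the worker cannot block interpreter
    # shutdown; the diff constructs the thread without it.
    thread = threading.Thread(target=evaluator_runner, daemon=True)
    scheduler.start()
    thread.start()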
src/evaluator/evaluate.py
CHANGED

@@ -204,6 +204,10 @@ def process_evaluation_queue():
             weight_type=eval_entry['weight_type']
         )
 
+        for v in eval_result.results.values():
+            if v is None:
+                eval_result.error += f"Evaluation failed for {eval_entry['model']}: {v} is None"
+
         print("\n=== Evaluation completed ===")
 
         # --- Step 3: Update file with final status and results locally ---
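
One caveat about the loop added above: it iterates over values only, so whenever the branch fires, v is None by definition and the appended message always reads "... None is None". A variant that names the failing metric instead, assuming eval_result.results maps metric names to scores and eval_result.error is a string (neither type is shown in this diff):

    # Sketch only: report which metric came back empty rather than
    # interpolating the None value itself. Assumes results is a dict of
    # metric name -> score and error is a str; the diff shows neither.
    for metric, score in eval_result.results.items():
        if score is None:
            eval_result.error += (
                f"Evaluation failed for {eval_entry['model']}: "
                f"metric '{metric}' is None. "
            )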
src/evaluator/run_evaluator.py
CHANGED

@@ -9,10 +9,10 @@ def evaluator_runner():
     while True:
         try:
             process_evaluation_queue()
-            print("Evaluation queue processed. Sleeping for 3 minutes...")
-            time.sleep(180)
+            print("Evaluation queue processed. Sleeping for 1 minute...")
+            time.sleep(60)  # Sleep for 1 minute
         except Exception as e:
             print(f"Error in evaluation process: {e}")
-            print("Retrying in 3 minutes...")
-            time.sleep(180)
+            print("Retrying in 1 minute...")
+            time.sleep(60)
 
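
The loop now sleeps a fixed 60 seconds whether a pass succeeds or fails. A common refinement is to back off on repeated failures; a sketch under the assumption that process_evaluation_queue is importable from src.evaluator.evaluate, as the hunk header implies:

    # Sketch only, not part of this commit: the same loop with exponential
    # backoff on repeated failures, capped at max_delay seconds.
    import time

    from src.evaluator.evaluate import process_evaluation_queue  # assumed path

    def evaluator_runner_with_backoff(base_delay: int = 60, max_delay: int = 600):
        delay = base_delay
        while True:
            try:
                process_evaluation_queue()
                delay = base_delay  # healthy pass: reset the delay
                print(f"Evaluation queue processed. Sleeping for {delay} seconds...")
            except Exception as e:
                print(f"Error in evaluation process: {e}")
                delay = min(delay * 2, max_delay)  # failed pass: back off
                print(f"Retrying in {delay} seconds...")
            time.sleep(delay)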