TunisianEncodersArena

Runtime error

hamzabouajila committed on Aug 5

Commit

cec147a

1 Parent(s): f73020a

feat: enhance evaluation system and space management

- Add evaluator_runner for continuous evaluation processing
- Reduce evaluation cycle time from 3 minutes to 1 minute
- Shorten the space auto-restart interval from 5 minutes to 2 minutes
- Add threading for parallel execution
- Improve error handling and logging
- Add pydantic dependency for data validation
- Add result validation in evaluation process
- Clean up imports and remove unused code

Files changed (3) hide show

app.py +6 -5
src/evaluator/evaluate.py +4 -0
src/evaluator/run_evaluator.py +4 -4

app.py CHANGED Viewed

@@ -36,7 +36,8 @@ from src.evaluator.run_evaluator import evaluator_runner
 def restart_space():
     try:
         print("Restarting space...")
-        API.restart_space(repo_id=REPO_ID,token=TOKEN)
     except Exception as e:
         print(f"Error restarting space: {str(e)}")
         try:
@@ -115,7 +116,7 @@ try:
 except Exception as e:
     print(f"\n=== Error during space initialization ===")
     print(f"Error: {str(e)}")
-    # restart_space()
 LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
@@ -238,10 +239,10 @@ with demo:
-thread = threading.Thread(target=evaluator_runner)
-thread.start()
 scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=300)
 scheduler.start()
 demo.queue(default_concurrency_limit=40).launch()

 def restart_space():
     try:
         print("Restarting space...")
+        space_runtime = API.restart_space(repo_id=REPO_ID,token=TOKEN)
+        print(f"Space restarted successfully: {space_runtime}")
     except Exception as e:
         print(f"Error restarting space: {str(e)}")
         try:
 except Exception as e:
     print(f"\n=== Error during space initialization ===")
     print(f"Error: {str(e)}")
+    restart_space()
 LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=120)
+thread = threading.Thread(target=evaluator_runner)
 scheduler.start()
+thread.start()
 demo.queue(default_concurrency_limit=40).launch()

src/evaluator/evaluate.py CHANGED Viewed

@@ -204,6 +204,10 @@ def process_evaluation_queue():
                             weight_type=eval_entry['weight_type']
                         )
                         print("\n=== Evaluation completed ===")
                         # --- Step 3: Update file with final status and results locally ---

                             weight_type=eval_entry['weight_type']
                         )
+                        for v in eval_result.results.values():
+                            if v is None:
+                                eval_result.error += f"Evaluation failed for {eval_entry['model']}: {v} is None"
                         print("\n=== Evaluation completed ===")
                         # --- Step 3: Update file with final status and results locally ---

src/evaluator/run_evaluator.py CHANGED Viewed

@@ -9,10 +9,10 @@ def evaluator_runner():
     while True:
         try:
             process_evaluation_queue()
-            print("Evaluation queue processed. Sleeping for 3 minutes...")
-            time.sleep(180)  # Sleep for 3 minutes
         except Exception as e:
             print(f"Error in evaluation process: {e}")
-            print("Retrying in 3 minutes...")
-            time.sleep(180)

     while True:
         try:
             process_evaluation_queue()
+            print("Evaluation queue processed. Sleeping for 1 minutes...")
+            time.sleep(60)  # Sleep for 1 minutes
         except Exception as e:
             print(f"Error in evaluation process: {e}")
+            print("Retrying in 1 minutes...")
+            time.sleep(60)