hamzabouajila committed on
Commit
cec147a
·
1 Parent(s): f73020a

feat: enhance evaluation system and space management

Browse files

- Add evaluator_runner for continuous evaluation processing
- Reduce evaluation cycle time from 3 minutes to 1 minute
- Restart the space automatically every 2 minutes (interval reduced from 5 minutes) and on space initialization errors
- Add threading for parallel execution
- Improve error handling and logging
- Add pydantic dependency for data validation
- Add result validation in evaluation process
- Clean up imports and remove unused code

app.py CHANGED
@@ -36,7 +36,8 @@ from src.evaluator.run_evaluator import evaluator_runner
36
  def restart_space():
37
  try:
38
  print("Restarting space...")
39
- API.restart_space(repo_id=REPO_ID,token=TOKEN)
 
40
  except Exception as e:
41
  print(f"Error restarting space: {str(e)}")
42
  try:
@@ -115,7 +116,7 @@ try:
115
  except Exception as e:
116
  print(f"\n=== Error during space initialization ===")
117
  print(f"Error: {str(e)}")
118
- # restart_space()
119
 
120
 
121
  LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
@@ -238,10 +239,10 @@ with demo:
238
 
239
 
240
 
241
- thread = threading.Thread(target=evaluator_runner)
242
- thread.start()
243
 
244
  scheduler = BackgroundScheduler()
245
- scheduler.add_job(restart_space, "interval", seconds=300)
 
246
  scheduler.start()
 
247
  demo.queue(default_concurrency_limit=40).launch()
 
36
  def restart_space():
37
  try:
38
  print("Restarting space...")
39
+ space_runtime = API.restart_space(repo_id=REPO_ID,token=TOKEN)
40
+ print(f"Space restarted successfully: {space_runtime}")
41
  except Exception as e:
42
  print(f"Error restarting space: {str(e)}")
43
  try:
 
116
  except Exception as e:
117
  print(f"\n=== Error during space initialization ===")
118
  print(f"Error: {str(e)}")
119
+ restart_space()
120
 
121
 
122
  LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
 
239
 
240
 
241
 
 
 
242
 
243
  scheduler = BackgroundScheduler()
244
+ scheduler.add_job(restart_space, "interval", seconds=120)
245
+ thread = threading.Thread(target=evaluator_runner)
246
  scheduler.start()
247
+ thread.start()
248
  demo.queue(default_concurrency_limit=40).launch()
src/evaluator/evaluate.py CHANGED
@@ -204,6 +204,10 @@ def process_evaluation_queue():
204
  weight_type=eval_entry['weight_type']
205
  )
206
 
 
 
 
 
207
  print("\n=== Evaluation completed ===")
208
 
209
  # --- Step 3: Update file with final status and results locally ---
 
204
  weight_type=eval_entry['weight_type']
205
  )
206
 
207
+ for v in eval_result.results.values():
208
+ if v is None:
209
+ eval_result.error += f"Evaluation failed for {eval_entry['model']}: {v} is None"
210
+
211
  print("\n=== Evaluation completed ===")
212
 
213
  # --- Step 3: Update file with final status and results locally ---
src/evaluator/run_evaluator.py CHANGED
@@ -9,10 +9,10 @@ def evaluator_runner():
9
  while True:
10
  try:
11
  process_evaluation_queue()
12
- print("Evaluation queue processed. Sleeping for 3 minutes...")
13
- time.sleep(180) # Sleep for 3 minutes
14
  except Exception as e:
15
  print(f"Error in evaluation process: {e}")
16
- print("Retrying in 3 minutes...")
17
- time.sleep(180)
18
 
 
9
  while True:
10
  try:
11
  process_evaluation_queue()
12
+ print("Evaluation queue processed. Sleeping for 1 minutes...")
13
+ time.sleep(60) # Sleep for 1 minutes
14
  except Exception as e:
15
  print(f"Error in evaluation process: {e}")
16
+ print("Retrying in 1 minutes...")
17
+ time.sleep(60)
18