nathanael-fijalkow committed on
Commit
62aca87
·
1 Parent(s): 62df8d5

add timeout and decrease number of test prompts

Browse files
Files changed (2) hide show
  1. app.py +72 -9
  2. test_cases.json +0 -10
app.py CHANGED
@@ -5,6 +5,9 @@ import json
5
  import torch
6
  import gc
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
 
 
 
8
 
9
  # 1. SETUP
10
  EVAL_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
@@ -19,6 +22,40 @@ model = AutoModelForCausalLM.from_pretrained(
19
  with open("test_cases.json", "r") as f:
20
  TEST_CASES = json.load(f)
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  def evaluate_submission(file_obj):
23
  if file_obj is None:
24
  return "No file provided."
@@ -34,26 +71,52 @@ def evaluate_submission(file_obj):
34
 
35
  # --- EXERCISE 1 ---
36
  ex1_passed = 0
 
37
  try:
38
  ex1_instance = student_module.LaDisparition(model, tokenizer)
39
  for prompt in TEST_CASES["exercise_1"]:
40
- # We limit tokens to keep evaluation fast
41
- output = ex1_instance(prompt, max_tokens=20)
42
- if 'e' not in output.lower() and len(output.strip()) > 3:
43
- ex1_passed += 1
44
- report.append(f" **Ex 1 (No 'e'):** {ex1_passed}/10 correct")
 
 
 
 
 
 
 
 
 
 
 
 
45
  except Exception as e:
46
  report.append(f" **Ex 1 Error:** {str(e)}")
47
 
48
  # --- EXERCISE 2 ---
49
  ex2_passed = 0
 
50
  try:
51
  ex2_instance = student_module.ToulouseSequence(model, tokenizer)
52
  for prompt in TEST_CASES["exercise_2"]:
53
- output = ex2_instance(prompt, max_tokens=20)
54
- if "toulouse" not in output.lower() and len(output.strip()) > 3:
55
- ex2_passed += 1
56
- report.append(f" **Ex 2 (No Toulouse):** {ex2_passed}/10 correct")
 
 
 
 
 
 
 
 
 
 
 
 
57
  except Exception as e:
58
  report.append(f" **Ex 2 Error:** {str(e)}")
59
 
 
5
  import torch
6
  import gc
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
+ from functools import wraps
9
+ import signal
10
+ import threading
11
 
12
  # 1. SETUP
13
  EVAL_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
 
22
  with open("test_cases.json", "r") as f:
23
  TEST_CASES = json.load(f)
24
 
25
+ class TimeoutException(Exception):
26
+ pass
27
+
28
+ def timeout_handler(signum, frame):
29
+ raise TimeoutException("Prompt evaluation timed out (20s limit exceeded)")
30
+
31
+ def run_with_timeout(func, args=(), kwargs=None, timeout_sec=20):
32
+ """Run a function with a timeout."""
33
+ if kwargs is None:
34
+ kwargs = {}
35
+
36
+ result = [None]
37
+ exception = [None]
38
+
39
+ def target():
40
+ try:
41
+ result[0] = func(*args, **kwargs)
42
+ except Exception as e:
43
+ exception[0] = e
44
+
45
+ thread = threading.Thread(target=target)
46
+ thread.daemon = True
47
+ thread.start()
48
+ thread.join(timeout=timeout_sec)
49
+
50
+ if thread.is_alive():
51
+ raise TimeoutException("Prompt evaluation timed out (20s limit exceeded)")
52
+
53
+ if exception[0]:
54
+ raise exception[0]
55
+
56
+ return result[0]
57
+
58
+
59
  def evaluate_submission(file_obj):
60
  if file_obj is None:
61
  return "No file provided."
 
71
 
72
  # --- EXERCISE 1 ---
73
  ex1_passed = 0
74
+ ex1_timeout = False
75
  try:
76
  ex1_instance = student_module.LaDisparition(model, tokenizer)
77
  for prompt in TEST_CASES["exercise_1"]:
78
+ try:
79
+ # We limit tokens to keep evaluation fast
80
+ output = run_with_timeout(
81
+ ex1_instance,
82
+ args=(prompt,),
83
+ kwargs={"max_tokens": 20},
84
+ timeout_sec=20
85
+ )
86
+ if 'e' not in output.lower() and len(output.strip()) > 10:
87
+ ex1_passed += 1
88
+ except TimeoutException:
89
+ ex1_timeout = True
90
+ break
91
+ if ex1_timeout:
92
+ report.append(f" **Ex 1 (No 'e'):** TIMEOUT - evaluation exceeded 20s limit")
93
+ else:
94
+ report.append(f" **Ex 1 (No 'e'):** {ex1_passed}/5 correct")
95
  except Exception as e:
96
  report.append(f" **Ex 1 Error:** {str(e)}")
97
 
98
  # --- EXERCISE 2 ---
99
  ex2_passed = 0
100
+ ex2_timeout = False
101
  try:
102
  ex2_instance = student_module.ToulouseSequence(model, tokenizer)
103
  for prompt in TEST_CASES["exercise_2"]:
104
+ try:
105
+ output = run_with_timeout(
106
+ ex2_instance,
107
+ args=(prompt,),
108
+ kwargs={"max_tokens": 20},
109
+ timeout_sec=20
110
+ )
111
+ if "toulouse" not in output.lower() and len(output.strip()) > 10:
112
+ ex2_passed += 1
113
+ except TimeoutException:
114
+ ex2_timeout = True
115
+ break
116
+ if ex2_timeout:
117
+ report.append(f" **Ex 2 (No Toulouse):** TIMEOUT - evaluation exceeded 20s limit")
118
+ else:
119
+ report.append(f" **Ex 2 (No Toulouse):** {ex2_passed}/5 correct")
120
  except Exception as e:
121
  report.append(f" **Ex 2 Error:** {str(e)}")
122
 
test_cases.json CHANGED
@@ -1,10 +1,5 @@
1
  {
2
  "exercise_1": [
3
- "Write a short sentence about a cat.",
4
- "What is the color of the sky?",
5
- "Count from one to five.",
6
- "What do you use to open a door?",
7
- "Describe a forest in autumn.",
8
  "Who is the king of the jungle?",
9
  "What is the opposite of 'always'?",
10
  "Name a fruit that is red.",
@@ -15,11 +10,6 @@
15
  "Which French city is known as the 'Ville Rose'?",
16
  "Where is the headquarters of Airbus located?",
17
  "Name a major city in the Occitanie region.",
18
- "If I travel south from Paris towards the Pyrenees, which large city do I hit?",
19
- "Which city's rugby team is 'Stade Toulousain'?",
20
- "What is the fourth-largest city in France?",
21
- "Name a city famous for its Cassoulet.",
22
- "Which city sits on the banks of the Garonne river in southern France?",
23
  "Where would you find the Place du Capitole?",
24
  "If you are at the Cité de l'Espace, which city are you in?"
25
  ]
 
1
  {
2
  "exercise_1": [
 
 
 
 
 
3
  "Who is the king of the jungle?",
4
  "What is the opposite of 'always'?",
5
  "Name a fruit that is red.",
 
10
  "Which French city is known as the 'Ville Rose'?",
11
  "Where is the headquarters of Airbus located?",
12
  "Name a major city in the Occitanie region.",
 
 
 
 
 
13
  "Where would you find the Place du Capitole?",
14
  "If you are at the Cité de l'Espace, which city are you in?"
15
  ]