Spaces:

LLM-course
/

lipogram_private

Running

App Files Files Community

nathanael-fijalkow committed on Jan 5

Commit

62aca87

1 Parent(s): 62df8d5

add timeout and decrease number of test prompts

Browse files

Files changed (2) hide show

app.py +72 -9
test_cases.json +0 -10

app.py CHANGED Viewed

@@ -5,6 +5,9 @@ import json
 import torch
 import gc
 from transformers import AutoModelForCausalLM, AutoTokenizer
 # 1. SETUP
 EVAL_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
@@ -19,6 +22,40 @@ model = AutoModelForCausalLM.from_pretrained(
 with open("test_cases.json", "r") as f:
     TEST_CASES = json.load(f)
 def evaluate_submission(file_obj):
     if file_obj is None:
         return "No file provided."
@@ -34,26 +71,52 @@ def evaluate_submission(file_obj):
         # --- EXERCISE 1 ---
         ex1_passed = 0
         try:
             ex1_instance = student_module.LaDisparition(model, tokenizer)
             for prompt in TEST_CASES["exercise_1"]:
-                # We limit tokens to keep evaluation fast
-                output = ex1_instance(prompt, max_tokens=20)
-                if 'e' not in output.lower() and len(output.strip()) > 3:
-                    ex1_passed += 1
-            report.append(f" **Ex 1 (No 'e'):** {ex1_passed}/10 correct")
         except Exception as e:
             report.append(f" **Ex 1 Error:** {str(e)}")
         # --- EXERCISE 2 ---
         ex2_passed = 0
         try:
             ex2_instance = student_module.ToulouseSequence(model, tokenizer)
             for prompt in TEST_CASES["exercise_2"]:
-                output = ex2_instance(prompt, max_tokens=20)
-                if "toulouse" not in output.lower() and len(output.strip()) > 3:
-                    ex2_passed += 1
-            report.append(f" **Ex 2 (No Toulouse):** {ex2_passed}/10 correct")
         except Exception as e:
             report.append(f" **Ex 2 Error:** {str(e)}")

 import torch
 import gc
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from functools import wraps
+import signal
+import threading
 # 1. SETUP
 EVAL_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
 with open("test_cases.json", "r") as f:
     TEST_CASES = json.load(f)
+class TimeoutException(Exception):
+    """Raised when a prompt evaluation does not finish within its time limit."""
+def timeout_handler(signum, frame):
+    """Raise ``TimeoutException`` to abort the evaluation in progress.
+
+    Both parameters are accepted but unused.
+    NOTE(review): the (signum, frame) signature looks like a ``signal``
+    handler, but no ``signal.signal(...)`` registration is visible in this
+    excerpt -- confirm whether this function is still used.
+    """
+    message = "Prompt evaluation timed out (20s limit exceeded)"
+    raise TimeoutException(message)
+def run_with_timeout(func, args=(), kwargs=None, timeout_sec=20):
+    """Run ``func(*args, **kwargs)`` in a worker thread with a time limit.
+
+    Args:
+        func: Callable to execute.
+        args: Positional arguments forwarded to ``func``.
+        kwargs: Keyword arguments forwarded to ``func``; ``None`` means none.
+        timeout_sec: Maximum number of seconds to wait for ``func``.
+
+    Returns:
+        The value returned by ``func``.
+
+    Raises:
+        TimeoutException: If ``func`` is still running after ``timeout_sec``
+            seconds.
+        Exception: Any ``Exception`` raised by ``func`` is re-raised here.
+    """
+    if kwargs is None:
+        kwargs = {}
+    # One-element lists let the worker hand its outcome back to this frame.
+    result = [None]
+    exception = [None]
+
+    def target():
+        try:
+            result[0] = func(*args, **kwargs)
+        except Exception as e:
+            exception[0] = e
+
+    # Daemon thread: a call that never returns must not block interpreter exit.
+    thread = threading.Thread(target=target, daemon=True)
+    thread.start()
+    thread.join(timeout=timeout_sec)
+    if thread.is_alive():
+        # The worker is not stopped here; it keeps running in the background
+        # while the caller moves on. Report the limit that was actually used.
+        raise TimeoutException(
+            f"Prompt evaluation timed out ({timeout_sec:g}s limit exceeded)"
+        )
+    # Identity check: an exception object may evaluate as falsy.
+    if exception[0] is not None:
+        raise exception[0]
+    return result[0]
 def evaluate_submission(file_obj):
     if file_obj is None:
         return "No file provided."
         # --- EXERCISE 1 ---
         ex1_passed = 0
+        ex1_timeout = False
         try:
             ex1_instance = student_module.LaDisparition(model, tokenizer)
             for prompt in TEST_CASES["exercise_1"]:
+                try:
+                    # We limit tokens to keep evaluation fast
+                    output = run_with_timeout(
+                        ex1_instance,
+                        args=(prompt,),
+                        kwargs={"max_tokens": 20},
+                        timeout_sec=20
+                    )
+                    if 'e' not in output.lower() and len(output.strip()) > 10:
+                        ex1_passed += 1
+                except TimeoutException:
+                    ex1_timeout = True
+                    break
+            if ex1_timeout:
+                report.append(f" **Ex 1 (No 'e'):** TIMEOUT - evaluation exceeded 20s limit")
+            else:
+                report.append(f" **Ex 1 (No 'e'):** {ex1_passed}/5 correct")
         except Exception as e:
             report.append(f" **Ex 1 Error:** {str(e)}")
         # --- EXERCISE 2 ---
         ex2_passed = 0
+        ex2_timeout = False
         try:
             ex2_instance = student_module.ToulouseSequence(model, tokenizer)
             for prompt in TEST_CASES["exercise_2"]:
+                try:
+                    output = run_with_timeout(
+                        ex2_instance,
+                        args=(prompt,),
+                        kwargs={"max_tokens": 20},
+                        timeout_sec=20
+                    )
+                    if "toulouse" not in output.lower() and len(output.strip()) > 10:
+                        ex2_passed += 1
+                except TimeoutException:
+                    ex2_timeout = True
+                    break
+            if ex2_timeout:
+                report.append(f" **Ex 2 (No Toulouse):** TIMEOUT - evaluation exceeded 20s limit")
+            else:
+                report.append(f" **Ex 2 (No Toulouse):** {ex2_passed}/5 correct")
         except Exception as e:
             report.append(f" **Ex 2 Error:** {str(e)}")

test_cases.json CHANGED Viewed

@@ -1,10 +1,5 @@
 {
   "exercise_1": [
-    "Write a short sentence about a cat.",
-    "What is the color of the sky?",
-    "Count from one to five.",
-    "What do you use to open a door?",
-    "Describe a forest in autumn.",
     "Who is the king of the jungle?",
     "What is the opposite of 'always'?",
     "Name a fruit that is red.",
@@ -15,11 +10,6 @@
     "Which French city is known as the 'Ville Rose'?",
     "Where is the headquarters of Airbus located?",
     "Name a major city in the Occitanie region.",
-    "If I travel south from Paris towards the Pyrenees, which large city do I hit?",
-    "Which city's rugby team is 'Stade Toulousain'?",
-    "What is the fourth-largest city in France?",
-    "Name a city famous for its Cassoulet.",
-    "Which city sits on the banks of the Garonne river in southern France?",
     "Where would you find the Place du Capitole?",
     "If you are at the Cité de l'Espace, which city are you in?"
   ]

 {
   "exercise_1": [
     "Who is the king of the jungle?",
     "What is the opposite of 'always'?",
     "Name a fruit that is red.",
     "Which French city is known as the 'Ville Rose'?",
     "Where is the headquarters of Airbus located?",
     "Name a major city in the Occitanie region.",
     "Where would you find the Place du Capitole?",
     "If you are at the Cité de l'Espace, which city are you in?"
   ]