Spaces:
Running
Running
Commit ·
62aca87
1
Parent(s): 62df8d5
add timeout and decrease number of test prompts
Browse files
- app.py +72 -9
- test_cases.json +0 -10
app.py
CHANGED
|
@@ -5,6 +5,9 @@ import json
|
|
| 5 |
import torch
|
| 6 |
import gc
|
| 7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
|
|
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# 1. SETUP
|
| 10 |
EVAL_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
|
|
@@ -19,6 +22,40 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 19 |
with open("test_cases.json", "r") as f:
|
| 20 |
TEST_CASES = json.load(f)
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
def evaluate_submission(file_obj):
|
| 23 |
if file_obj is None:
|
| 24 |
return "No file provided."
|
|
@@ -34,26 +71,52 @@ def evaluate_submission(file_obj):
|
|
| 34 |
|
| 35 |
# --- EXERCISE 1 ---
|
| 36 |
ex1_passed = 0
|
|
|
|
| 37 |
try:
|
| 38 |
ex1_instance = student_module.LaDisparition(model, tokenizer)
|
| 39 |
for prompt in TEST_CASES["exercise_1"]:
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
except Exception as e:
|
| 46 |
report.append(f" **Ex 1 Error:** {str(e)}")
|
| 47 |
|
| 48 |
# --- EXERCISE 2 ---
|
| 49 |
ex2_passed = 0
|
|
|
|
| 50 |
try:
|
| 51 |
ex2_instance = student_module.ToulouseSequence(model, tokenizer)
|
| 52 |
for prompt in TEST_CASES["exercise_2"]:
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
except Exception as e:
|
| 58 |
report.append(f" **Ex 2 Error:** {str(e)}")
|
| 59 |
|
|
|
|
| 5 |
import torch
|
| 6 |
import gc
|
| 7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 8 |
+
from functools import wraps
|
| 9 |
+
import signal
|
| 10 |
+
import threading
|
| 11 |
|
| 12 |
# 1. SETUP
|
| 13 |
EVAL_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
|
|
|
|
| 22 |
with open("test_cases.json", "r") as f:
|
| 23 |
TEST_CASES = json.load(f)
|
| 24 |
|
| 25 |
+
class TimeoutException(Exception):
    """Raised when a prompt evaluation does not finish within its time limit."""
|
| 27 |
+
|
| 28 |
+
def timeout_handler(signum, frame):
    """Abort the current evaluation by raising ``TimeoutException``.

    The ``(signum, frame)`` signature matches what ``signal.signal`` expects
    of a handler; both arguments are ignored.

    NOTE(review): no ``signal.signal`` registration of this handler is
    visible in this part of the file -- confirm whether it is still used.

    Raises:
        TimeoutException: Always.
    """
    raise TimeoutException("Prompt evaluation timed out (20s limit exceeded)")
|
| 30 |
+
|
| 31 |
+
def run_with_timeout(func, args=(), kwargs=None, timeout_sec=20):
    """Run ``func(*args, **kwargs)`` in a worker thread with a time limit.

    Args:
        func: Callable to execute.
        args: Positional arguments passed to ``func``.
        kwargs: Keyword arguments passed to ``func``; ``None`` means none.
        timeout_sec: Maximum number of seconds to wait for ``func``.

    Returns:
        Whatever ``func`` returns.

    Raises:
        TimeoutException: If ``func`` is still running after ``timeout_sec``
            seconds.
        Exception: Any ``Exception`` raised by ``func`` is re-raised in the
            calling thread.

    Note:
        Python threads cannot be forcibly stopped. On timeout the daemon
        worker is abandoned, not cancelled: it keeps running in the
        background until ``func`` returns or the process exits.
    """
    if kwargs is None:
        kwargs = {}

    # Filled in by the worker; key presence (not truthiness) marks the outcome,
    # so an exception object that evaluates as falsy is still propagated.
    outcome = {}

    def target():
        try:
            outcome["result"] = func(*args, **kwargs)
        except Exception as exc:
            outcome["error"] = exc

    thread = threading.Thread(target=target, daemon=True)
    thread.start()
    thread.join(timeout=timeout_sec)

    if thread.is_alive():
        # Report the limit that was actually applied, not a hard-coded value.
        raise TimeoutException(
            f"Prompt evaluation timed out ({timeout_sec}s limit exceeded)"
        )

    if "error" in outcome:
        raise outcome["error"]

    return outcome.get("result")
|
| 57 |
+
|
| 58 |
+
|
| 59 |
def evaluate_submission(file_obj):
|
| 60 |
if file_obj is None:
|
| 61 |
return "No file provided."
|
|
|
|
| 71 |
|
| 72 |
# --- EXERCISE 1 ---
|
| 73 |
ex1_passed = 0
|
| 74 |
+
ex1_timeout = False
|
| 75 |
try:
|
| 76 |
ex1_instance = student_module.LaDisparition(model, tokenizer)
|
| 77 |
for prompt in TEST_CASES["exercise_1"]:
|
| 78 |
+
try:
|
| 79 |
+
# We limit tokens to keep evaluation fast
|
| 80 |
+
output = run_with_timeout(
|
| 81 |
+
ex1_instance,
|
| 82 |
+
args=(prompt,),
|
| 83 |
+
kwargs={"max_tokens": 20},
|
| 84 |
+
timeout_sec=20
|
| 85 |
+
)
|
| 86 |
+
if 'e' not in output.lower() and len(output.strip()) > 10:
|
| 87 |
+
ex1_passed += 1
|
| 88 |
+
except TimeoutException:
|
| 89 |
+
ex1_timeout = True
|
| 90 |
+
break
|
| 91 |
+
if ex1_timeout:
|
| 92 |
+
report.append(f" **Ex 1 (No 'e'):** TIMEOUT - evaluation exceeded 20s limit")
|
| 93 |
+
else:
|
| 94 |
+
report.append(f" **Ex 1 (No 'e'):** {ex1_passed}/5 correct")
|
| 95 |
except Exception as e:
|
| 96 |
report.append(f" **Ex 1 Error:** {str(e)}")
|
| 97 |
|
| 98 |
# --- EXERCISE 2 ---
|
| 99 |
ex2_passed = 0
|
| 100 |
+
ex2_timeout = False
|
| 101 |
try:
|
| 102 |
ex2_instance = student_module.ToulouseSequence(model, tokenizer)
|
| 103 |
for prompt in TEST_CASES["exercise_2"]:
|
| 104 |
+
try:
|
| 105 |
+
output = run_with_timeout(
|
| 106 |
+
ex2_instance,
|
| 107 |
+
args=(prompt,),
|
| 108 |
+
kwargs={"max_tokens": 20},
|
| 109 |
+
timeout_sec=20
|
| 110 |
+
)
|
| 111 |
+
if "toulouse" not in output.lower() and len(output.strip()) > 10:
|
| 112 |
+
ex2_passed += 1
|
| 113 |
+
except TimeoutException:
|
| 114 |
+
ex2_timeout = True
|
| 115 |
+
break
|
| 116 |
+
if ex2_timeout:
|
| 117 |
+
report.append(f" **Ex 2 (No Toulouse):** TIMEOUT - evaluation exceeded 20s limit")
|
| 118 |
+
else:
|
| 119 |
+
report.append(f" **Ex 2 (No Toulouse):** {ex2_passed}/5 correct")
|
| 120 |
except Exception as e:
|
| 121 |
report.append(f" **Ex 2 Error:** {str(e)}")
|
| 122 |
|
test_cases.json
CHANGED
|
@@ -1,10 +1,5 @@
|
|
| 1 |
{
|
| 2 |
"exercise_1": [
|
| 3 |
-
"Write a short sentence about a cat.",
|
| 4 |
-
"What is the color of the sky?",
|
| 5 |
-
"Count from one to five.",
|
| 6 |
-
"What do you use to open a door?",
|
| 7 |
-
"Describe a forest in autumn.",
|
| 8 |
"Who is the king of the jungle?",
|
| 9 |
"What is the opposite of 'always'?",
|
| 10 |
"Name a fruit that is red.",
|
|
@@ -15,11 +10,6 @@
|
|
| 15 |
"Which French city is known as the 'Ville Rose'?",
|
| 16 |
"Where is the headquarters of Airbus located?",
|
| 17 |
"Name a major city in the Occitanie region.",
|
| 18 |
-
"If I travel south from Paris towards the Pyrenees, which large city do I hit?",
|
| 19 |
-
"Which city's rugby team is 'Stade Toulousain'?",
|
| 20 |
-
"What is the fourth-largest city in France?",
|
| 21 |
-
"Name a city famous for its Cassoulet.",
|
| 22 |
-
"Which city sits on the banks of the Garonne river in southern France?",
|
| 23 |
"Where would you find the Place du Capitole?",
|
| 24 |
"If you are at the Cité de l'Espace, which city are you in?"
|
| 25 |
]
|
|
|
|
| 1 |
{
|
| 2 |
"exercise_1": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
"Who is the king of the jungle?",
|
| 4 |
"What is the opposite of 'always'?",
|
| 5 |
"Name a fruit that is red.",
|
|
|
|
| 10 |
"Which French city is known as the 'Ville Rose'?",
|
| 11 |
"Where is the headquarters of Airbus located?",
|
| 12 |
"Name a major city in the Occitanie region.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
"Where would you find the Place du Capitole?",
|
| 14 |
"If you are at the Cité de l'Espace, which city are you in?"
|
| 15 |
]
|