nathanael-fijalkow committed on
Commit
62df8d5
·
1 Parent(s): 28ff637
Files changed (2) hide show
  1. app.py +48 -32
  2. requirements.txt +2 -1
app.py CHANGED
@@ -3,11 +3,11 @@ import importlib.util
3
  import os
4
  import json
5
  import torch
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
 
8
- # --- EVALUATION MODEL: SmolLM2 ---
9
  EVAL_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
10
-
11
  tokenizer = AutoTokenizer.from_pretrained(EVAL_MODEL)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  EVAL_MODEL,
@@ -15,49 +15,65 @@ model = AutoModelForCausalLM.from_pretrained(
15
  device_map="auto"
16
  )
17
 
 
18
  with open("test_cases.json", "r") as f:
19
  TEST_CASES = json.load(f)
20
 
21
  def evaluate_submission(file_obj):
22
- if file_obj is None: return "Missing file."
23
-
 
24
  try:
25
- spec = importlib.util.spec_from_file_location("student_code", file_obj.name)
26
- module = importlib.util.module_from_spec(spec)
27
- spec.loader.exec_module(module)
 
 
28
 
29
- feedback = [f"### Evaluated with {EVAL_MODEL}"]
30
 
31
  # --- EXERCISE 1 ---
32
- ex1_class = getattr(module, "LaDisparition")(model, tokenizer)
33
  ex1_passed = 0
34
- for prompt in TEST_CASES["exercise_1"]:
35
- output = ex1_class(prompt, max_tokens=25)
36
- if 'e' not in output.lower() and len(output.strip()) > 5:
37
- ex1_passed += 1
38
-
39
- ex1_score = (ex1_passed / 10) * 100
40
- feedback.append(f"**Ex 1 (No 'e'):** {ex1_score}% ({ex1_passed}/10)")
 
 
 
41
 
42
  # --- EXERCISE 2 ---
43
- ex2_class = getattr(module, "ToulouseSequence")(model, tokenizer)
44
  ex2_passed = 0
45
- for prompt in TEST_CASES["exercise_2"]:
46
- output = ex2_class(prompt, max_tokens=25)
47
- if "toulouse" not in output.lower() and len(output.strip()) > 5:
48
- ex2_passed += 1
49
-
50
- ex2_score = (ex2_passed / 10) * 100
51
- feedback.append(f"**Ex 2 (No Toulouse):** {ex2_score}% ({ex2_passed}/10)")
 
 
52
 
53
- total = (ex1_score + ex2_score) / 2
54
- return f"# Final Score: {total}%\n\n" + "\n".join(feedback)
 
 
 
 
55
 
56
  except Exception as e:
57
- return f"Submission Error: {str(e)}"
 
 
 
 
 
 
 
 
58
 
59
- demo = gr.Interface(fn=evaluate_submission, inputs=gr.File(), outputs="markdown")
60
- demo.queue(
61
- default_concurrency_limit=2, # Processes 2 students at a time
62
- max_size=50 # Holds up to 50 students in a waiting line
63
- ).launch()
 
3
  import os
4
  import json
5
  import torch
6
+ import gc
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
 
9
+ # 1. SETUP
10
  EVAL_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
 
11
  tokenizer = AutoTokenizer.from_pretrained(EVAL_MODEL)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  EVAL_MODEL,
 
15
  device_map="auto"
16
  )
17
 
18
# Load the secret test cases once at startup; graders read this mapping only.
with open("test_cases.json", "r") as cases_file:
    TEST_CASES = json.load(cases_file)
21
 
22
def _run_exercise(student_module, class_name, case_key, label, output_ok):
    """Grade one exercise: instantiate the student's class and count passing prompts.

    Args:
        student_module: the imported student module.
        class_name: name of the class to look up on the student module.
        case_key: key into TEST_CASES for this exercise's prompts.
        label: human-readable exercise label used in the report line.
        output_ok: predicate on the generated text (the exercise constraint).

    Returns:
        One markdown report line (score or error message) — never raises,
        so one broken exercise does not void the whole submission.
    """
    cases = TEST_CASES[case_key]
    try:
        instance = getattr(student_module, class_name)(model, tokenizer)
        passed = 0
        for prompt in cases:
            # Token budget kept small so a full evaluation stays fast.
            output = instance(prompt, max_tokens=20)
            # A trivially short/empty output does not count as a pass.
            if output_ok(output) and len(output.strip()) > 3:
                passed += 1
        # Report the real case count instead of a hard-coded /10.
        return f" **{label}:** {passed}/{len(cases)} correct"
    except Exception as e:
        return f" **{label} Error:** {str(e)}"


def evaluate_submission(file_obj):
    """Grade an uploaded student file against the secret test cases.

    Args:
        file_obj: Gradio file wrapper; only its ``.name`` (a filesystem
            path) is used. May be None when nothing was uploaded.

    Returns:
        A markdown report string, or an error message when the upload is
        missing or the student module cannot be imported.
    """
    if file_obj is None:
        return "No file provided."

    student_module = None
    try:
        # 2. ISOLATED LOADING
        # A fixed module name per import avoids polluting sys.modules;
        # each submission is executed from its own file path.
        spec = importlib.util.spec_from_file_location("student_module", file_obj.name)
        student_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(student_module)

        report = [f"## Results for {EVAL_MODEL}\n"]

        # --- EXERCISE 1: generated text must avoid the letter 'e' ---
        report.append(_run_exercise(
            student_module, "LaDisparition", "exercise_1", "Ex 1 (No 'e')",
            lambda out: 'e' not in out.lower(),
        ))

        # --- EXERCISE 2: generated text must avoid the word "toulouse" ---
        report.append(_run_exercise(
            student_module, "ToulouseSequence", "exercise_2", "Ex 2 (No Toulouse)",
            lambda out: "toulouse" not in out.lower(),
        ))

        return "\n".join(report)

    except Exception as e:
        return f"### System Error during import:\n{str(e)}"
    finally:
        # 3. CLEANUP (crucial with many consecutive submissions!)
        # Runs on BOTH success and failure paths, so a crashing submission
        # cannot leave GPU memory pinned for the next student.
        if student_module is not None:
            del student_module
        gc.collect()
        torch.cuda.empty_cache()
69
+
70
# 4. LAUNCH WITH CONCURRENCY CONTROL
demo = gr.Interface(
    fn=evaluate_submission,
    inputs=gr.File(label="Submission File"),
    outputs="markdown",
    api_name="predict",  # This matches the 'api_name' used in the Public Portal
)

# Concurrency is limited to 1 so the shared GPU doesn't OOM with 200 students.
# max_size bounds the waiting line (as in the previous revision) so the queue
# cannot grow without limit under a submission burst.
demo.queue(default_concurrency_limit=1, max_size=50).launch()
 
 
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  torch
2
  transformers
3
- accelerate
 
 
1
  torch
2
  transformers
3
+ accelerate
4
+ gradio