nathanael-fijalkow committed on
Commit
62df8d5
·
1 Parent(s): 28ff637
Files changed (2) hide show
  1. app.py +48 -32
  2. requirements.txt +2 -1
app.py CHANGED
@@ -3,11 +3,11 @@ import importlib.util
3
  import os
4
  import json
5
  import torch
 
6
  from transformers import AutoModelForCausalLM, AutoTokenizer
7
 
8
- # --- EVALUATION MODEL: SmolLM2 ---
9
  EVAL_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
10
-
11
  tokenizer = AutoTokenizer.from_pretrained(EVAL_MODEL)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  EVAL_MODEL,
@@ -15,49 +15,65 @@ model = AutoModelForCausalLM.from_pretrained(
15
  device_map="auto"
16
  )
17
 
 
18
  with open("test_cases.json", "r") as f:
19
  TEST_CASES = json.load(f)
20
 
21
  def evaluate_submission(file_obj):
22
- if file_obj is None: return "Missing file."
23
-
 
24
  try:
25
- spec = importlib.util.spec_from_file_location("student_code", file_obj.name)
26
- module = importlib.util.module_from_spec(spec)
27
- spec.loader.exec_module(module)
 
 
28
 
29
- feedback = [f"### Evaluated with {EVAL_MODEL}"]
30
 
31
  # --- EXERCISE 1 ---
32
- ex1_class = getattr(module, "LaDisparition")(model, tokenizer)
33
  ex1_passed = 0
34
- for prompt in TEST_CASES["exercise_1"]:
35
- output = ex1_class(prompt, max_tokens=25)
36
- if 'e' not in output.lower() and len(output.strip()) > 5:
37
- ex1_passed += 1
38
-
39
- ex1_score = (ex1_passed / 10) * 100
40
- feedback.append(f"**Ex 1 (No 'e'):** {ex1_score}% ({ex1_passed}/10)")
 
 
 
41
 
42
  # --- EXERCISE 2 ---
43
- ex2_class = getattr(module, "ToulouseSequence")(model, tokenizer)
44
  ex2_passed = 0
45
- for prompt in TEST_CASES["exercise_2"]:
46
- output = ex2_class(prompt, max_tokens=25)
47
- if "toulouse" not in output.lower() and len(output.strip()) > 5:
48
- ex2_passed += 1
49
-
50
- ex2_score = (ex2_passed / 10) * 100
51
- feedback.append(f"**Ex 2 (No Toulouse):** {ex2_score}% ({ex2_passed}/10)")
 
 
52
 
53
- total = (ex1_score + ex2_score) / 2
54
- return f"# Final Score: {total}%\n\n" + "\n".join(feedback)
 
 
 
 
55
 
56
  except Exception as e:
57
- return f"Submission Error: {str(e)}"
 
 
 
 
 
 
 
 
58
 
59
- demo = gr.Interface(fn=evaluate_submission, inputs=gr.File(), outputs="markdown")
60
- demo.queue(
61
- default_concurrency_limit=2, # Processes 2 students at a time
62
- max_size=50 # Holds up to 50 students in a waiting line
63
- ).launch()
 
3
  import os
4
  import json
5
  import torch
6
+ import gc
7
  from transformers import AutoModelForCausalLM, AutoTokenizer
8
 
9
+ # 1. SETUP
10
  EVAL_MODEL = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
 
11
  tokenizer = AutoTokenizer.from_pretrained(EVAL_MODEL)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  EVAL_MODEL,
 
15
  device_map="auto"
16
  )
17
 
18
# Load the secret test cases once at startup; graders read this mapping only.
with open("test_cases.json", "r") as cases_file:
    TEST_CASES = json.load(cases_file)
21
 
22
def _run_exercise(student_module, class_name, case_key, label, output_ok):
    """Grade one exercise: instantiate the student's class and count passing prompts.

    Args:
        student_module: the imported student module.
        class_name: name of the class to look up on the student module.
        case_key: key into TEST_CASES for this exercise's prompts.
        label: human-readable exercise label used in the report line.
        output_ok: predicate on the generated text (the exercise constraint).

    Returns:
        One markdown report line (score or error message) — never raises,
        so one broken exercise does not void the whole submission.
    """
    cases = TEST_CASES[case_key]
    try:
        instance = getattr(student_module, class_name)(model, tokenizer)
        passed = 0
        for prompt in cases:
            # Token budget kept small so a full evaluation stays fast.
            output = instance(prompt, max_tokens=20)
            # A trivially short/empty output does not count as a pass.
            if output_ok(output) and len(output.strip()) > 3:
                passed += 1
        # Report the real case count instead of a hard-coded /10.
        return f" **{label}:** {passed}/{len(cases)} correct"
    except Exception as e:
        return f" **{label} Error:** {str(e)}"


def evaluate_submission(file_obj):
    """Grade an uploaded student file against the secret test cases.

    Args:
        file_obj: Gradio file wrapper; only its ``.name`` (a filesystem
            path) is used. May be None when nothing was uploaded.

    Returns:
        A markdown report string, or an error message when the upload is
        missing or the student module cannot be imported.
    """
    if file_obj is None:
        return "No file provided."

    student_module = None
    try:
        # 2. ISOLATED LOADING
        # A fixed module name per import avoids polluting sys.modules;
        # each submission is executed from its own file path.
        spec = importlib.util.spec_from_file_location("student_module", file_obj.name)
        student_module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(student_module)

        report = [f"## Results for {EVAL_MODEL}\n"]

        # --- EXERCISE 1: generated text must avoid the letter 'e' ---
        report.append(_run_exercise(
            student_module, "LaDisparition", "exercise_1", "Ex 1 (No 'e')",
            lambda out: 'e' not in out.lower(),
        ))

        # --- EXERCISE 2: generated text must avoid the word "toulouse" ---
        report.append(_run_exercise(
            student_module, "ToulouseSequence", "exercise_2", "Ex 2 (No Toulouse)",
            lambda out: "toulouse" not in out.lower(),
        ))

        return "\n".join(report)

    except Exception as e:
        return f"### System Error during import:\n{str(e)}"
    finally:
        # 3. CLEANUP (crucial with many consecutive submissions!)
        # Runs on BOTH success and failure paths, so a crashing submission
        # cannot leave GPU memory pinned for the next student.
        if student_module is not None:
            del student_module
        gc.collect()
        torch.cuda.empty_cache()
69
+
70
# 4. LAUNCH WITH CONCURRENCY CONTROL
demo = gr.Interface(
    fn=evaluate_submission,
    inputs=gr.File(label="Submission File"),
    outputs="markdown",
    api_name="predict",  # This matches the 'api_name' used in the Public Portal
)

# Concurrency is limited to 1 so the shared GPU doesn't OOM with 200 students.
# max_size bounds the waiting line (as in the previous revision) so the queue
# cannot grow without limit under a submission burst.
demo.queue(default_concurrency_limit=1, max_size=50).launch()
 
 
 
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  torch
2
  transformers
3
- accelerate
 
 
1
  torch
2
  transformers
3
+ accelerate
4
+ gradio