Spaces:
Running
Running
Commit ·
767a950
1
Parent(s): 4d8bbd9
Error message more explicit
Browse files
app.py
CHANGED
|
@@ -32,6 +32,7 @@ with open("test_cases.json", "r") as f:
|
|
| 32 |
# Each prompt has: unconstrained_logprob (baseline) and reference_delta (solution.py delta).
|
| 33 |
# Quality = 1 if student is as good or better than solution.py, decreasing for worse.
|
| 34 |
import csv
|
|
|
|
| 35 |
|
| 36 |
REFERENCE_SCORES = {} # key: (exercise, prompt_index) → dict
|
| 37 |
|
|
@@ -158,7 +159,7 @@ def run_with_timeout(func, args=(), kwargs=None, timeout_sec=TIMEOUT_SECONDS):
|
|
| 158 |
def target():
|
| 159 |
try:
|
| 160 |
result[0] = func(*args, **kwargs)
|
| 161 |
-
except
|
| 162 |
exception[0] = e
|
| 163 |
|
| 164 |
thread = threading.Thread(target=target)
|
|
@@ -169,7 +170,7 @@ def run_with_timeout(func, args=(), kwargs=None, timeout_sec=TIMEOUT_SECONDS):
|
|
| 169 |
if thread.is_alive():
|
| 170 |
raise TimeoutException(f"Prompt evaluation timed out ({TIMEOUT_SECONDS}s limit exceeded)")
|
| 171 |
|
| 172 |
-
if exception[0]:
|
| 173 |
raise exception[0]
|
| 174 |
|
| 175 |
return result[0]
|
|
@@ -378,7 +379,9 @@ def evaluate_submission(file_obj, debug=False):
|
|
| 378 |
lp_str = f"logprob={out['mean_logprob']:.2f}" if out['mean_logprob'] != float('-inf') else "logprob=N/A"
|
| 379 |
report.append(f"{i+1}. {'β' if out['passed'] else 'β'} [{lp_str}, q={out['quality']:.2f}] `{out['output']}`")
|
| 380 |
except Exception as e:
|
| 381 |
-
|
|
|
|
|
|
|
| 382 |
|
| 383 |
# --- EXERCISE 2 ---
|
| 384 |
ex2_passed = 0
|
|
@@ -445,7 +448,9 @@ def evaluate_submission(file_obj, debug=False):
|
|
| 445 |
lp_str = f"logprob={out['mean_logprob']:.2f}" if out['mean_logprob'] != float('-inf') else "logprob=N/A"
|
| 446 |
report.append(f"{i+1}. {'β' if out['passed'] else 'β'} [{lp_str}, q={out['quality']:.2f}] `{out['output']}`")
|
| 447 |
except Exception as e:
|
| 448 |
-
|
|
|
|
|
|
|
| 449 |
|
| 450 |
# 3. CLEANUP (Crucial for 200 students!)
|
| 451 |
del student_module
|
|
|
|
| 32 |
# Each prompt has: unconstrained_logprob (baseline) and reference_delta (solution.py delta).
|
| 33 |
# Quality = 1 if student is as good or better than solution.py, decreasing for worse.
|
| 34 |
import csv
|
| 35 |
+
import traceback
|
| 36 |
|
| 37 |
REFERENCE_SCORES = {} # key: (exercise, prompt_index) → dict
|
| 38 |
|
|
|
|
| 159 |
def target():
|
| 160 |
try:
|
| 161 |
result[0] = func(*args, **kwargs)
|
| 162 |
+
except BaseException as e:
|
| 163 |
exception[0] = e
|
| 164 |
|
| 165 |
thread = threading.Thread(target=target)
|
|
|
|
| 170 |
if thread.is_alive():
|
| 171 |
raise TimeoutException(f"Prompt evaluation timed out ({TIMEOUT_SECONDS}s limit exceeded)")
|
| 172 |
|
| 173 |
+
if exception[0] is not None:
|
| 174 |
raise exception[0]
|
| 175 |
|
| 176 |
return result[0]
|
|
|
|
| 379 |
lp_str = f"logprob={out['mean_logprob']:.2f}" if out['mean_logprob'] != float('-inf') else "logprob=N/A"
|
| 380 |
report.append(f"{i+1}. {'β' if out['passed'] else 'β'} [{lp_str}, q={out['quality']:.2f}] `{out['output']}`")
|
| 381 |
except Exception as e:
|
| 382 |
+
tb = traceback.format_exc()
|
| 383 |
+
print(f"Ex 1 outer exception:\n{tb}")
|
| 384 |
+
report.append(f" **Ex 1 Error:** {str(e) or type(e).__name__}\n```\n{tb}\n```")
|
| 385 |
|
| 386 |
# --- EXERCISE 2 ---
|
| 387 |
ex2_passed = 0
|
|
|
|
| 448 |
lp_str = f"logprob={out['mean_logprob']:.2f}" if out['mean_logprob'] != float('-inf') else "logprob=N/A"
|
| 449 |
report.append(f"{i+1}. {'β' if out['passed'] else 'β'} [{lp_str}, q={out['quality']:.2f}] `{out['output']}`")
|
| 450 |
except Exception as e:
|
| 451 |
+
tb = traceback.format_exc()
|
| 452 |
+
print(f"Ex 2 outer exception:\n{tb}")
|
| 453 |
+
report.append(f" **Ex 2 Error:** {str(e) or type(e).__name__}\n```\n{tb}\n```")
|
| 454 |
|
| 455 |
# 3. CLEANUP (Crucial for 200 students!)
|
| 456 |
del student_module
|