# server/graders/grader_hard.py
# Grades hard tasks: algorithmic bug + explanation required.
# Reward = 0.7 * test_score + 0.3 * explanation_score
from typing import Tuple, List, Optional
from .grader_easy import grade_easy
def _score_explanation(explanation: Optional[str], keywords: List[str], instructions: str) -> Tuple[float, str]:
"""
Score explanation semantically:
- Length check (must be meaningful)
- Keyword matching (concept coverage)
- Partial credit for any relevant mention
"""
if not explanation or len(explanation.strip()) < 15:
return 0.01, "❌ No explanation provided. Hard tasks require explanation field."
exp_lower = explanation.lower()
hits = [kw for kw in keywords if kw.lower() in exp_lower]
# Also check for common synonyms
synonym_map = {
"visited": ["seen", "visited", "track", "memo"],
"iteration order": ["order", "direction", "forward", "backward", "reverse"],
"overwrite": ["overwrite", "override", "update", "modify"],
"reverse": ["reverse", "backward", "right to left", "descending"],
"0/1": ["0/1", "zero one", "binary", "knapsack"],
"high": ["high", "upper", "boundary", "bound"],
"return high": ["return high", "high boundary"],
"floor": ["floor", "integer", "truncat"],
}
synonym_hits = set(hits)
for kw in keywords:
kw_lower = kw.lower()
if kw_lower in synonym_map:
for syn in synonym_map[kw_lower]:
if syn in exp_lower:
synonym_hits.add(kw)
break
total_hits = len(synonym_hits)
needed = max(1, len(keywords) // 2)
if total_hits == 0:
score = 0.1 if len(explanation.strip()) > 50 else 0.01
elif total_hits >= needed:
score = 0.99
else:
score = total_hits / needed
if score >= 1.0:
feedback = f"✅ Explanation excellent! Covered: {', '.join(synonym_hits)}"
elif score > 0:
missing = [kw for kw in keywords if kw.lower() not in exp_lower]
feedback = (
f"⚠️ Partial explanation (score={score}). Covered: {', '.join(synonym_hits) or 'none'}. "
f"Also mention: {', '.join(missing[:3])}"
)
else:
feedback = f"❌ Explanation too vague. Explain: {', '.join(keywords[:3])}"
score = max(0.01, min(score, 0.99))
return round(score, 2), feedback
def grade_hard(fixed_code: str, task: dict, explanation: Optional[str] = None) -> Tuple[float, int, int, str, List[dict]]:
    """
    Grade hard task: Reward = 0.7 × test_score + 0.3 × explanation_score

    Args:
        fixed_code: The submitted code, forwarded unchanged to ``grade_easy``.
        task: Task definition. This function reads the optional keys
            ``explanation_keywords`` and ``instructions``; the whole dict is
            also passed to ``grade_easy``.
        explanation: Optional free-text explanation of the fix.

    Returns:
        ``(final_reward, passed, total, feedback, results)`` where ``passed``,
        ``total`` and ``results`` are returned by ``grade_easy`` as-is, and
        ``final_reward`` is clamped to ``[0.01, 0.99]`` and rounded to two
        decimals.
    """
    # Highest value the explanation scorer can return; scores are capped
    # below 1.0, so "full marks" must be compared against this cap.
    max_exp_score = 0.99

    test_reward, passed, total, code_feedback, results = grade_easy(fixed_code, task)

    keywords = task.get("explanation_keywords", [])
    instructions = task.get("instructions", "")
    exp_score, exp_feedback = _score_explanation(explanation, keywords, instructions)

    final_reward = 0.7 * test_reward + 0.3 * exp_score
    final_reward = round(max(0.01, min(final_reward, 0.99)), 2)

    feedback = (
        f"--- Code Score (70%): {test_reward:.2f} ---\n"
        f"{code_feedback}\n\n"
        f"--- Explanation Score (30%): {exp_score:.2f} ---\n"
        f"{exp_feedback}\n\n"
        f"=== Final Reward: {final_reward:.2f} ==="
    )

    # Only suggest improving the explanation when it has not already reached
    # the cap, and when there are keywords to suggest.
    if passed == total and exp_score < max_exp_score and keywords:
        feedback += f"\n💡 Code correct! Boost score by mentioning: {', '.join(keywords[:3])}"
    elif passed < total and not explanation:
        feedback += "\n💡 Fix the code AND add explanation for max reward."

    return final_reward, passed, total, feedback, results
|