Spaces:
Running
Running
| # server/graders/grader_hard.py | |
| # Grades hard tasks: algorithmic bug + explanation required. | |
| # Reward = 0.7 * test_score + 0.3 * explanation_score | |
| from typing import Tuple, List, Optional | |
| from .grader_easy import grade_easy | |
| def _score_explanation(explanation: Optional[str], keywords: List[str], instructions: str) -> Tuple[float, str]: | |
| """ | |
| Score explanation semantically: | |
| - Length check (must be meaningful) | |
| - Keyword matching (concept coverage) | |
| - Partial credit for any relevant mention | |
| """ | |
| if not explanation or len(explanation.strip()) < 15: | |
| return 0.01, "❌ No explanation provided. Hard tasks require explanation field." | |
| exp_lower = explanation.lower() | |
| hits = [kw for kw in keywords if kw.lower() in exp_lower] | |
| # Also check for common synonyms | |
| synonym_map = { | |
| "visited": ["seen", "visited", "track", "memo"], | |
| "iteration order": ["order", "direction", "forward", "backward", "reverse"], | |
| "overwrite": ["overwrite", "override", "update", "modify"], | |
| "reverse": ["reverse", "backward", "right to left", "descending"], | |
| "0/1": ["0/1", "zero one", "binary", "knapsack"], | |
| "high": ["high", "upper", "boundary", "bound"], | |
| "return high": ["return high", "high boundary"], | |
| "floor": ["floor", "integer", "truncat"], | |
| } | |
| synonym_hits = set(hits) | |
| for kw in keywords: | |
| kw_lower = kw.lower() | |
| if kw_lower in synonym_map: | |
| for syn in synonym_map[kw_lower]: | |
| if syn in exp_lower: | |
| synonym_hits.add(kw) | |
| break | |
| total_hits = len(synonym_hits) | |
| needed = max(1, len(keywords) // 2) | |
| if total_hits == 0: | |
| score = 0.1 if len(explanation.strip()) > 50 else 0.01 | |
| elif total_hits >= needed: | |
| score = 0.99 | |
| else: | |
| score = total_hits / needed | |
| if score >= 1.0: | |
| feedback = f"✅ Explanation excellent! Covered: {', '.join(synonym_hits)}" | |
| elif score > 0: | |
| missing = [kw for kw in keywords if kw.lower() not in exp_lower] | |
| feedback = ( | |
| f"⚠️ Partial explanation (score={score}). Covered: {', '.join(synonym_hits) or 'none'}. " | |
| f"Also mention: {', '.join(missing[:3])}" | |
| ) | |
| else: | |
| feedback = f"❌ Explanation too vague. Explain: {', '.join(keywords[:3])}" | |
| score = max(0.01, min(score, 0.99)) | |
| return round(score, 2), feedback | |
def grade_hard(fixed_code: str, task: dict, explanation: Optional[str] = None) -> Tuple[float, int, int, str, List[dict]]:
    """Grade a hard task: reward = 0.7 × test_score + 0.3 × explanation_score.

    The code is graded with ``grade_easy`` and the explanation with
    ``_score_explanation``; the weighted sum is clamped to ``[0.01, 0.99]``
    and rounded to two decimals.

    Args:
        fixed_code: The submitted code, passed through to ``grade_easy``.
        task: Task definition. ``"explanation_keywords"`` and
            ``"instructions"`` are read here; the whole dict is also passed
            to ``grade_easy``.
        explanation: Optional free-text explanation of the fix.

    Returns:
        ``(final_reward, passed, total, feedback, results)`` where ``passed``,
        ``total`` and ``results`` are returned unchanged from ``grade_easy``.
    """
    # _score_explanation caps its result at this value, so it is the
    # threshold for "nothing left to gain from the explanation".
    max_exp_score = 0.99

    test_reward, passed, total, code_feedback, results = grade_easy(fixed_code, task)

    # `or` also covers keys that are present but explicitly set to None.
    keywords = task.get("explanation_keywords") or []
    instructions = task.get("instructions") or ""
    exp_score, exp_feedback = _score_explanation(explanation, keywords, instructions)

    final_reward = 0.7 * test_reward + 0.3 * exp_score
    final_reward = round(max(0.01, min(final_reward, 0.99)), 2)

    feedback = (
        f"--- Code Score (70%): {test_reward:.2f} ---\n"
        f"{code_feedback}\n\n"
        f"--- Explanation Score (30%): {exp_score:.2f} ---\n"
        f"{exp_feedback}\n\n"
        f"=== Final Reward: {final_reward:.2f} ==="
    )

    has_explanation = bool(explanation and explanation.strip())
    if passed == total:
        # Only nudge when the explanation can still improve and there is
        # something concrete to suggest.
        if exp_score < max_exp_score and keywords:
            feedback += f"\n💡 Code correct! Boost score by mentioning: {', '.join(keywords[:3])}"
    elif passed < total and not has_explanation:
        feedback += "\n💡 Fix the code AND add explanation for max reward."

    return final_reward, passed, total, feedback, results