Spaces:

TulasiSankar
/

code_assessment_env

Sleeping

App Files Files Community

Update server/code_assessment_environment.py

#16

by rsaibhargav - opened Apr 7

base: refs/heads/main

←

from: refs/pr/16

Discussion Files changed

+20

-15

Files changed (1) hide show

server/code_assessment_environment.py +20 -15

server/code_assessment_environment.py CHANGED Viewed

@@ -685,13 +685,16 @@ class CodeAssessmentEnvironment(Environment):
     # Grading dispatch
     # ------------------------------------------------------------------
     def _grade(self, task_type: str, answer: str, problem: Dict) -> Tuple[bool, float, str]:
-        if task_type == "correctness_check":
-            is_correct, score, fb = self._grade_correctness(answer, problem)
-        elif task_type == "tone_appropriateness":
-            is_correct, score, fb = self._grade_tone(answer, problem)
-        else:
-            is_correct, score, fb = self._grade_multi_dimensional(answer, problem)
-        return is_correct, self._clamp(score), fb
     # ── Task 1: Correctness Check ─────────────────────────────────────
     def _grade_correctness(self, answer: str, problem: Dict) -> Tuple[bool, float, str]:
@@ -729,17 +732,19 @@ class CodeAssessmentEnvironment(Environment):
         given_rating = parts[0] if parts else ""
         rating_match = expected_rating in given_rating or given_rating in expected_rating
-        # Parse issues
-        ALL_ISSUES = [
             "age-inappropriate", "too-technical", "insensitive",
             "tone-mismatch", "contains-pii", "harmful", "biased", "none",
-        ]
         found_issues: set = set()
-        for issue in ALL_ISSUES:
-            if issue in cleaned or issue.replace("-", " ") in cleaned:
-                found_issues.add(issue)
-        # Remove the rating word itself from issues if it crept in
-        found_issues -= {"appropriate", "needs-adjustment", "inappropriate"}
         # Score issues via F1
         if "none" in expected_issues:

     # Grading dispatch
     # ------------------------------------------------------------------
     def _grade(self, task_type: str, answer: str, problem: Dict) -> Tuple[bool, float, str]:
+        try:
+            if task_type == "correctness_check":
+                is_correct, score, fb = self._grade_correctness(answer, problem)
+            elif task_type == "tone_appropriateness":
+                is_correct, score, fb = self._grade_tone(answer, problem)
+            else:
+                is_correct, score, fb = self._grade_multi_dimensional(answer, problem)
+            return is_correct, self._clamp(score), fb
+        except Exception as e:
+            return False, 0.05, f"Grading error: {str(e)}"
     # ── Task 1: Correctness Check ─────────────────────────────────────
     def _grade_correctness(self, answer: str, problem: Dict) -> Tuple[bool, float, str]:
         given_rating = parts[0] if parts else ""
         rating_match = expected_rating in given_rating or given_rating in expected_rating
+        # Parse issues from comma-separated parts (skip first part which is the rating)
+        ALL_ISSUES = {
             "age-inappropriate", "too-technical", "insensitive",
             "tone-mismatch", "contains-pii", "harmful", "biased", "none",
+        }
+        answer_parts = [p.strip() for p in cleaned.split(",")]
         found_issues: set = set()
+        for part in answer_parts[1:]:  # skip the rating
+            normalized = part.strip()
+            if normalized in ALL_ISSUES:
+                found_issues.add(normalized)
+            elif normalized.replace(" ", "-") in ALL_ISSUES:
+                found_issues.add(normalized.replace(" ", "-"))
         # Score issues via F1
         if "none" in expected_issues: