AutoDataLab2.0 / subenvs /email /graders.py
uchihamadara1816's picture
Upload 172 files
d02bacd verified
def _compute_score(email: str, response: str, task_id: str = "task_1") -> float:
if not response or len(response.strip()) < 10:
return 0.01
response_lower = response.lower()
# START SAFE (never 0)
score = 0.01
# 1. APOLOGY CHECK
apology_words = ["sorry", "apologize", "apologies", "regret"]
if any(word in response_lower for word in apology_words):
score += 0.15
# 2. SOLUTION CHECK
solution_words = ["resolve", "fix", "refund", "replace", "escalate",
"help", "assist", "process", "arrange", "dispatch",
"investigate", "correct", "address"]
if any(word in response_lower for word in solution_words):
score += 0.15
# 3. POLITENESS CHECK
polite_words = ["thank", "appreciate", "understand", "valued", "pleased"]
if any(word in response_lower for word in polite_words):
score += 0.10
# 4. ISSUE ACKNOWLEDGEMENT
issue_words = ["delay", "delivery", "order", "issue", "problem",
"complaint", "refund", "billing", "inconvenience", "error"]
matches = sum(1 for word in issue_words if word in response_lower)
score += min(matches * 0.05, 0.10)
# 5. LENGTH SCORING
word_count = len(response.split())
if word_count < 20:
score += 0.01
elif word_count < 50:
score += 0.05
elif word_count <= 200:
score += 0.10
else:
score += 0.05
# 6. STRUCTURE
greetings = ["dear", "hello", "hi ", "good morning"]
if any(g in response_lower for g in greetings):
score += 0.05
closings = ["sincerely", "regards", "best wishes", "thank you", "warm regards"]
if any(c in response_lower for c in closings):
score += 0.05
if "\n" in response or len(response) > 100:
score += 0.05
# 7. RUDE PENALTY
rude_words = ["not my problem", "not our fault", "impossible",
"can't help", "cannot help", "ridiculous", "your fault"]
if any(word in response_lower for word in rude_words):
score -= 0.20
# 8. PROFESSIONAL BONUS
professional_words = ["please", "certainly", "absolutely", "immediately",
"priority", "dedicated", "committed", "ensure"]
prof_matches = sum(1 for word in professional_words if word in response_lower)
score += min(prof_matches * 0.03, 0.09)
# 9. TASK-SPECIFIC
if task_id == "task_1":
if any(w in response_lower for w in ["refund", "return", "reimburse"]):
score += 0.04
elif task_id == "task_2":
if any(w in response_lower for w in ["understand your frustration",
"completely understand",
"deeply sorry",
"sincerely apologize"]):
score += 0.04
elif task_id == "task_3":
issues_addressed = 0
if any(w in response_lower for w in ["wrong item", "incorrect item", "item"]):
issues_addressed += 1
if any(w in response_lower for w in ["billing", "charge", "payment"]):
issues_addressed += 1
if any(w in response_lower for w in ["support", "response", "team"]):
issues_addressed += 1
score += min(issues_addressed * 0.04, 0.09)
score = round(score, 2)
if score <= 0.0:
score = 0.01
elif score >= 1.0:
score = 0.99
score = max(0.01, min(0.99, score))
return score
def grade_response(email: str, response: str, task_id: str = "task_1") -> float:
    """Grade a reply for the given task and bound the result to [0.01, 0.99].

    Args:
        email: The customer email, forwarded unchanged to the scorer.
        response: The reply text to grade.
        task_id: Task identifier forwarded to the scorer.

    Returns:
        The scorer's result, limited to the range 0.01 to 0.99 inclusive.
    """
    raw_score = _compute_score(email, response, task_id)
    upper_bounded = min(0.99, raw_score)
    return max(0.01, upper_bounded)
def grade_easy(email: str, response: str) -> float:
    """Grade a reply against "task_1" and bound the result to [0.01, 0.99].

    Args:
        email: The customer email, forwarded unchanged to the scorer.
        response: The reply text to grade.

    Returns:
        The scorer's result, limited to the range 0.01 to 0.99 inclusive.
    """
    raw_score = _compute_score(email, response, "task_1")
    upper_bounded = min(0.99, raw_score)
    return max(0.01, upper_bounded)
def grade_medium(email: str, response: str) -> float:
    """Grade a reply against "task_2" and bound the result to [0.01, 0.99].

    Args:
        email: The customer email, forwarded unchanged to the scorer.
        response: The reply text to grade.

    Returns:
        The scorer's result, limited to the range 0.01 to 0.99 inclusive.
    """
    raw_score = _compute_score(email, response, "task_2")
    upper_bounded = min(0.99, raw_score)
    return max(0.01, upper_bounded)
def grade_hard(email: str, response: str) -> float:
    """Grade a reply against "task_3" and bound the result to [0.01, 0.99].

    Args:
        email: The customer email, forwarded unchanged to the scorer.
        response: The reply text to grade.

    Returns:
        The scorer's result, limited to the range 0.01 to 0.99 inclusive.
    """
    raw_score = _compute_score(email, response, "task_3")
    upper_bounded = min(0.99, raw_score)
    return max(0.01, upper_bounded)