Spaces:
Sleeping
Sleeping
| import json | |
| import time | |
| import unicodedata | |
| import anthropic | |
| from models import Answer, Question, QuestionResult | |
class KeywordMatcher:
    """Score an answer by which expected keywords appear in it.

    All helpers are stateless and are meant to be called on the class
    itself, e.g. ``KeywordMatcher.match(text, keywords)``.
    """

    @staticmethod
    def normalize(text: str) -> str:
        """Return *text* NFKC-normalized and lowercased.

        NFKC folds compatibility characters (for example full-width Latin
        letters) into their canonical form so that comparisons do not
        depend on how the text was typed or transcribed.
        """
        return unicodedata.normalize("NFKC", text).lower()

    @classmethod
    def match(cls, answer_text: str, keywords: list[str]) -> list[str]:
        """Return the keywords that occur in *answer_text*.

        Matching is a plain substring test on the normalized forms. The
        returned list keeps the original (un-normalized) keyword spelling
        and the order of *keywords*.
        """
        normalized_answer = cls.normalize(answer_text)
        return [
            keyword
            for keyword in keywords
            if cls.normalize(keyword) in normalized_answer
        ]

    @staticmethod
    def calculate_score(
        hits: list[str],
        total_keywords: list[str],
        weight: float,
        max_score: int,
    ) -> float:
        """Return the keyword portion of a question's score.

        The score is ``hit ratio * weight * max_score`` rounded to two
        decimals. With no expected keywords the score is ``0.0`` (this
        also avoids a division by zero).
        """
        if not total_keywords:
            return 0.0
        ratio = len(hits) / len(total_keywords)
        return round(ratio * weight * max_score, 2)
class ClaudeEvaluator:
    """Ask a Claude model to grade an interview answer.

    Every failure path (rate limiting, API errors, unparseable output)
    degrades to the neutral result from ``_default_scores`` instead of
    raising, so a batch evaluation can keep going.
    """

    # Total number of API calls attempted per answer before giving up.
    _MAX_ATTEMPTS = 3

    def __init__(self, api_key: str, model: str = "claude-sonnet-4-20250514"):
        """Create the API client for *model* using *api_key*."""
        self.client = anthropic.Anthropic(api_key=api_key)
        self.model = model

    def evaluate(self, question: "Question", answer_text: str) -> dict:
        """Return the model's evaluation of *answer_text* for *question*.

        The result is a dict with the keys ``content_score``,
        ``improvisation_score``, ``feedback`` and ``is_vague``.

        Rate-limit errors are retried with exponential backoff (2s, 4s);
        other API errors are retried after 1s. When the last attempt
        fails, the default scores are returned.
        """
        prompt = self._build_evaluation_prompt(question, answer_text)
        last_attempt = self._MAX_ATTEMPTS - 1
        for attempt in range(self._MAX_ATTEMPTS):
            try:
                response = self.client.messages.create(
                    model=self.model,
                    max_tokens=1024,
                    messages=[{"role": "user", "content": prompt}],
                )
            # RateLimitError must be listed before APIError: it is a
            # subclass and would otherwise be caught by the generic handler.
            except anthropic.RateLimitError:
                if attempt >= last_attempt:
                    print(" ⚠️ API制限のため、デフォルトスコアを使用します。")
                    return self._default_scores()
                wait = 2 ** (attempt + 1)
                print(f" ⏳ API制限中。{wait}秒待機...")
                time.sleep(wait)
            except anthropic.APIError as e:
                print(f" ⚠️ API エラー: {e}")
                if attempt >= last_attempt:
                    return self._default_scores()
                time.sleep(1)
            else:
                # An empty content list is routed through the parser as an
                # empty string so it ends in the usual "parse failed"
                # fallback instead of an IndexError.
                blocks = response.content
                text = blocks[0].text if blocks else ""
                return self._parse_response(text)
        # Safety net; every path of the final attempt returns above.
        return self._default_scores()

    def _build_evaluation_prompt(self, question: "Question", answer_text: str) -> str:
        """Build the Japanese grading prompt sent to the model.

        Reads ``question.question_text`` and ``question.scoring_criteria``
        and instructs the model to reply with a single JSON object.
        """
        return f"""あなたは採用面接の評価者です。以下の面接回答を評価してください。
## 質問
{question.question_text}
## 評価基準
{question.scoring_criteria}
## 候補者の回答
{answer_text}
## 評価指示
以下の観点で評価し、JSON形式のみで回答してください(他のテキストは不要です):
1. content_score (0-100): 回答内容の質。評価基準に対する適合度。
2. improvisation_score (0-100): 即興対応力。論理的構成、具体性、説得力を評価。
3. feedback: 日本語での評価コメント(2-3文)。
4. is_vague: 回答が曖昧で追加質問が必要かどうか(true/false)。
必ず以下のJSON形式で回答してください:
{{"content_score": 整数, "improvisation_score": 整数, "feedback": "文字列", "is_vague": 真偽値}}"""

    def _parse_response(self, text: str) -> dict:
        """Turn the model's reply into a validated result dict.

        Tries the whole reply as JSON first (after stripping a Markdown
        code fence), then the first brace-delimited object found inside
        it. If neither yields a usable object, a warning is printed and
        the default scores are returned.
        """
        # Local import: ``re`` is only needed by the fallback below.
        import re

        text = text.strip()
        if text.startswith("```"):
            # Drop the opening and closing fence lines.
            lines = text.split("\n")
            text = "\n".join(lines[1:-1]) if len(lines) > 2 else text

        parse_errors = (ValueError, TypeError, OverflowError)
        try:
            return self._coerce_result(json.loads(text))
        except parse_errors:
            # Fallback: the model wrapped the JSON in extra prose.
            json_match = re.search(r'\{[^{}]+\}', text, re.DOTALL)
            if json_match:
                try:
                    return self._coerce_result(json.loads(json_match.group()))
                except parse_errors:
                    pass
        print(" ⚠️ AI応答のパースに失敗。デフォルトスコアを使用します。")
        return self._default_scores()

    @staticmethod
    def _coerce_result(data) -> dict:
        """Validate a decoded JSON payload and normalize its field types.

        Raises ``ValueError`` when *data* is not a JSON object, and lets
        ``TypeError``/``ValueError``/``OverflowError`` from ``int()``
        propagate so the caller can treat the payload as unparseable.
        Scores are clamped to the 0-100 range requested in the prompt.
        """
        if not isinstance(data, dict):
            raise ValueError("evaluation payload must be a JSON object")

        def clamp_score(value) -> int:
            return max(0, min(100, int(value)))

        is_vague = data.get("is_vague", False)
        if isinstance(is_vague, str):
            # bool("false") is True, so string flags are read explicitly.
            is_vague = is_vague.strip().lower() in ("true", "1", "yes")

        return {
            "content_score": clamp_score(data.get("content_score", 50)),
            "improvisation_score": clamp_score(data.get("improvisation_score", 50)),
            "feedback": str(data.get("feedback", "評価コメントなし")),
            "is_vague": bool(is_vague),
        }

    @staticmethod
    def _default_scores() -> dict:
        """Return the neutral result used when automatic grading fails."""
        return {
            "content_score": 50,
            "improvisation_score": 50,
            "feedback": "自動評価ができませんでした。手動での確認をお勧めします。",
            "is_vague": False,
        }
def evaluate_answer(
    question: Question,
    answer: Answer,
    claude_evaluator: ClaudeEvaluator,
) -> QuestionResult:
    """Grade one answer and bundle the partial scores into a result.

    The total is the sum of three components, each rounded to two
    decimals: the keyword score, the AI content score and the AI
    improvisation score. The AI scores arrive on a 0-100 scale and are
    rescaled by the question's respective weight and ``max_score``.
    """
    transcript = answer.transcribed_text
    expected = question.expected_keywords
    full_marks = question.max_score

    # Keyword-based component.
    matched = KeywordMatcher.match(transcript, expected)
    keyword_points = KeywordMatcher.calculate_score(
        matched,
        expected,
        question.keyword_weight,
        full_marks,
    )

    # Claude-based components.
    verdict = claude_evaluator.evaluate(question, transcript)
    content_fraction = verdict["content_score"] / 100
    content_points = round(content_fraction * question.ai_weight * full_marks, 2)
    improv_fraction = verdict["improvisation_score"] / 100
    improv_points = round(improv_fraction * question.improv_weight * full_marks, 2)

    combined = round(keyword_points + content_points + improv_points, 2)

    return QuestionResult(
        question=question,
        answer=answer,
        keyword_hits=matched,
        keyword_score=keyword_points,
        ai_content_score=content_points,
        improvisation_score=improv_points,
        total_score=combined,
        ai_feedback=verdict["feedback"],
    )