# ai-interview-system / evaluator.py
# sugitora
# AI interview system - initial release (Streamlit + Claude API)
# 6d1fe52
import json
import re
import time
import unicodedata

import anthropic

from models import Answer, Question, QuestionResult
class KeywordMatcher:
    """Substring-based keyword matching and scoring for interview answers."""

    @staticmethod
    def normalize(text: str) -> str:
        """Return *text* in a form suitable for caseless comparison.

        NFKC folds full-width/half-width and other compatibility variants
        together; ``casefold`` is used instead of ``lower`` because it is the
        str method intended for caseless matching (e.g. "ß" vs "SS").
        """
        return unicodedata.normalize("NFKC", text).casefold()

    @classmethod
    def match(cls, answer_text: str, keywords: list[str]) -> list[str]:
        """Return the keywords (original spelling and order) found in the answer.

        Blank or whitespace-only keywords are skipped: an empty string is a
        substring of every answer, so it would otherwise always count as a hit.
        """
        haystack = cls.normalize(answer_text)
        return [
            keyword
            for keyword in keywords
            if keyword.strip() and cls.normalize(keyword) in haystack
        ]

    @staticmethod
    def calculate_score(
        hits: list[str],
        total_keywords: list[str],
        weight: float,
        max_score: int,
    ) -> float:
        """Return ``hit ratio * weight * max_score`` rounded to 2 decimals.

        Blank keywords are excluded from both the hit count and the
        denominator so they can neither award nor cost points.  Returns 0.0
        when there is no usable keyword to score against.
        """
        scorable = [kw for kw in total_keywords or [] if kw.strip()]
        if not scorable:
            return 0.0
        counted_hits = [kw for kw in hits if kw.strip()]
        ratio = len(counted_hits) / len(scorable)
        return round(ratio * weight * max_score, 2)
class ClaudeEvaluator:
    """Scores an interview answer by asking a Claude model for a JSON verdict.

    Every failure path (API errors after retries, unparseable or malformed
    model output) degrades to ``_default_scores()`` instead of raising, so a
    caller always receives a dict with the same four keys.
    """

    # Total number of API calls attempted before giving up.
    _MAX_ATTEMPTS = 3
    # Fallback extractor: first flat ``{...}`` object embedded in free text.
    _JSON_OBJECT_RE = re.compile(r'\{[^{}]+\}', re.DOTALL)

    def __init__(self, api_key: str, model: str = "claude-sonnet-4-20250514"):
        """Create the Anthropic client used for all evaluations."""
        self.client = anthropic.Anthropic(api_key=api_key)
        self.model = model

    def evaluate(self, question: "Question", answer_text: str) -> dict:
        """Evaluate *answer_text* against *question* and return the verdict.

        Returns a dict with ``content_score`` and ``improvisation_score``
        (ints clamped to 0-100), ``feedback`` (str) and ``is_vague`` (bool).
        Rate-limit errors are retried with exponential backoff (2s, 4s),
        other API errors after a 1s pause; when the attempts are exhausted
        the default scores are returned.
        """
        prompt = self._build_evaluation_prompt(question, answer_text)
        last_attempt = self._MAX_ATTEMPTS - 1
        for attempt in range(self._MAX_ATTEMPTS):
            try:
                response = self.client.messages.create(
                    model=self.model,
                    max_tokens=1024,
                    messages=[{"role": "user", "content": prompt}],
                )
            # RateLimitError must be handled before the broader APIError.
            except anthropic.RateLimitError:
                if attempt >= last_attempt:
                    print(" ⚠️ API制限のため、デフォルトスコアを使用します。")
                    return self._default_scores()
                wait = 2 ** (attempt + 1)
                print(f" ⏳ API制限中。{wait}秒待機...")
                time.sleep(wait)
            except anthropic.APIError as e:
                print(f" ⚠️ API エラー: {e}")
                if attempt >= last_attempt:
                    return self._default_scores()
                time.sleep(1)
            else:
                return self._parse_response(self._extract_text(response))
        # Safety net; every loop iteration above either returns or retries.
        return self._default_scores()

    @staticmethod
    def _extract_text(response: object) -> str:
        """Return the first textual content block of *response*, or ""."""
        for block in getattr(response, "content", None) or []:
            text = getattr(block, "text", None)
            if isinstance(text, str):
                return text
        return ""

    def _build_evaluation_prompt(self, question: "Question", answer_text: str) -> str:
        """Build the Japanese evaluation prompt sent to the model."""
        return f"""あなたは採用面接の評価者です。以下の面接回答を評価してください。
## 質問
{question.question_text}
## 評価基準
{question.scoring_criteria}
## 候補者の回答
{answer_text}
## 評価指示
以下の観点で評価し、JSON形式のみで回答してください(他のテキストは不要です):
1. content_score (0-100): 回答内容の質。評価基準に対する適合度。
2. improvisation_score (0-100): 即興対応力。論理的構成、具体性、説得力を評価。
3. feedback: 日本語での評価コメント(2-3文)。
4. is_vague: 回答が曖昧で追加質問が必要かどうか(true/false)。
必ず以下のJSON形式で回答してください:
{{"content_score": 整数, "improvisation_score": 整数, "feedback": "文字列", "is_vague": 真偽値}}"""

    def _parse_response(self, text: str) -> dict:
        """Turn the model's reply into a validated result dict.

        Tries the reply itself (minus a surrounding Markdown code fence)
        first, then the first flat JSON object embedded anywhere in it.
        Falls back to the default scores when neither yields a usable result.
        """
        raw = text.strip()
        candidates = [self._strip_code_fence(raw)]
        embedded = self._JSON_OBJECT_RE.search(raw)
        if embedded:
            candidates.append(embedded.group())

        for candidate in candidates:
            try:
                # json.JSONDecodeError is a ValueError subclass, so one
                # handler covers both bad JSON and bad field values.
                return self._coerce_result(json.loads(candidate))
            except ValueError:
                continue

        print(" ⚠️ AI応答のパースに失敗。デフォルトスコアを使用します。")
        return self._default_scores()

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Drop the first and last line of a ``` fenced reply of 3+ lines."""
        if not text.startswith("```"):
            return text
        lines = text.split("\n")
        if len(lines) <= 2:
            return text
        return "\n".join(lines[1:-1])

    @classmethod
    def _coerce_result(cls, data: object) -> dict:
        """Validate decoded JSON and normalise it to the result schema.

        Raises:
            ValueError: if *data* is not a JSON object or a score is unusable.
        """
        if not isinstance(data, dict):
            raise ValueError("evaluation payload is not a JSON object")
        feedback = data.get("feedback")
        return {
            "content_score": cls._coerce_score(data.get("content_score", 50)),
            "improvisation_score": cls._coerce_score(
                data.get("improvisation_score", 50)
            ),
            "feedback": str(feedback) if feedback is not None else "評価コメントなし",
            "is_vague": cls._coerce_bool(data.get("is_vague", False)),
        }

    @staticmethod
    def _coerce_score(value: object) -> int:
        """Convert *value* to an int clamped to 0-100.

        Accepts ints, floats and numeric strings (including "85.5").

        Raises:
            ValueError: for ``None``, booleans and anything non-numeric.
        """
        if value is None or isinstance(value, bool):
            raise ValueError(f"invalid score: {value!r}")
        try:
            score = int(float(value))
        except (TypeError, OverflowError) as exc:
            raise ValueError(f"invalid score: {value!r}") from exc
        return max(0, min(100, score))

    @staticmethod
    def _coerce_bool(value: object) -> bool:
        """Interpret *value* as a flag; the string "false" must not be truthy."""
        if isinstance(value, str):
            return value.strip().lower() in ("true", "1", "yes")
        return bool(value)

    @staticmethod
    def _default_scores() -> dict:
        """Return the neutral result used whenever automatic evaluation fails."""
        return {
            "content_score": 50,
            "improvisation_score": 50,
            "feedback": "自動評価ができませんでした。手動での確認をお勧めします。",
            "is_vague": False,
        }
def evaluate_answer(
    question: Question,
    answer: Answer,
    claude_evaluator: ClaudeEvaluator,
) -> QuestionResult:
    """Score one answer by combining keyword matching with Claude's verdict.

    The keyword part is computed by ``KeywordMatcher``; the two AI scores
    (0-100) are scaled by their respective question weights and
    ``question.max_score``.  Every component and the total are rounded to two
    decimals.  The ``is_vague`` flag of the AI verdict is not used here.
    """
    transcript = answer.transcribed_text
    expected = question.expected_keywords

    def scaled(raw_score, weight):
        # Convert a 0-100 AI score into weighted points for this question.
        return round((raw_score / 100) * weight * question.max_score, 2)

    # Keyword-based evaluation.
    matched = KeywordMatcher.match(transcript, expected)
    keyword_points = KeywordMatcher.calculate_score(
        matched,
        expected,
        question.keyword_weight,
        question.max_score,
    )

    # Claude API evaluation.
    verdict = claude_evaluator.evaluate(question, transcript)
    content_points = scaled(verdict["content_score"], question.ai_weight)
    improv_points = scaled(verdict["improvisation_score"], question.improv_weight)

    return QuestionResult(
        question=question,
        answer=answer,
        keyword_hits=matched,
        keyword_score=keyword_points,
        ai_content_score=content_points,
        improvisation_score=improv_points,
        total_score=round(keyword_points + content_points + improv_points, 2),
        ai_feedback=verdict["feedback"],
    )