# testing / interview /rag_evaluation.py
# Danielsz's picture
# Update rag_evaluation.py
# 0b5ad12
# Raw
# History Blame Contribute Delete
# 5.9 kB
import json
from .models import InterviewSession, QuestionResponse, EvaluationCategory, EvaluationResult
from .llm_router import chat_completion
from .engine import engine # el Qdrant de openrag/engine original ya instanciado
# ponytail: default evaluation prompt, used when no prompt has been configured.
# The prompt text is intentionally Spanish: it is sent to the LLM verbatim.
# Placeholders: {context}, {category}, {description}, {transcriptions}.
# NOTE(review): the literal JSON braces below are not doubled, so filling this
# template with str.format() would fail on them; substitute each placeholder
# with str.replace() or escape the braces first — confirm against callers.
DEFAULT_PROMPT = """Eres un evaluador senior de admisiones universitarias (ITCA-FEPADE).
Analiza las respuestas de la entrevista del candidato.
Contexto técnico adicional (Knowledge Base): {context}
Evalúa la categoría: {category} ({description})
Respuestas del candidato:
{transcriptions}
Devuelve un JSON estricto con:
{
"score": <numero de 0 a 100>,
"feedback": "<texto breve de por qué este score y recomendación>"
}
"""
def _extract_json_text(content):
    """Return the JSON portion of an LLM reply, dropping a Markdown code fence.

    Accepts replies wrapped as ```json ... ``` or ``` ... ```, with or without
    surrounding whitespace/newlines, as well as bare JSON. ``None`` or an empty
    reply yields an empty string (which ``json.loads`` then rejects).
    """
    text = (content or "").strip()
    if text.startswith("```"):
        text = text[3:]
        # Optional language tag right after the opening fence.
        if text[:4].lower() == "json":
            text = text[4:]
        if text.endswith("```"):
            text = text[:-3]
    return text.strip()


def evaluate_session(session_id):
    """Evaluate an interview session with the LLM and persist the outcome.

    Steps, in order:
      1. Re-transcribe every answer whose audio file exists under
         ``MEDIA_ROOT/answers/`` using the active transcription provider
         (falls back to ``"whisper_local"``); failures are logged and the
         stored transcription is kept.
      2. Query the ``knowledge_base`` Qdrant collection with an embedding of
         the first 512 characters of the transcript to obtain extra context
         (best effort: any failure yields a placeholder context).
      3. Ask the LLM for per-category scores, traits, interviewer notes and an
         HR recommendation, returned as JSON.
      4. Create one ``EvaluationResult`` per recognised category, store the
         weight-averaged ``overall_score`` and the free-text fields on the
         session.
      5. Mark the session ``'evaluated'`` and set the applicant status to
         ``'approved'`` (overall score >= 70), ``'rejected'`` otherwise, or
         ``'error'`` when the LLM call / parsing failed.

    Args:
        session_id: Primary key of the ``InterviewSession`` to evaluate.

    Returns:
        None. Returns early, without touching anything, when the session has
        no responses.

    Raises:
        InterviewSession.DoesNotExist: if no session has the given id.
    """
    import logging
    import os

    from django.conf import settings

    from .transcription_engine import transcribe_audio_file
    from .models import TranscriptionProviderConfig, SystemPromptConfig

    logger = logging.getLogger(__name__)

    session = InterviewSession.objects.get(id=session_id)
    responses = QuestionResponse.objects.filter(session=session).order_by('question_index')
    if not responses.exists():
        return

    # Re-transcribe with the currently active provider when audio was saved.
    provider_config = TranscriptionProviderConfig.objects.filter(is_active=True).first()
    provider = provider_config.provider_type if provider_config else "whisper_local"

    qa_blocks = []
    for r in responses:
        audio_path = os.path.join(
            settings.MEDIA_ROOT,
            f"answers/session_{session_id}_{r.question_index}.webm",
        )
        if os.path.exists(audio_path):
            try:
                text, _, _ = transcribe_audio_file(audio_path, provider_type=provider)
                if text:
                    r.transcription = text
                    r.save()
            except Exception:
                # Best effort: keep the previously stored transcription.
                logger.exception("Error re-transcribing %s", audio_path)
        qa_blocks.append(f"Pregunta: {r.question_text}\nRespuesta: {r.transcription}\n\n")
    transcriptions_text = "".join(qa_blocks)

    # Retrieve supporting context from Qdrant (best effort).
    try:
        vector = engine.model.encode([transcriptions_text[:512]])[0].tolist()
        if engine.qdrant_client:
            response = engine.qdrant_client.query_points(
                collection_name="knowledge_base",
                query=vector,
                limit=3
            )
            hits = response.points
            # A hit without payload contributes an empty line instead of
            # discarding the context from every other hit.
            context = "\n".join([(hit.payload or {}).get("text", "") for hit in hits])
        else:
            context = "Sin contexto adicional de la base de conocimiento (Qdrant client unavailable)."
    except Exception:
        logger.warning("Knowledge-base lookup failed; evaluating without context", exc_info=True)
        context = "Sin contexto adicional de la base de conocimiento."

    categories = EvaluationCategory.objects.filter(active=True)
    global_config = SystemPromptConfig.objects.first()
    system_prompt_text = global_config.prompt_text if global_config else "Eres un evaluador de admisiones."
    categories_list = "\n".join([f"- {cat.name}: {cat.description}" for cat in categories])

    # Doubled braces below are literal braces in the rendered prompt.
    eval_prompt = f"""{system_prompt_text}
Contexto técnico adicional (Knowledge Base): {context}
Debes evaluar al candidato en las siguientes categorías (Habilidades):
{categories_list}
Respuestas del candidato:
{transcriptions_text}
Basado en las respuestas del candidato, evalúa cada categoría con un score de 0 a 100 y un feedback.
Además, extrae EXACTAMENTE 7 rasgos clave sobre su desempeño (ej. 'Lógica', 'Comunicación', 'Resolución').
El nombre del rasgo debe ser de una o máximo dos palabras, y con score de 0 a 100.
Genera un breve párrafo de 'Notas del Entrevistador' (resumen de la entrevista) y un párrafo de 'Recomendación HR'.
Devuelve UNICAMENTE un objeto JSON estricto con el siguiente formato:
{{
"categories": [
{{"name": "nombre de categoria", "score": 90, "feedback": "texto"}}
],
"traits": [
{{"name": "Lógica", "score": 90}}
],
"interviewer_notes": "Candidato demostró...",
"hr_recommendation": "Altamente recomendado..."
}}
"""

    try:
        response = chat_completion([
            {"role": "system", "content": "Eres un analista que solo devuelve JSON puro y procesa el prompt del usuario."},
            {"role": "user", "content": eval_prompt}
        ])
        data = json.loads(_extract_json_text(response.choices[0].message.content))

        total_score = 0
        total_weight = 0
        # NOTE(review): a new EvaluationResult row is created on every run;
        # re-evaluating a session may therefore duplicate rows — confirm
        # whether the model enforces uniqueness per (session, category).
        for cat_data in data.get("categories", []):
            cat_name = cat_data.get("name")
            if not cat_name:
                continue
            cat_obj = categories.filter(name__iexact=cat_name).first()
            if cat_obj:
                score = float(cat_data.get("score", 0))
                EvaluationResult.objects.create(
                    session=session,
                    category=cat_obj,
                    score=score,
                    feedback=cat_data.get("feedback", "")
                )
                total_score += score * cat_obj.weight
                total_weight += cat_obj.weight

        # Leave overall_score untouched when no category could be matched.
        if total_weight > 0:
            session.overall_score = total_score / total_weight
        session.dynamic_traits = data.get("traits", [])
        session.interviewer_notes = data.get("interviewer_notes", "")
        session.hr_recommendation = data.get("hr_recommendation", "")
    except Exception:
        logger.exception("Error evaluating session %s", session_id)
        session.applicant.status = 'error'

    # The session is marked evaluated even when the LLM step failed; the
    # failure is reflected on the applicant status instead.
    session.status = 'evaluated'
    session.save()

    if session.applicant.status != 'error':
        if session.overall_score is not None and session.overall_score >= 70:
            session.applicant.status = 'approved'
        else:
            session.applicant.status = 'rejected'
    session.applicant.save()