"""Scoring helpers for evaluating generated text.

Three signals are combined by :func:`final_score`:

* an LLM-as-judge rating obtained through ``backend.run_llm``,
* embedding cosine similarity against an optional reference answer,
* a length-based conciseness score.
"""

import logging
import math
import re

from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

from backend import run_llm

logger = logging.getLogger(__name__)

# Loaded once at import time and shared by every similarity_score() call.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# Bounds of the scale requested in the judge prompt, and the neutral value
# returned when the judge cannot be queried or its reply cannot be parsed.
_JUDGE_MIN = 1.0
_JUDGE_MAX = 10.0
_JUDGE_FALLBACK = 5.0

# First unsigned integer/decimal token in a free-form judge reply.
_NUMBER_RE = re.compile(r"\d+(?:\.\d+)?")

# Weights used by final_score(); each set sums to 1.0.
_WEIGHTS_WITH_REFERENCE = (0.5, 0.3, 0.2)  # (llm, similarity, conciseness)
_WEIGHTS_WITHOUT_REFERENCE = (0.7, 0.3)  # (llm, conciseness)


def similarity_score(output: str, reference: str) -> float:
    """Return the cosine similarity between embeddings of the two texts.

    Args:
        output: Text being evaluated.
        reference: Reference text to compare against.

    Returns:
        The cosine similarity of the two embeddings as a float, or ``0.0``
        when ``reference`` is empty or whitespace-only.
    """
    if not reference.strip():
        return 0.0
    # Encode both texts in one batch instead of two separate model calls.
    embeddings = embed_model.encode([output, reference])
    return float(cosine_similarity(embeddings[:1], embeddings[1:])[0][0])


def conciseness_score(text: str, max_words: int = 300) -> float:
    """Return a length penalty score in ``[0.0, 1.0]``.

    The score decreases linearly with the whitespace-separated word count and
    bottoms out at ``0.0`` once ``text`` has ``max_words`` words or more.

    Args:
        text: Text being evaluated.
        max_words: Word count at which the score reaches zero.

    Returns:
        ``1 - words / max_words`` floored at ``0.0``.

    Raises:
        ValueError: If ``max_words`` is not positive.
    """
    if max_words <= 0:
        raise ValueError(f"max_words must be positive, got {max_words!r}")
    words = len(text.split())
    return max(0.0, 1 - (words / max_words))


def _parse_judge_reply(reply: str) -> float:
    """Extract a score from the judge's reply and clamp it to the 1-10 scale.

    A reply that is exactly a number is used as-is; otherwise the first
    numeric token is taken, so replies such as ``"8/10"`` or ``"Score: 8"``
    are accepted.

    Raises:
        ValueError: If the reply contains no finite number.
    """
    text = reply.strip()
    try:
        value = float(text)
    except ValueError:
        match = _NUMBER_RE.search(text)
        if match is None:
            raise ValueError(f"no numeric score in judge reply: {reply!r}") from None
        value = float(match.group())
    # float() accepts "nan" and "inf"; neither is a usable score.
    if not math.isfinite(value):
        raise ValueError(f"non-finite score in judge reply: {reply!r}")
    return min(_JUDGE_MAX, max(_JUDGE_MIN, value))


def llm_judge_score(output: str, goal: str = "overall quality") -> float:
    """Ask the LLM to rate ``output`` from 1 to 10 with respect to ``goal``.

    Scoring is best-effort: any failure while calling ``run_llm`` or parsing
    its reply is logged and replaced by a neutral fallback.

    Args:
        output: Answer text to be rated.
        goal: Criterion inserted into the judge prompt.

    Returns:
        A score clamped to ``[1.0, 10.0]``, or ``5.0`` if the judge call or
        the parsing of its reply fails.
    """
    judge_prompt = (
        f"Score the following answer from 1 to 10 based on {goal}.\n"
        "Only return a single number.\n"
        "\n"
        "Answer:\n"
        f"{output}\n"
    )
    try:
        return _parse_judge_reply(run_llm(judge_prompt))
    except Exception:
        # Deliberately broad: run_llm is an opaque backend call. Unlike a
        # bare ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        logger.warning(
            "LLM judge scoring failed; using fallback score %.1f",
            _JUDGE_FALLBACK,
            exc_info=True,
        )
        return _JUDGE_FALLBACK


def final_score(output: str, reference: str = "") -> float:
    """Blend the judge, similarity and conciseness signals into one score.

    With a non-blank ``reference`` the weights are 0.5 (LLM judge), 0.3
    (similarity) and 0.2 (conciseness); without one they are 0.7 (LLM judge)
    and 0.3 (conciseness).

    Args:
        output: Text being evaluated.
        reference: Optional reference answer.

    Returns:
        The weighted score rounded to three decimal places.
    """
    llm_score = llm_judge_score(output) / 10
    conc = conciseness_score(output)

    if reference.strip():
        # Cosine similarity can be negative; floor it so the blended score
        # cannot drop below zero.
        sim = max(0.0, similarity_score(output, reference))
        w_llm, w_sim, w_conc = _WEIGHTS_WITH_REFERENCE
        score = w_llm * llm_score + w_sim * sim + w_conc * conc
    else:
        w_llm, w_conc = _WEIGHTS_WITHOUT_REFERENCE
        score = w_llm * llm_score + w_conc * conc

    return round(score, 3)