# Evaluation utilities: embedding similarity, conciseness, and LLM-as-judge scoring.
| from sentence_transformers import SentenceTransformer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from backend import run_llm | |
# Sentence-embedding model used for similarity scoring, loaded once at import
# time (module-level) so every call to similarity_score reuses the same weights.
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
def similarity_score(output: str, reference: str) -> float:
    """Return the cosine similarity between the embeddings of two texts.

    Args:
        output: Candidate text to score.
        reference: Reference text to compare against.

    Returns:
        Cosine similarity in roughly [-1, 1] as a plain float, or 0.0 when
        either text is empty/whitespace (embedding an empty string would
        otherwise produce an arbitrary, meaningless similarity).
    """
    # Guard BOTH sides — the original only checked `reference`.
    if not output.strip() or not reference.strip():
        return 0.0
    # Encode both texts in a single batched call instead of two model passes.
    embeddings = embed_model.encode([output, reference])
    return float(cosine_similarity([embeddings[0]], [embeddings[1]])[0][0])
def conciseness_score(text: str, max_words: int = 300) -> float:
    """Score brevity on a linear 0-1 scale.

    An empty text scores 1.0; the score decreases linearly with word count
    and bottoms out at 0.0 once the text reaches ``max_words`` words.

    Args:
        text: Text whose length is being scored.
        max_words: Word budget at (and beyond) which the score is 0.0.
            Defaults to 300, preserving the original hard-coded behavior.

    Returns:
        A float in [0.0, 1.0].
    """
    word_count = len(text.split())
    return max(0.0, 1.0 - word_count / max_words)
def llm_judge_score(output: str, goal: str = "overall quality") -> float:
    """Ask the LLM judge to rate ``output`` from 1 to 10 for ``goal``.

    Args:
        output: The answer text to be judged.
        goal: Criterion the judge should score against.

    Returns:
        The judge's score clamped to [1.0, 10.0], or 5.0 (the midpoint)
        when the LLM call fails or returns something non-numeric.
    """
    # Prompt construction cannot fail for str inputs, so keep it outside
    # the try block — only the LLM call and the parse belong in it.
    judge_prompt = f"""
Score the following answer from 1 to 10 based on {goal}.
Only return a single number.
Answer:
{output}
"""
    try:
        raw = run_llm(judge_prompt)
        value = float(raw.strip())
    except Exception:  # was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit
        return 5.0  # safe fallback: midpoint score
    # LLMs occasionally return out-of-range numbers; clamp to the stated scale.
    return min(10.0, max(1.0, value))
def final_score(output: str, reference: str = "") -> float:
    """Blend judge, similarity, and conciseness scores into one 0-1 number.

    Args:
        output: Candidate answer to score.
        reference: Optional reference answer; when non-empty, embedding
            similarity joins the mix with its own weight.

    Returns:
        Weighted combination rounded to 3 decimal places:
        0.5*judge + 0.3*similarity + 0.2*conciseness with a reference,
        or 0.7*judge + 0.3*conciseness without one.
    """
    judge = llm_judge_score(output) / 10  # normalize 1-10 judge scale to 0-1
    brevity = conciseness_score(output)
    has_reference = bool(reference.strip())
    if has_reference:
        components = (
            (0.5, judge),
            (0.3, similarity_score(output, reference)),
            (0.2, brevity),
        )
    else:
        components = ((0.7, judge), (0.3, brevity))
    return round(sum(weight * value for weight, value in components), 3)