# prompt-optimizer / evaluator.py — created by SharvNey (commit be5a976)
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from backend import run_llm
# Shared sentence-embedding model, loaded once at import time and reused
# by similarity_score (MiniLM: small, fast, 384-dim embeddings).
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
def similarity_score(output: str, reference: str) -> float:
    """Cosine similarity between sentence embeddings of *output* and *reference*.

    Returns 0.0 when the reference is blank, since there is nothing
    meaningful to compare against.
    """
    if not reference.strip():
        return 0.0
    out_vec, ref_vec = (embed_model.encode([text]) for text in (output, reference))
    return float(cosine_similarity(out_vec, ref_vec)[0][0])
def conciseness_score(text: str) -> float:
    """Score brevity on [0, 1]: short answers score near 1.0, falling
    linearly to 0.0 at 300 words or more.

    Empty or whitespace-only text scores 0.0 — without this guard a blank
    answer would score a perfect 1.0, rewarding saying nothing at all.
    """
    if not text.strip():
        return 0.0
    word_count = len(text.split())
    # Linear penalty: each word costs 1/300 of the score, floored at 0.
    return max(0.0, 1 - word_count / 300)
def llm_judge_score(output: str, goal: str = "overall quality") -> float:
    """Ask the backend LLM to grade *output* from 1 to 10 on *goal*.

    The judge's reply is parsed leniently — the first number found is used,
    so "8", "8.5", and "Score: 8/10" all yield 8.x — and the result is
    clamped to [1, 10]. Any failure (backend error, no number in the reply)
    falls back to the neutral midpoint 5.0.
    """
    import re

    judge_prompt = f"""
Score the following answer from 1 to 10 based on {goal}.
Only return a single number.
Answer:
{output}
"""
    try:
        reply = run_llm(judge_prompt)
        # LLMs often decorate the number despite instructions; grab the
        # first numeric token rather than trusting float() on the whole reply.
        match = re.search(r"\d+(?:\.\d+)?", reply)
        if match is None:
            return 5.0
        # Clamp so an out-of-range judge (e.g. "15") can't skew final_score.
        return min(10.0, max(1.0, float(match.group())))
    except Exception:  # narrowed from bare `except:` so Ctrl-C/SystemExit still propagate
        return 5.0  # safe fallback
def final_score(output: str, reference: str = "") -> float:
    """Blend the judge, similarity, and conciseness scores into one [0, 1] value.

    With a non-blank reference the weights are 50% LLM judge, 30% embedding
    similarity, 20% conciseness; without one, 70% judge and 30% conciseness.
    The result is rounded to 3 decimal places.
    """
    judge = llm_judge_score(output) / 10
    brevity = conciseness_score(output)
    if not reference.strip():
        combined = 0.7 * judge + 0.3 * brevity
    else:
        combined = (
            0.5 * judge
            + 0.3 * similarity_score(output, reference)
            + 0.2 * brevity
        )
    return round(combined, 3)