File size: 649 Bytes
07a91a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
"""Lightweight relevancy scoring without heavy embedding backends."""

from __future__ import annotations

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


class RelevancyScorer:
    """Score how closely generated text matches a request using TF-IDF.

    The measure is lexical: both texts are vectorized over word unigrams
    and bigrams and compared by cosine similarity. No embedding model is
    involved, so texts sharing no surface tokens score 0.0.
    """

    def __init__(self):
        # ngram_range=(1, 2) indexes single words and adjacent word pairs;
        # min_df=1 keeps terms that occur in only one of the two texts.
        self.vectorizer = TfidfVectorizer(ngram_range=(1, 2), min_df=1)

    def score(self, query_text: str, generated_text: str) -> float:
        """Return the TF-IDF cosine similarity of the two texts.

        The vectorizer is re-fitted on just this pair on every call, so the
        vocabulary and IDF weights come from these two texts alone.

        Args:
            query_text: The request text.
            generated_text: The generated output to compare against it.

        Returns:
            A float in [0.0, 1.0]. 0.0 is returned when either text is
            blank or when neither text yields a usable token.
        """
        # A blank text has no terms, so nothing can overlap. Returning early
        # also avoids the empty-vocabulary error when both texts are blank.
        if not query_text.strip() or not generated_text.strip():
            return 0.0

        try:
            matrix = self.vectorizer.fit_transform([query_text, generated_text])
        except ValueError:
            # scikit-learn raises ValueError when the pair yields an empty
            # vocabulary (e.g. only punctuation or one-character tokens).
            return 0.0

        similarity = float(cosine_similarity(matrix[0], matrix[1])[0][0])
        # Floating-point rounding can push identical texts a hair above 1.0.
        return min(1.0, max(0.0, similarity))