Spaces:
Sleeping
Sleeping
File size: 725 Bytes
e482865 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 |
# vector_utils.py
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def find_similar_docs(base_text: str, doc_dict: dict, top_n: int = 3) -> list:
    """Rank documents by TF-IDF cosine similarity to a reference text.

    A TF-IDF vocabulary is fitted on ``base_text`` together with every
    summary in ``doc_dict``; each summary is then scored against
    ``base_text`` with cosine similarity.

    Args:
        base_text: The reference summary text to compare against.
        doc_dict: Mapping of ``{filename: summary_text}``.
        top_n: Number of recommendations to return.

    Returns:
        A list of ``(filename, similarity)`` tuples ordered from most to
        least similar, truncated to ``top_n`` entries. An empty list is
        returned when ``doc_dict`` is empty.
    """
    # Nothing to compare against: cosine_similarity would otherwise be handed
    # a zero-row candidate matrix and raise.
    if not doc_dict:
        return []

    filenames = list(doc_dict.keys())
    docs = list(doc_dict.values())

    # Row 0 is the reference text; rows 1..n line up with `filenames`.
    tfidf_matrix = TfidfVectorizer().fit_transform([base_text] + docs)

    # Keep the matrix sparse (no .toarray()); the [0:1] slice keeps the
    # reference row two-dimensional, as cosine_similarity expects.
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]

    # argsort is ascending, so reverse it to get the best matches first.
    top_indices = similarities.argsort()[::-1][:top_n]
    return [(filenames[i], similarities[i]) for i in top_indices]