ai-tree-lite / vector_utils.py
김민용
Deploy AI-Tree Lite to Hugging Face
e482865
raw
history blame contribute delete
725 Bytes
# vector_utils.py
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
def find_similar_docs(base_text, doc_dict, top_n=3):
    """Rank documents by TF-IDF cosine similarity to a reference text.

    Args:
        base_text: The reference summary text to compare against.
        doc_dict: Mapping of the form ``{filename: summary_text}``.
        top_n: Number of recommendations to return. ``None`` returns
            every document.

    Returns:
        A list of at most ``top_n`` ``(filename, similarity)`` tuples,
        ordered from most to least similar. The list is empty when
        ``doc_dict`` is empty or ``top_n`` is zero or negative.

    Raises:
        ValueError: Propagated from ``TfidfVectorizer`` when no terms can
            be extracted from the texts (for example, every text is empty).
    """
    # Nothing to rank. Returning early also avoids handing
    # cosine_similarity a zero-row candidate matrix, which it rejects, and
    # keeps a negative top_n from silently slicing "all but the last k".
    if not doc_dict or (top_n is not None and top_n <= 0):
        return []

    filenames = list(doc_dict)
    texts = [base_text, *doc_dict.values()]

    # Row 0 is the reference text; rows 1.. line up with ``filenames``.
    tfidf = TfidfVectorizer().fit_transform(texts)

    # cosine_similarity accepts sparse input, so the matrix is left sparse
    # instead of being densified with .toarray().
    similarities = cosine_similarity(tfidf[0:1], tfidf[1:])[0]

    # argsort is ascending; reverse it to get the most similar first.
    top_indices = similarities.argsort()[::-1][:top_n]
    return [(filenames[i], similarities[i]) for i in top_indices]