# vector_utils.py
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


def find_similar_docs(base_text, doc_dict, top_n=3):
    """Rank documents by TF-IDF cosine similarity to a reference text.

    The reference text and every document summary are vectorized together
    with a freshly fitted ``TfidfVectorizer``; each document is then scored
    by its cosine similarity to the reference.

    Args:
        base_text: Reference summary text that the documents are compared to.
        doc_dict: Mapping of the form ``{filename: summary_text}``.
        top_n: Maximum number of recommendations to return. ``None`` returns
            every document, ranked.

    Returns:
        A list of ``(filename, similarity)`` tuples ordered from most to
        least similar, containing at most ``top_n`` entries. An empty list
        is returned when ``doc_dict`` is empty or ``top_n`` is zero or
        negative.

    Note:
        Errors raised by scikit-learn while fitting (for example when the
        texts yield no usable vocabulary) are not caught here and propagate
        to the caller.
    """
    # Nothing to rank: avoid handing cosine_similarity a zero-row matrix,
    # and avoid a negative slice silently dropping results from the end.
    if not doc_dict or (top_n is not None and top_n <= 0):
        return []

    filenames = list(doc_dict)
    # Row 0 is the reference text; rows 1..N line up with `filenames`.
    texts = [base_text, *doc_dict.values()]

    tfidf_matrix = TfidfVectorizer().fit_transform(texts)

    # Keep the matrix sparse; the 0:1 slice keeps the reference row 2-D.
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]

    # argsort is ascending, so reverse it to get the best matches first.
    top_indices = similarities.argsort()[::-1][:top_n]
    return [(filenames[i], similarities[i]) for i in top_indices]