import json
import math
import urllib.parse

import faiss
import pandas as pd
from sentence_transformers import SentenceTransformer
from keybert import KeyBERT
from kiwipiepy import Kiwi


def _is_missing(val):
    """Return True when val is None or a float NaN (an empty DataFrame cell)."""
    return val is None or (isinstance(val, float) and math.isnan(val))


def _safe_text(val):
    """Return val as a string, or '' when it is missing (None / NaN)."""
    return '' if _is_missing(val) else str(val)


def safe_int(val):
    """Convert val to int, or return '' when val is None or NaN.

    Raises:
        ValueError / TypeError: propagated from int() when val is present
            but not convertible to an integer.
    """
    if _is_missing(val):
        return ''
    return int(val)


def generate_dbpia_link(title):
    """Build a DBpia search URL for the given paper title.

    A missing title (None / NaN) yields the search URL with an empty query
    instead of raising inside urllib.parse.quote.
    """
    base_url = "https://www.dbpia.co.kr/search/topSearch?searchOption=all&query="
    return base_url + urllib.parse.quote(_safe_text(title))


def generate_reference(row):
    """Format one paper record as a reference string plus a DBpia link.

    Args:
        row: mapping-like paper record supporting ``.get`` (e.g. a dict or a
            DataFrame row) with the Korean column names used below.

    Returns:
        Tuple ``(reference, link)``: the formatted reference string and the
        DBpia search URL for the paper title.
    """
    vol = safe_int(row.get('권'))
    issue = safe_int(row.get('호'))
    start_page = safe_int(row.get('시작페이지'))
    end_page = safe_int(row.get('끝페이지'))
    pages = f"{start_page}-{end_page}" if start_page != '' and end_page != '' else ''

    # Text cells that are empty in a DataFrame come back as NaN rather than
    # falling through to .get's default, so normalise them explicitly to
    # avoid a literal "nan" in the output.
    author = _safe_text(row.get('저자', ''))
    title = _safe_text(row.get('논문명(국문)', ''))
    journal = _safe_text(row.get('학술지명(국문)', ''))

    ref = f"{author}. ({safe_int(row.get('발행년'))}). {title}. {journal}"
    if vol != '' or issue != '':
        issue_str = f"({issue})" if issue != '' else ''
        ref += f", {vol}{issue_str}"
    if pages:
        ref += f", {pages}."
    else:
        ref += "."

    return (ref, generate_dbpia_link(title))


def refRecommend(model, kw_model, kiwi, text, df, index, k=3):
    """Recommend references related to text.

    Nouns are extracted from ``text`` with ``kiwi``, the top keywords are
    picked from them with ``kw_model``, the two best keywords are embedded
    with ``model`` and used to query ``index``; matching rows of ``df`` are
    formatted with generate_reference.

    Args:
        model: encoder exposing ``encode(list_of_str)``.
        kw_model: keyword extractor exposing ``extract_keywords``; its result
            is indexed as a sequence of ``(keyword, score)`` pairs.
        kiwi: morphological analyzer exposing ``analyze(text)``.
        text: input text to find references for.
        df: DataFrame of paper records, positionally aligned with ``index``
            (assumed - the ids returned by the search are used with
            ``df.iloc``).
        index: vector index exposing ``search(query, k=...)``.
        k: number of nearest neighbours to request (default 3).

    Returns:
        Tuple ``(names, links)`` of two parallel lists: reference strings and
        DBpia search URLs. Both are empty when no keyword can be extracted.
    """
    nouns = [
        token.form
        for analysis in kiwi.analyze(text)
        for token in analysis[0]
        if token.tag.startswith('NN')
    ]
    if not nouns:
        return [], []

    keywords = kw_model.extract_keywords(
        ' '.join(nouns),
        keyphrase_ngram_range=(1, 1),
        stop_words=None,
        top_n=5,
    )
    if not keywords:
        return [], []

    # Use the two best keywords as the query; fall back to one when that is
    # all the extractor returned instead of raising IndexError.
    query = " ".join(keyword for keyword, _score in keywords[:2])
    query_vector = model.encode([query])

    _distances, neighbours = index.search(query_vector, k=k)
    # FAISS pads the id array with -1 when fewer than k neighbours exist;
    # df.iloc[-1] would silently return the last row, so drop those ids.
    hits = [int(i) for i in neighbours[0] if i >= 0]
    results = df.iloc[hits]

    name = []
    link = []
    for _, row in results.iterrows():
        name_result, link_result = generate_reference(row)
        print(name_result)  # kept: existing console output of each reference
        name.append(name_result)
        link.append(link_result)
    return name, link