|
|
import faiss
|
|
|
import pandas as pd
|
|
|
from sentence_transformers import SentenceTransformer
|
|
|
import math
|
|
|
from keybert import KeyBERT
|
|
|
from kiwipiepy import Kiwi
|
|
|
import urllib.parse
|
|
|
import json
|
|
|
|
|
|
def safe_int(val):
    """Return '' when *val* is None or NaN; otherwise return int(val).

    Used to coerce possibly-missing spreadsheet cells (pandas gives NaN
    for empty numeric cells) into either an int or an empty string.
    """
    missing = val is None or (isinstance(val, float) and math.isnan(val))
    return '' if missing else int(val)
|
|
|
|
|
|
def generate_dbpia_link(title):
    """Return a DBpia keyword-search URL for the given paper title.

    The title is percent-encoded and appended to DBpia's topSearch
    endpoint as the ``query`` parameter.
    """
    prefix = "https://www.dbpia.co.kr/search/topSearch?searchOption=all&query="
    return f"{prefix}{urllib.parse.quote(title)}"
|
|
|
|
|
|
def generate_reference(row):
    """
    Build an APA-like citation string and a DBpia search link for one paper.

    row: mapping-like paper record (e.g. a pandas DataFrame row); keys are
         the Korean column names of the source data (author, year, title,
         journal, volume, issue, start/end page).
    Returns: tuple (reference_string, dbpia_search_link).

    NOTE(review): the Korean column-name literals below appear
    mojibake-damaged in this file; they are preserved byte-for-byte here,
    but should be verified against the actual DataFrame columns.
    """
    # Volume / issue / page cells may be missing (None/NaN) -> '' via safe_int.
    vol = safe_int(row.get('๊ถ'))
    issue = safe_int(row.get('ํธ'))
    start_page = safe_int(row.get('์์ํ์ด์ง'))
    end_page = safe_int(row.get('๋ํ์ด์ง'))

    # Build "start-end" only when both endpoints are present.
    pages = f"{start_page}-{end_page}" if start_page != '' and end_page != '' else ''

    # "Author. (Year). Title. Journal" — missing fields degrade to ''.
    ref = f"{row.get('์ ์', '')}. ({safe_int(row.get('๋ฐํ๋
'))}). {row.get('๋
ผ๋ฌธ๋ช
(๊ตญ๋ฌธ)', '')}. {row.get('ํ์ ์ง๋ช
(๊ตญ๋ฌธ)', '')}"

    # Append ", vol(issue)" when either the volume or the issue is known.
    if vol != '' or issue != '':
        issue_str = f"({issue})" if issue != '' else ''
        ref += f", {vol}{issue_str}"

    # Close the citation with the page range, or just a period.
    if pages:
        ref += f", {pages}."
    else:
        ref += "."

    # Search link is derived from the (Korean) paper title column.
    link = generate_dbpia_link(row.get('๋
ผ๋ฌธ๋ช
(๊ตญ๋ฌธ)', ''))

    return (ref,link)
|
|
|
|
|
|
def refRecommend(model, kw_model, kiwi, text, df, index):
    """Recommend up to three reference papers for a Korean input text.

    Pipeline: extract nouns with Kiwi -> pick keywords with KeyBERT ->
    embed the top keywords with the SentenceTransformer -> nearest-neighbor
    search in the FAISS index -> format the matched DataFrame rows as
    citations via generate_reference.

    model:    SentenceTransformer used to embed the keyword query.
    kw_model: KeyBERT keyword extractor.
    kiwi:     kiwipiepy.Kiwi morphological analyzer.
    text:     free-form input text (Korean expected).
    df:       DataFrame of candidate papers, positionally aligned with *index*.
    index:    FAISS index built over the papers' embeddings.
    Returns:  (names, links) — parallel lists of citation strings and
              DBpia search URLs for the top-3 matches.
    """
    # Keep only noun tokens (Kiwi tags starting with 'NN') as search material.
    nouns_list = []
    for sentence in kiwi.analyze(text):
        nouns_list.extend(
            token.form for token in sentence[0] if token.tag.startswith('NN')
        )
    result_text = ' '.join(nouns_list)

    keywords = kw_model.extract_keywords(
        result_text, keyphrase_ngram_range=(1, 1), stop_words=None, top_n=5
    )

    # Bug fix: the original did keywords[0][0] + " " + keywords[1][0],
    # which raises IndexError whenever fewer than two keywords are
    # extracted (short or noun-free input). Join whatever is available.
    query_text = " ".join(kw for kw, _score in keywords[:2])
    query_vector = model.encode([query_text])

    # FAISS returns positional row indices for the k nearest neighbors.
    # NOTE(review): if the index holds fewer than 3 vectors, FAISS pads
    # I with -1, which df.iloc would map to the last row — verify the
    # index is always populated with >= 3 entries.
    D, I = index.search(query_vector, k=3)
    results = df.iloc[I[0]]

    name = []
    link = []
    for _, row in results.iterrows():
        name_result, link_result = generate_reference(row)  # (citation, url)
        name.append(name_result)
        link.append(link_result)

    return name, link
|
|
|
|