File size: 2,807 Bytes
5ea2b9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import faiss
import pandas as pd
from sentence_transformers import SentenceTransformer
import math
from keybert import KeyBERT
from kiwipiepy import Kiwi
import urllib.parse
import json

def safe_int(val):
    """Convert *val* to int, returning '' for missing values.

    Returns '' when val is None, NaN, or any pandas-style missing value
    (pd.NA, NaT); otherwise returns int(val).
    """
    # pd.isna covers None, float('nan'), NumPy NaN, pd.NA and NaT in one
    # check — broader than the original None/math.isnan pair, since values
    # pulled from a DataFrame row may be pandas/NumPy missing sentinels.
    if val is None or pd.isna(val):
        return ''
    return int(val)

def generate_dbpia_link(title):
    """Build a DBpia top-search URL that queries for the given paper title."""
    query = urllib.parse.quote(title)
    return f"https://www.dbpia.co.kr/search/topSearch?searchOption=all&query={query}"

def generate_reference(row):
    """Build an APA-like Korean citation string plus a DBpia search link.

    row: dict-like paper record (e.g. one DataFrame row) with Korean
         column names such as '저자', '발행년', '권', '호'.

    Returns a (reference_string, search_link) tuple.
    """
    volume = safe_int(row.get('권'))
    number = safe_int(row.get('호'))
    first_page = safe_int(row.get('시작페이지'))
    last_page = safe_int(row.get('끝페이지'))

    title = row.get('논문명(국문)', '')
    citation = (
        f"{row.get('저자', '')}. ({safe_int(row.get('발행년'))}). "
        f"{title}. {row.get('학술지명(국문)', '')}"
    )

    # Append volume/issue only when at least one is present, e.g. ", 12(3)".
    if volume != '' or number != '':
        issue_suffix = '' if number == '' else f"({number})"
        citation += f", {volume}{issue_suffix}"

    # Append the page range only when both endpoints exist; the citation
    # always ends with a period either way.
    if first_page != '' and last_page != '':
        citation += f", {first_page}-{last_page}."
    else:
        citation += "."

    return citation, generate_dbpia_link(title)

def refRecommend(model, kw_model, kiwi, text, df, index):
    """Recommend reference papers for *text* via keyword + vector search.

    model:    SentenceTransformer used to embed the search query.
    kw_model: KeyBERT model for keyword extraction.
    kiwi:     Kiwi morphological analyzer (Korean noun extraction).
    text:     input document text.
    df:       DataFrame of paper metadata, row-aligned with *index*.
    index:    faiss index over the papers' embeddings.

    Returns (names, links): parallel lists of citation strings and
    DBpia search URLs for the top-3 most similar papers.
    """
    # Keep only nouns (NN* tags) so keyword extraction runs on content words.
    nouns_list = []
    for sentence in kiwi.analyze(text):
        nouns = [token.form for token in sentence[0] if token.tag.startswith('NN')]
        if nouns:
            nouns_list.extend(nouns)
    result_text = ' '.join(nouns_list)

    keywords = kw_model.extract_keywords(result_text, keyphrase_ngram_range=(1, 1), stop_words=None, top_n=5)

    # BUG FIX: the original indexed keywords[0][0] and keywords[1][0]
    # unconditionally, raising IndexError whenever extract_keywords returns
    # fewer than two keywords (e.g. very short input). Join whatever is
    # available instead; identical query string when >= 2 keywords exist.
    query_text = " ".join(kw for kw, _score in keywords[:2])
    query_vector = model.encode([query_text])
    D, I = index.search(query_vector, k=3)  # top-3 nearest papers

    results = df.iloc[I[0]]  # I[0] holds the row indices of the top-k hits

    name = []
    link = []
    for _, row in results.iterrows():
        name_result, link_result = generate_reference(row)
        print(name_result)
        name.append(name_result)
        link.append(link_result)
    return name, link