File size: 5,169 Bytes
4744234
 
 
 
 
 
1c98539
f7c13b9
1b15a9d
4dea241
4744234
 
 
 
 
 
4dea241
4744234
 
0c80d89
4744234
 
 
 
4dea241
 
 
 
1c98539
4dea241
 
4744234
1c98539
 
 
f7c13b9
1c98539
f7c13b9
 
 
 
4dea241
 
 
 
 
 
 
f7c13b9
4dea241
 
 
 
 
 
 
 
4744234
 
4dea241
4744234
f7c13b9
4744234
4dea241
 
 
4744234
4dea241
4744234
4dea241
4744234
 
 
4dea241
4744234
 
 
 
 
 
 
4dea241
 
 
 
 
 
 
 
 
 
 
4744234
 
80d2a33
4dea241
4744234
4dea241
 
 
4744234
4dea241
 
4687603
4744234
4dea241
4744234
 
4dea241
4744234
4dea241
 
 
 
 
 
 
 
 
 
4744234
 
4dea241
4744234
4dea241
 
4744234
4dea241
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# Import the required libraries
import streamlit as st
import pandas as pd
from dotenv import load_dotenv
import os
import openpyxl
from soynlp.noun import LRNounExtractor_v2
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import openai

# Load environment variables
load_dotenv()
# os.getenv returns None when OPENAI_API_KEY is not set; no check is made here.
openai_api_key = os.getenv("OPENAI_API_KEY")

# Configure the OpenAI client with the API key
openai.api_key = openai_api_key

# Path to the Excel workbook that holds the content catalogue
file_path = 'contents.xlsx'

# Load the Excel workbook (its sheets are read by load_sheet_data and listed in the UI)
wb = openpyxl.load_workbook(file_path)

# The noun extractor is instantiated once at module level and shared by
# calculate_similarity_with_soynlp
noun_extractor = LRNounExtractor_v2()

@st.cache(allow_output_mutation=True)
def calculate_similarity_with_soynlp(text1, text2):
    """Return the TF-IDF cosine similarity between the nouns of two texts.

    Nouns are extracted from each text with the module-level
    ``noun_extractor``; the extracted nouns of each text are joined into one
    space-separated document and the two documents are compared.

    Args:
        text1: First text.
        text2: Second text.

    Returns:
        The cosine similarity of the two noun documents, or ``0.0`` when
        either text yields no nouns or no token survives vectorization.
    """
    nouns1 = noun_extractor.train_extract([text1])
    nouns2 = noun_extractor.train_extract([text2])

    text1_nouns = ' '.join(nouns1.keys())
    text2_nouns = ' '.join(nouns2.keys())

    # With no nouns on one side there is nothing to compare; treat it as
    # "no similarity" instead of handing empty documents to the vectorizer.
    if not text1_nouns or not text2_nouns:
        return 0.0

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform([text1_nouns, text2_nouns])
    except ValueError:
        # scikit-learn raises ValueError ("empty vocabulary") when no token
        # in either document matches its token pattern.
        return 0.0

    return cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

@st.cache
def load_sheet_data(sheet_name):
    """Load one worksheet of the module-level workbook ``wb`` as a DataFrame.

    The first row of the sheet supplies the column names and the remaining
    rows become the records.

    Args:
        sheet_name: Name of the worksheet to read from ``wb``.

    Returns:
        A ``pandas.DataFrame`` with the sheet contents; an empty DataFrame
        when the sheet has no rows at all.
    """
    rows = list(wb[sheet_name].values)
    if not rows:
        # A sheet without any rows has no header row to index into.
        return pd.DataFrame()
    header, *records = rows
    return pd.DataFrame(records, columns=header)

def filter_internal_contents(df, search_criteria, threshold=0.1):
    """Select the rows of ``df`` whose introduction text matches the search text.

    Each row's introduction cell is lower-cased and compared with
    ``search_criteria`` via ``calculate_similarity_with_soynlp``; rows scoring
    at least ``threshold`` are kept.

    Args:
        df: DataFrame holding the title, introduction and time columns.
        search_criteria: Text to compare each introduction against.
        threshold: Minimum similarity score (inclusive) for a row to be kept.

    Returns:
        A list of dicts with the title, introduction and time of each
        matching row, in DataFrame order.
    """
    filtered_contents = []
    for _, row in df.iterrows():
        intro_text = row['์†Œ๊ฐœ']
        # Blank cells arrive as None/NaN and cells may hold numbers; only
        # non-empty strings can be lower-cased and compared.
        if not isinstance(intro_text, str) or not intro_text:
            continue
        similarity_score = calculate_similarity_with_soynlp(search_criteria, intro_text.lower())
        if similarity_score >= threshold:
            filtered_contents.append({'์ œ๋ชฉ': row['์ œ๋ชฉ'], '์†Œ๊ฐœ': row['์†Œ๊ฐœ'], '์‹œ๊ฐ„': row['์‹œ๊ฐ„']})
    return filtered_contents

# ์ถ”๊ฐ€๋œ ํ‚ค์›Œ๋“œ ์ถ”์ถœ ํ•จ์ˆ˜
def extract_keywords(purpose, target):
    """Ask the chat model for a description of the needed content and split it.

    A prompt built from ``purpose`` and ``target`` is sent to the chat
    completion API; the reply text is split on ``', '``.

    Args:
        purpose: Learning purpose entered by the user.
        target: Learning audience entered by the user.

    Returns:
        A list of non-empty, stripped text pieces from the reply. An empty
        list when the reply is empty or the API call fails (the error is
        shown via ``st.error``).
    """
    prompt = f"ํ•™์Šต ๋ชฉ์  '{purpose}'๊ณผ ๋Œ€์ƒ '{target}'๋ฅผ ์ข…ํ•ฉํ•˜์—ฌ ์–ด๋–ค ์ปค๋ฆฌํ˜๋Ÿผ์˜ ํ•™์Šต ์ปจํ…์ธ ๊ฐ€ ํ•„์š”ํ•œ์ง€๋ฅผ ์„œ์ˆ ํ•ด์ฃผ์„ธ์š”."
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4-0613",  # use the gpt-4 model
            messages=[{"role": "user", "content": prompt}]
        )
        # The message content can be missing/None; normalise to a string.
        content = response['choices'][0]['message']['content'] or ""
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        st.error(f"OpenAI API error: {e}")
        return []
    # Drop empty pieces so a blank reply yields [] rather than [''], which
    # callers would otherwise treat as "keywords found".
    return [piece.strip() for piece in content.split(', ') if piece.strip()]

def recommend_with_gpt(sheet_name, purpose, target, time, internal_contents):
    """Ask the chat model for content recommendations.

    The prompt combines the selected category, purpose, audience, available
    time and a "title: introduction" summary of the filtered internal
    contents.

    Args:
        sheet_name: Name of the selected content category (worksheet).
        purpose: Learning purpose entered by the user.
        target: Learning audience entered by the user.
        time: Available learning time, interpolated into the prompt as text.
        internal_contents: List of dicts as produced by
            ``filter_internal_contents``.

    Returns:
        The stripped reply text, or an empty string when the API call fails
        (the error is shown via ``st.error``).
    """
    summary = ", ".join(f"{content['์ œ๋ชฉ']}: {content['์†Œ๊ฐœ']}" for content in internal_contents)

    prompt = f"""
        '{sheet_name}' ์นดํ…Œ๊ณ ๋ฆฌ ๋‚ด์—์„œ '{purpose}' ํ•™์Šต ๋ชฉ์ ์„ ๋‹ฌ์„ฑํ•˜๊ณ ์ž ํ•˜๋Š” '{target}' ๋Œ€์ƒ์„ ์œ„ํ•ด,
        ์ฃผ์–ด์ง„ '{time}' ์‹œ๊ฐ„ ๋‚ด์— ๋‹ค๋ฃฐ ์ˆ˜ ์žˆ๋Š” ์ปจํ…์ธ ๋ฅผ ์ถ”์ฒœํ•ด์ฃผ์„ธ์š”.  
        ํ˜„์žฌ ํ•„ํ„ฐ๋ง๋œ ๋‚ด๋ถ€ ์ปจํ…์ธ ๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค: {summary}
        ์ถ”๊ฐ€๋กœ, ์ด ๋‚ด๋ถ€ ์ปจํ…์ธ ๋ฅผ ๋ณด์™„ํ•˜๊ฑฐ๋‚˜ ํ™•์žฅํ•  ์ˆ˜ ์žˆ๋Š” ์™ธ๋ถ€ ์ปจํ…์ธ (์˜ˆ: ์ฑ…, ์˜์ƒ, ์˜จ๋ผ์ธ ๊ฐ•์˜ ๋“ฑ)๋„ ํ•จ๊ป˜ ์ถ”์ฒœํ•ด์ฃผ์„ธ์š”.
    """

    try:
        # gpt-4 models are chat models: they are served by the chat
        # completions endpoint, not the legacy text completions endpoint.
        response = openai.ChatCompletion.create(
            model="gpt-4-0613",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.6,
            max_tokens=150,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0,
        )
        return (response['choices'][0]['message']['content'] or "").strip()
    except Exception as e:
        # Same reporting style as extract_keywords: surface the error in
        # the UI and hand back an empty result instead of crashing the page.
        st.error(f"OpenAI API error: {e}")
        return ""

# Streamlit UI layout
# Page setup and title.
st.set_page_config(page_title="Curation", page_icon="๐ŸŒท")
st.title("์ปจํ…์ธ  ํ๋ ˆ์ด์…˜")

# Input widgets: learning purpose, learning audience and available time.
purpose = st.text_area("ํ•™์Šต ๋ชฉ์ ์„ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”", placeholder="์˜ˆ: ํ–‰๋ณต ์ดํ•ด ๋ฐ ์ ์šฉ")
target = st.text_area("ํ•™์Šต ๋Œ€์ƒ์„ ์ž…๋ ฅํ•˜์„ธ์š”", placeholder="์˜ˆ: ์ผ๋ฐ˜ ๊ตฌ์„ฑ์›")
time = st.number_input("ํ•™์Šต ์‹œ๊ฐ„์„ ์ž…๋ ฅํ•˜์„ธ์š”", min_value=1, max_value=120, value=30, step=1)

# The selectable content scopes are the worksheet names of the workbook.
sheet_names = wb.sheetnames
sheet_name = st.selectbox("์ปจํ…์ธ  ๋ฒ”์œ„๋ฅผ ์„ ํƒํ•ด์ฃผ์„ธ์š”", sheet_names)

if st.button('์ปจํ…์ธ  ์ถ”์ฒœ๋ฐ›๊ธฐ'):
    # All three inputs must be non-empty; the number input has min_value=1,
    # so the `not time` check only guards against a missing value.
    if not purpose or not target or not time:
        st.error("๋ชจ๋“  ํ•„๋“œ๋ฅผ ์˜ฌ๋ฐ”๋ฅด๊ฒŒ ์ฑ„์›Œ์ฃผ์„ธ์š”.")
    else:
        # Call the keyword extraction function and display the result
        keywords = extract_keywords(purpose, target)
        if keywords:
            st.markdown("### ์ถ”์ถœ๋œ ํ‚ค์›Œ๋“œ")
            st.write(", ".join(keywords))
        else:
            st.write("ํ‚ค์›Œ๋“œ๋ฅผ ์ถ”์ถœํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.")

        # Filter the internal contents and display them
        df = load_sheet_data(sheet_name)
        # The search text is purpose + audience, lower-cased to match the
        # lower-cased introduction text used inside filter_internal_contents.
        search_criteria = f"{purpose} {target}".lower()
        # threshold=0.75 overrides the function's default of 0.1.
        internal_contents = filter_internal_contents(df, search_criteria, threshold=0.75)

        if internal_contents:
            st.subheader("๋‚ด๋ถ€ ์ปจํ…์ธ  ํ•„ํ„ฐ๋ง ๊ฒฐ๊ณผ")
            for content in internal_contents:
                with st.expander(f"{content['์ œ๋ชฉ']} ({content['์‹œ๊ฐ„']}๋ถ„)"):
                    st.write(content['์†Œ๊ฐœ'])
        else:
            st.write("๋‚ด๋ถ€ ์ปจํ…์ธ ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")

        # The model recommendation is requested even when no internal
        # content passed the filter (internal_contents may be empty).
        external_recommendation = recommend_with_gpt(sheet_name, purpose, target, str(time), internal_contents)
        st.subheader("์ถ”์ฒœ ์ปจํ…์ธ  ๋ฆฌ์ŠคํŠธ")
        st.write(external_recommendation)