Spaces:
Build error
Build error
File size: 5,169 Bytes
4744234 1c98539 f7c13b9 1b15a9d 4dea241 4744234 4dea241 4744234 0c80d89 4744234 4dea241 1c98539 4dea241 4744234 1c98539 f7c13b9 1c98539 f7c13b9 4dea241 f7c13b9 4dea241 4744234 4dea241 4744234 f7c13b9 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 80d2a33 4dea241 4744234 4dea241 4744234 4dea241 4687603 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 4dea241 4744234 4dea241 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 | # ํ์ํ ๋ผ์ด๋ธ๋ฌ๋ฆฌ ๋ถ๋ฌ์ค๊ธฐ
import streamlit as st
import pandas as pd
from dotenv import load_dotenv
import os
import openpyxl
from soynlp.noun import LRNounExtractor_v2
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import openai
# --- Application setup (runs once at import time) ---

# Load .env and hand the API key to the OpenAI client.
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = openai_api_key

# Excel workbook holding the internal content catalogue.
file_path = 'contents.xlsx'
wb = openpyxl.load_workbook(file_path)

# The soynlp noun extractor is created a single time and shared by all
# similarity computations below.
noun_extractor = LRNounExtractor_v2()
@st.cache(allow_output_mutation=True)
def calculate_similarity_with_soynlp(text1, text2):
    """Return the TF-IDF cosine similarity between the nouns of two texts.

    Nouns are extracted from each text with the module-level soynlp
    ``noun_extractor``, joined into a space-separated pseudo-document,
    vectorised with TF-IDF, and compared with cosine similarity.

    Args:
        text1: First text (e.g. the search criteria).
        text2: Second text (e.g. a content description).

    Returns:
        float similarity in [0, 1]; 0.0 when no comparable vocabulary
        could be built from either text.
    """
    nouns1 = noun_extractor.train_extract([text1])
    nouns2 = noun_extractor.train_extract([text2])
    text1_nouns = ' '.join(nouns1)  # train_extract returns a dict keyed by noun
    text2_nouns = ' '.join(nouns2)
    # BUG FIX: if neither text yields any nouns, TfidfVectorizer raises
    # ValueError("empty vocabulary") and crashed the app. Treat that case
    # as "no similarity" instead.
    if not text1_nouns.strip() and not text2_nouns.strip():
        return 0.0
    vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = vectorizer.fit_transform([text1_nouns, text2_nouns])
    except ValueError:
        # Vocabulary still empty after tokenisation (e.g. only filtered tokens).
        return 0.0
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return cosine_sim[0][0]
@st.cache
def load_sheet_data(sheet_name):
    """Load one worksheet of the module-level workbook ``wb`` as a DataFrame.

    The first row of the sheet is used as the column header.

    Args:
        sheet_name: Name of the worksheet to read.

    Returns:
        pandas.DataFrame with the sheet's rows; empty DataFrame when the
        sheet has no rows at all.
    """
    ws = wb[sheet_name]
    data = list(ws.values)
    # BUG FIX: an empty worksheet used to raise IndexError on data[0].
    if not data:
        return pd.DataFrame()
    columns = data[0]
    df = pd.DataFrame(data[1:], columns=columns)
    return df
def filter_internal_contents(df, search_criteria, threshold=0.1):
    """Select catalogue rows whose description is similar to the criteria.

    Args:
        df: DataFrame produced by ``load_sheet_data``.
        search_criteria: Lower-cased free-text query (purpose + target).
        threshold: Minimum cosine similarity for a row to be kept.

    Returns:
        List of dicts with the matching rows' title and description columns.
    """
    filtered_contents = []
    for _, row in df.iterrows():
        intro_text = row['์๊ฐ']
        if intro_text:  # skip rows with an empty/NaN description cell
            similarity_score = calculate_similarity_with_soynlp(search_criteria, intro_text.lower())
            if similarity_score >= threshold:
                # BUG FIX: the original dict literal repeated the key '์๊ฐ'
                # twice with identical values (the later entry silently
                # overwrote the earlier one), so dropping the duplicate is
                # behavior-preserving. NOTE(review): the two keys were
                # probably distinct column names before a text-encoding
                # mishap collapsed them — confirm against the spreadsheet's
                # real headers.
                filtered_contents.append({'์ ๋ชฉ': row['์ ๋ชฉ'], '์๊ฐ': row['์๊ฐ']})
    return filtered_contents
# Keyword-extraction helper.
def extract_keywords(purpose, target):
    """Ask the chat model which curriculum keywords fit the purpose/audience.

    Args:
        purpose: Learning goal entered by the user.
        target: Learning audience entered by the user.

    Returns:
        List of keyword strings (the model's reply split on ', '), or an
        empty list when the API call fails; failures are surfaced via
        ``st.error`` rather than raised.
    """
    prompt = f"ํ์ต ๋ชฉ์ '{purpose}'๊ณผ ๋์ '{target}'๋ฅผ ์ขํฉํ์ฌ ์ด๋ค ์ปค๋ฆฌํ๋ผ์ ํ์ต ์ปจํ์ธ ๊ฐ ํ์ํ์ง๋ฅผ ์์ ํด์ฃผ์ธ์."
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4-0613",  # gpt-4 chat model
            messages=[{"role": "user", "content": prompt}]
        )
    except Exception as e:
        # Degrade gracefully: report the failure in the UI, return no keywords.
        st.error(f"OpenAI API error: {e}")
        return []
    reply = completion['choices'][0]['message']['content']
    return reply.strip().split(', ')
def recommend_with_gpt(sheet_name, purpose, target, time, internal_contents):
    """Ask GPT for content recommendations within the given constraints.

    Builds a prompt from the selected category, the user's purpose/target/
    time budget, and a summary of the already-filtered internal contents,
    and asks the model to recommend internal plus complementary external
    content.

    Args:
        sheet_name: Selected worksheet (content category).
        purpose: Learning goal entered by the user.
        target: Learning audience entered by the user.
        time: Time budget as a string (minutes).
        internal_contents: Output of ``filter_internal_contents``.

    Returns:
        The model's recommendation text, or "" when the API call fails
        (the error is surfaced via ``st.error``).
    """
    summary = ", ".join(f"{content['์ ๋ชฉ']}: {content['์๊ฐ']}" for content in internal_contents)
    prompt = f"""
'{sheet_name}' ์นดํ๊ณ ๋ฆฌ ๋ด์์ '{purpose}' ํ์ต ๋ชฉ์ ์ ๋ฌ์ฑํ๊ณ ์ ํ๋ '{target}' ๋์์ ์ํด,
์ฃผ์ด์ง '{time}' ์๊ฐ ๋ด์ ๋ค๋ฃฐ ์ ์๋ ์ปจํ์ธ ๋ฅผ ์ถ์ฒํด์ฃผ์ธ์.
ํ์ฌ ํํฐ๋ง๋ ๋ด๋ถ ์ปจํ์ธ ๋ ๋ค์๊ณผ ๊ฐ์ต๋๋ค: {summary}
์ถ๊ฐ๋ก, ์ด ๋ด๋ถ ์ปจํ์ธ ๋ฅผ ๋ณด์ํ๊ฑฐ๋ ํ์ฅํ ์ ์๋ ์ธ๋ถ ์ปจํ์ธ (์: ์ฑ, ์์, ์จ๋ผ์ธ ๊ฐ์ ๋ฑ)๋ ํจ๊ป ์ถ์ฒํด์ฃผ์ธ์.
"""
    # BUG FIX: the original called openai.Completion.create (the legacy
    # *completions* endpoint) with "gpt-4-0613". GPT-4 models are only served
    # by the *chat* completions endpoint, so every call failed. Use
    # ChatCompletion, as extract_keywords already does, and handle API
    # errors the same way for consistency.
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4-0613",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.6,
            max_tokens=150,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        st.error(f"OpenAI API error: {e}")
        return ""
# --- Streamlit UI ---
st.set_page_config(page_title="Curation", page_icon="๐ท")
st.title("์ปจํ์ธ  ํ๋ ์ด์")

# User inputs: learning purpose, audience, and time budget (minutes).
purpose = st.text_area("ํ์ต ๋ชฉ์ ์ ์๋ ฅํด์ฃผ์ธ์", placeholder="์: ํ๋ณต ์ดํด ๋ฐ ์ ์ฉ")
target = st.text_area("ํ์ต ๋์์ ์๋ ฅํ์ธ์", placeholder="์: ์ผ๋ฐ ๊ตฌ์ฑ์")
time = st.number_input("ํ์ต ์๊ฐ์ ์๋ ฅํ์ธ์", min_value=1, max_value=120, value=30, step=1)

# Content category = one worksheet of the catalogue workbook.
sheet_names = wb.sheetnames
sheet_name = st.selectbox("์ปจํ์ธ  ๋ฒ์๋ฅผ ์ ํํด์ฃผ์ธ์", sheet_names)

if st.button('์ปจํ์ธ ์ถ์ฒ๋ฐ๊ธฐ'):
    if not purpose or not target or not time:
        st.error("๋ชจ๋ ํ๋๋ฅผ ์ฌ๋ฐ๋ฅด๊ฒ ์ฑ์์ฃผ์ธ์.")
    else:
        # 1) Show the keywords GPT derived from purpose + target.
        keywords = extract_keywords(purpose, target)
        if keywords:
            st.markdown("### ์ถ์ถ๋ ํค์๋")
            st.write(", ".join(keywords))
        else:
            st.write("ํค์๋๋ฅผ ์ถ์ถํ์ง ๋ชปํ์ต๋๋ค.")

        # 2) Filter the internal catalogue by similarity to the query.
        sheet_df = load_sheet_data(sheet_name)
        search_criteria = f"{purpose} {target}".lower()
        internal_contents = filter_internal_contents(sheet_df, search_criteria, threshold=0.75)
        if internal_contents:
            st.subheader("๋ด๋ถ ์ปจํ์ธ  ํํฐ๋ง ๊ฒฐ๊ณผ")
            for item in internal_contents:
                with st.expander(f"{item['์ ๋ชฉ']} ({item['์๊ฐ']}๋ถ)"):
                    st.write(item['์๊ฐ'])
        else:
            st.write("๋ด๋ถ ์ปจํ์ธ ๊ฐ ์์ต๋๋ค.")

        # 3) GPT recommendation: internal picks + complementary external content.
        recommendation = recommend_with_gpt(sheet_name, purpose, target, str(time), internal_contents)
        st.subheader("์ถ์ฒ ์ปจํ์ธ  ๋ฆฌ์คํธ")
        st.write(recommendation)
|