# Streamlit app: curates learning contents by soynlp/TF-IDF similarity and GPT-4 recommendations.
# Required libraries
import streamlit as st
import pandas as pd
from dotenv import load_dotenv
import os
import openpyxl
from soynlp.noun import LRNounExtractor_v2
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
import openai

# Load environment variables and configure the OpenAI client.
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Excel workbook holding the internal content catalog; loaded once at start-up.
file_path = 'contents.xlsx'
wb = openpyxl.load_workbook(file_path)

# Noun extractor is initialized only once, when the app starts.
noun_extractor = LRNounExtractor_v2()
def calculate_similarity_with_soynlp(text1, text2):
    """Return the TF-IDF cosine similarity between the noun sets of two texts.

    Nouns are extracted with the module-level soynlp ``LRNounExtractor_v2``,
    joined into space-separated strings, vectorized together, and compared.

    Returns a float in [0, 1]; 0.0 when no nouns could be extracted from
    either text (``TfidfVectorizer`` would otherwise raise ``ValueError``
    on an empty vocabulary).

    NOTE(review): ``train_extract`` retrains on a single short text per call,
    which is slow and often yields few nouns — consider training once on the
    whole corpus and only extracting here; verify against soynlp docs.
    """
    nouns1 = noun_extractor.train_extract([text1])
    nouns2 = noun_extractor.train_extract([text2])
    text1_nouns = ' '.join(nouns1.keys())
    text2_nouns = ' '.join(nouns2.keys())
    # Guard: an empty vocabulary makes TfidfVectorizer raise ValueError.
    if not text1_nouns and not text2_nouns:
        return 0.0
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([text1_nouns, text2_nouns])
    cosine_sim = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return cosine_sim[0][0]
def load_sheet_data(sheet_name):
    """Load one worksheet of the module-level workbook ``wb`` into a DataFrame.

    The first row of the sheet is used as the column headers; the remaining
    rows become the data. Returns an empty DataFrame for an empty sheet
    (the original indexed ``data[0]`` unconditionally, which raises
    IndexError when the sheet has no rows).
    """
    ws = wb[sheet_name]
    rows = list(ws.values)
    if not rows:
        return pd.DataFrame()
    return pd.DataFrame(rows[1:], columns=rows[0])
def filter_internal_contents(df, search_criteria, threshold=0.1):
    """Select catalog rows whose intro text is similar to the search criteria.

    Args:
        df: DataFrame with '์ ๋ชฉ' (title), '์๊ฐ' (intro) and '์๊ฐ' (time) columns.
        search_criteria: lowercased free-text query to compare against.
        threshold: minimum cosine-similarity score to keep a row.

    Returns:
        A list of dicts, one per matching row, keeping the three columns.
    """
    filtered_contents = []
    for _, row in df.iterrows():
        intro_text = row['์๊ฐ']
        # Skip empty/None cells; coerce via str() because openpyxl may hand
        # back non-string cell values, on which .lower() would crash.
        if not intro_text:
            continue
        score = calculate_similarity_with_soynlp(search_criteria, str(intro_text).lower())
        if score >= threshold:
            filtered_contents.append({
                '์ ๋ชฉ': row['์ ๋ชฉ'],
                '์๊ฐ': row['์๊ฐ'],
                '์๊ฐ': row['์๊ฐ'],
            })
    return filtered_contents
def extract_keywords(purpose, target):
    """Ask GPT-4 which curriculum/learning contents fit the purpose and target.

    Returns the model's answer split on ', ' (a best-effort keyword list),
    or [] when the API call fails; the error is surfaced via ``st.error``.
    """
    prompt = f"ํ์ต ๋ชฉ์ '{purpose}'๊ณผ ๋์ '{target}'๋ฅผ ์ข ํฉํ์ฌ ์ด๋ค ์ปค๋ฆฌํ๋ผ์ ํ์ต ์ปจํ ์ธ ๊ฐ ํ์ํ์ง๋ฅผ ์์ ํด์ฃผ์ธ์."
    try:
        # NOTE(review): this is the pre-1.0 openai SDK interface; openai>=1.0
        # removed openai.ChatCompletion — pin openai<1.0 or migrate to
        # client.chat.completions.create.
        response = openai.ChatCompletion.create(
            model="gpt-4-0613",
            messages=[{"role": "user", "content": prompt}],
        )
        return response['choices'][0]['message']['content'].strip().split(', ')
    except Exception as e:
        st.error(f"OpenAI API error: {e}")
        return []
def recommend_with_gpt(sheet_name, purpose, target, time, internal_contents):
    """Ask GPT-4 to recommend internal + external contents for the request.

    Args:
        sheet_name: selected workbook category.
        purpose / target / time: user inputs describing the learning request.
        internal_contents: dicts from ``filter_internal_contents`` ('์ ๋ชฉ'/'์๊ฐ').

    Returns:
        The model's recommendation text, or "" when the API call fails
        (consistent with ``extract_keywords``, the error goes to ``st.error``).

    BUG FIX: the original called ``openai.Completion.create`` (the legacy
    text-completions endpoint) with ``gpt-4-0613`` — a chat-only model the
    completions endpoint rejects. Use the chat endpoint instead, matching
    ``extract_keywords``.
    """
    summary = ", ".join([f"{content['์ ๋ชฉ']}: {content['์๊ฐ']}" for content in internal_contents])
    prompt = f"""
    '{sheet_name}' ์นดํ ๊ณ ๋ฆฌ ๋ด์์ '{purpose}' ํ์ต ๋ชฉ์ ์ ๋ฌ์ฑํ๊ณ ์ ํ๋ '{target}' ๋์์ ์ํด,
    ์ฃผ์ด์ง '{time}' ์๊ฐ ๋ด์ ๋ค๋ฃฐ ์ ์๋ ์ปจํ ์ธ ๋ฅผ ์ถ์ฒํด์ฃผ์ธ์.
    ํ์ฌ ํํฐ๋ง๋ ๋ด๋ถ ์ปจํ ์ธ ๋ ๋ค์๊ณผ ๊ฐ์ต๋๋ค: {summary}
    ์ถ๊ฐ๋ก, ์ด ๋ด๋ถ ์ปจํ ์ธ ๋ฅผ ๋ณด์ํ๊ฑฐ๋ ํ์ฅํ ์ ์๋ ์ธ๋ถ ์ปจํ ์ธ (์: ์ฑ , ์์, ์จ๋ผ์ธ ๊ฐ์ ๋ฑ)๋ ํจ๊ป ์ถ์ฒํด์ฃผ์ธ์.
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4-0613",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.6,
            max_tokens=150,
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0,
        )
        return response['choices'][0]['message']['content'].strip()
    except Exception as e:
        st.error(f"OpenAI API error: {e}")
        return ""
# ---- Streamlit UI ----
st.set_page_config(page_title="Curation", page_icon="๐ท")
st.title("์ปจํ ์ธ ํ๋ ์ด์ ")

# User inputs: learning purpose, audience, and available time (minutes).
purpose = st.text_area("ํ์ต ๋ชฉ์ ์ ์ ๋ ฅํด์ฃผ์ธ์", placeholder="์: ํ๋ณต ์ดํด ๋ฐ ์ ์ฉ")
target = st.text_area("ํ์ต ๋์์ ์ ๋ ฅํ์ธ์", placeholder="์: ์ผ๋ฐ ๊ตฌ์ฑ์")
time = st.number_input("ํ์ต ์๊ฐ์ ์ ๋ ฅํ์ธ์", min_value=1, max_value=120, value=30, step=1)

# Content category = one worksheet of the workbook.
sheet_names = wb.sheetnames
sheet_name = st.selectbox("์ปจํ ์ธ ๋ฒ์๋ฅผ ์ ํํด์ฃผ์ธ์", sheet_names)

if st.button('์ปจํ ์ธ ์ถ์ฒ๋ฐ๊ธฐ'):
    if not purpose or not target or not time:
        st.error("๋ชจ๋ ํ๋๋ฅผ ์ฌ๋ฐ๋ฅด๊ฒ ์ฑ์์ฃผ์ธ์.")
    else:
        # 1) Extract keywords for the purpose/target via GPT and show them.
        keywords = extract_keywords(purpose, target)
        if keywords:
            st.markdown("### ์ถ์ถ๋ ํค์๋")
            st.write(", ".join(keywords))
        else:
            st.write("ํค์๋๋ฅผ ์ถ์ถํ์ง ๋ชปํ์ต๋๋ค.")

        # 2) Filter the internal catalog by similarity to purpose+target.
        df = load_sheet_data(sheet_name)
        search_criteria = f"{purpose} {target}".lower()
        internal_contents = filter_internal_contents(df, search_criteria, threshold=0.75)
        if internal_contents:
            st.subheader("๋ด๋ถ ์ปจํ ์ธ ํํฐ๋ง ๊ฒฐ๊ณผ")
            for content in internal_contents:
                with st.expander(f"{content['์ ๋ชฉ']} ({content['์๊ฐ']}๋ถ)"):
                    st.write(content['์๊ฐ'])
        else:
            st.write("๋ด๋ถ ์ปจํ ์ธ ๊ฐ ์์ต๋๋ค.")

        # 3) Ask GPT for a final recommendation combining internal + external.
        external_recommendation = recommend_with_gpt(sheet_name, purpose, target, str(time), internal_contents)
        st.subheader("์ถ์ฒ ์ปจํ ์ธ ๋ฆฌ์คํธ")
        st.write(external_recommendation)