Spaces:
Sleeping
Sleeping
| import os | |
| import pandas as pd | |
| import gradio as gr | |
| from datetime import datetime | |
| import openai | |
| ############################## | |
| # [LLM] Provided code -- start | |
| ############################## | |
# OpenAI API client configuration: the key is read from the environment.
openai.api_key = os.getenv("OPENAI_API_KEY")


def call_api(content, system_message, max_tokens, temperature, top_p):
    """Send one system + user exchange to the chat model and return the reply text.

    Args:
        content: Text sent as the ``user`` message.
        system_message: Text sent as the ``system`` message.
        max_tokens: Upper bound on generated tokens, forwarded to the API.
        temperature: Sampling temperature, forwarded to the API.
        top_p: Nucleus-sampling parameter, forwarded to the API.

    Returns:
        The message content of the first returned choice.
    """
    # ``openai.ChatCompletion`` only exists as a working API in openai<1.0;
    # openai>=1.0 exposes the same call as ``openai.chat.completions``.
    # Use whichever the installed SDK provides so the request works on both.
    if hasattr(openai, "chat"):
        create_completion = openai.chat.completions.create
    else:
        create_completion = openai.ChatCompletion.create

    response = create_completion(
        model="gpt-4o-mini",  # this app must always use the gpt-4o-mini model
        messages=[
            {"role": "system", "content": system_message},
            {"role": "user", "content": content},
        ],
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    # Attribute access works on both SDK generations, whereas subscripting
    # (``message['content']``) is only supported by the pre-1.0 objects.
    return response.choices[0].message.content
| ############################## | |
| # [LLM] Provided code -- end | |
| ############################## | |
| ############################## | |
| # [Base code] Start (do not modify or delete) | |
| ############################## | |
def read_excel_data(file):
    """Load the review sheet from an Excel file and add derived columns.

    Columns A-E are read with the openpyxl engine, skipping the first row,
    and are labelled ``ID``, ``Review Date``, ``Option``, ``Review`` and
    ``ReviewScore``.

    Args:
        file: Path or file-like object accepted by ``pandas.read_excel``.

    Returns:
        The loaded DataFrame with ``Review Date`` reduced to plain dates and
        the extra columns ``Year``, ``Option1`` and ``Review Length``.
    """
    column_names = ["ID", "Review Date", "Option", "Review", "ReviewScore"]
    frame = pd.read_excel(
        file,
        usecols="A, B, C, D, E",
        skiprows=1,
        names=column_names,
        engine='openpyxl',
    )

    # Drop any timezone information, then keep only the calendar date.
    parsed_dates = pd.to_datetime(frame['Review Date'])
    frame['Review Date'] = parsed_dates.dt.tz_localize(None).dt.date

    # Year is the first four characters of the date's string form.
    date_text = frame['Review Date'].astype(str)
    frame['Year'] = date_text.str.slice(0, 4)

    # First-level option: the text before the first " / " separator.
    option_parts = frame['Option'].astype(str).str.split(" / ")
    frame['Option1'] = option_parts.str[0]

    # Review length in characters.
    frame['Review Length'] = frame['Review'].str.len()
    return frame
def _format_top_reviews(df, score_mask, limit):
    """Format the longest reviews selected by *score_mask* as numbered text.

    Args:
        df: Review DataFrame with at least the columns ``Review Date``,
            ``ID``, ``Option``, ``Review`` and ``Review Length``.
        score_mask: Boolean Series selecting the rows of *df* to consider.
        limit: Maximum number of reviews to include.

    Returns:
        The selected reviews, longest first and numbered from 1, joined by
        blank lines; an empty string when no row is selected.
    """
    top = df[score_mask].sort_values(by='Review Length', ascending=False).head(limit)

    # Build the entries with an explicit comprehension rather than
    # ``DataFrame.apply(axis=1)``: on an empty frame ``apply`` can hand back
    # the (empty) frame itself depending on the pandas version, and joining
    # that would emit the column labels instead of an empty string.  It also
    # avoids adding a scratch column to a slice of the caller's frame.
    entries = [
        f"{rank}. **{review_date} / {reviewer_id} / {option}**\n\n{review}"
        for rank, (review_date, reviewer_id, option, review) in enumerate(
            zip(top['Review Date'], top['ID'], top['Option'], top['Review']),
            start=1,
        )
    ]
    return "\n\n".join(entries)


def get_positive_reviews(df):
    """Return the 20 longest reviews with ``ReviewScore >= 4`` as formatted text.

    Args:
        df: Review DataFrame as produced by ``read_excel_data``.

    Returns:
        Numbered review entries separated by blank lines, or ``""`` when no
        review qualifies.
    """
    return _format_top_reviews(df, df['ReviewScore'] >= 4, 20)  # top 20 reviews


def get_negative_reviews(df):
    """Return the 30 longest reviews with ``ReviewScore <= 2`` as formatted text.

    Args:
        df: Review DataFrame as produced by ``read_excel_data``.

    Returns:
        Numbered review entries separated by blank lines, or ``""`` when no
        review qualifies.
    """
    return _format_top_reviews(df, df['ReviewScore'] <= 2, 30)  # top 30 reviews
def process_reviews(file):
    """Load the review file and return its formatted positive and negative reviews.

    Args:
        file: Excel file, forwarded unchanged to ``read_excel_data``.

    Returns:
        A ``(positive_reviews, negative_reviews)`` tuple holding the results of
        ``get_positive_reviews`` and ``get_negative_reviews`` respectively.
    """
    reviews = read_excel_data(file)
    return get_positive_reviews(reviews), get_negative_reviews(reviews)
| ############################## | |
| # [Base code] End (do not modify or delete) | |
| ############################## | |
def analyze_with_llm(review_content, system_prompt):
    """Helper that runs one LLM analysis request over a block of review text.

    Args:
        review_content: Positive or negative review text, sent as the user
            message.
        system_prompt: System-role prompt, sent as the system message.

    Returns:
        Whatever ``call_api`` returns for this request.
    """
    # Fixed generation settings used for every analysis request.
    generation_settings = {
        "max_tokens": 500,
        "temperature": 0.7,
        "top_p": 1.0,
    }
    return call_api(
        content=review_content,
        system_message=system_prompt,
        **generation_settings,
    )
def process_reviews_and_analyze(file):
    """Extract the top reviews from *file*, then run the LLM analysis on each group.

    Args:
        file: Uploaded Excel file, forwarded unchanged to ``process_reviews``.

    Returns:
        A 4-tuple ``(positive_reviews, negative_reviews, positive_analysis,
        negative_analysis)``: the two formatted review texts followed by the
        ``analyze_with_llm`` result for each of them.
    """
    # NOTE(review): the prompt literals below look mis-encoded (mojibake of
    # what was presumably Korean text).  They are reproduced unchanged here;
    # restore them from the original source before relying on the analysis.
    prompt_for_positive = "".join((
        "๋น์ ์ ์ ๋ฌธ ๋ฆฌ๋ทฐ ๋ถ์๊ฐ์ ๋๋ค. ์๋์๋ ๊ธ์ ๋ฆฌ๋ทฐ๋ค์ด ๋์ด๋์ด ์์ต๋๋ค.\n",
        "์ด ๋ฆฌ๋ทฐ๋ค์์ ๊ณ ๊ฐ์ด ๋ง์กฑํดํ๋ ์ฃผ์ ํฌ์ธํธ์ ํน์ง์ ์์ฝํด์ฃผ๊ณ , ",
        "์ถ๊ฐ์ ์ธ ์ธ์ฌ์ดํธ๋ฅผ ์ ์ํด ์ฃผ์ธ์.",
    ))
    prompt_for_negative = "".join((
        "๋น์ ์ ์ ๋ฌธ ๋ฆฌ๋ทฐ ๋ถ์๊ฐ์ ๋๋ค. ์๋์๋ ๋ถ์ ๋ฆฌ๋ทฐ๋ค์ด ๋์ด๋์ด ์์ต๋๋ค.\n",
        "์ด ๋ฆฌ๋ทฐ๋ค์์ ๊ณ ๊ฐ์ด ๋ถ๋ง์ ๊ฐ์ง๋ ์ฃผ์ ํฌ์ธํธ์ ํน์ง์ ์์ฝํด์ฃผ๊ณ , ",
        "๊ฐ์ ์ ์ ์ ์ํด ์ฃผ์ธ์.",
    ))

    # Review extraction (unchanged base pipeline).
    positive_text, negative_text = process_reviews(file)

    # One LLM request per group: positive first, then negative.
    positive_summary = analyze_with_llm(positive_text, prompt_for_positive)
    negative_summary = analyze_with_llm(negative_text, prompt_for_negative)

    return positive_text, negative_text, positive_summary, negative_summary
# Gradio interface definition
def create_interface():
    """Build the Gradio Blocks app for uploading a review file and analysing it.

    The layout holds an ``.xlsx`` file upload, one button and four read-only
    text boxes; clicking the button calls ``process_reviews_and_analyze`` with
    the uploaded file and writes its four results to the text boxes in order.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    # NOTE(review): the UI strings below look mis-encoded (mojibake); they are
    # left untouched here -- restore them from the original source.
    # NOTE(review): the nesting under ``gr.Column()`` was reconstructed from a
    # listing without indentation -- confirm against the original layout.
    with gr.Blocks() as demo:
        gr.Markdown("### ๋ฆฌ๋ทฐ ๋ฐ์ดํฐ ์ ๋ก๋")
        file_input = gr.File(label="์์ ํ์ผ ์ ๋ก๋", file_types=[".xlsx"])
        # Button caption (renamed)
        analyze_button = gr.Button("๋ฆฌ๋ทฐ๋ถ์")
        with gr.Column():
            gr.Markdown("### ๊ธ์ ์ ์ธ ์ฃผ์ ๋ฆฌ๋ทฐ (์ต๋ 20๊ฐ)")
            positive_reviews_output = gr.Textbox(label="๊ธ์ ์ ์ธ ์ฃผ์ ๋ฆฌ๋ทฐ", interactive=False, lines=20)
            gr.Markdown("### ๋ถ์ ์ ์ธ ์ฃผ์ ๋ฆฌ๋ทฐ (์ต๋ 30๊ฐ)")
            negative_reviews_output = gr.Textbox(label="๋ถ์ ์ ์ธ ์ฃผ์ ๋ฆฌ๋ทฐ", interactive=False, lines=30)
            # Area that displays the LLM analysis results
            gr.Markdown("### ๊ธ์ ๋ฆฌ๋ทฐ ๋ถ์ ๊ฒฐ๊ณผ")
            positive_analysis_output = gr.Textbox(label="๊ธ์ ๋ฆฌ๋ทฐ ๋ถ์", interactive=False, lines=7)
            gr.Markdown("### ๋ถ์ ๋ฆฌ๋ทฐ ๋ถ์ ๊ฒฐ๊ณผ")
            negative_analysis_output = gr.Textbox(label="๋ถ์ ๋ฆฌ๋ทฐ ๋ถ์", interactive=False, lines=7)
        # Run review extraction and the LLM analysis in a single click
        analyze_button.click(
            fn=process_reviews_and_analyze,
            inputs=[file_input],
            outputs=[
                positive_reviews_output,
                negative_reviews_output,
                positive_analysis_output,
                negative_analysis_output
            ]
        )
    return demo
if __name__ == "__main__":
    # Build the Gradio app and start serving it only when run as a script.
    interface = create_interface()
    interface.launch()