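# NOTE: the next three lines are web-page header residue captured with the file, not program code.
# CSB261's picture
# Update app.py
# 6a0352d verified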
import os
import pandas as pd
import gradio as gr
from datetime import datetime
import openai
##############################
# Start of the code provided by the [LLM reference material]
##############################
# Configure the OpenAI API client: the key is read from the OPENAI_API_KEY
# environment variable (None when the variable is not set).
openai.api_key = os.environ.get("OPENAI_API_KEY")
def call_api(content, system_message, max_tokens, temperature, top_p):
    """Send one system + user message pair to the chat model and return the reply text.

    Args:
        content: Text sent as the user message.
        system_message: Text sent as the system message.
        max_tokens: Forwarded to the API as ``max_tokens``.
        temperature: Forwarded to the API as ``temperature``.
        top_p: Forwarded to the API as ``top_p``.

    Returns:
        The ``content`` of the first choice in the API response.
    """
    request = {
        "model": "gpt-4o-mini",  # this model must be used; do not change it
        "messages": [
            {"role": "system", "content": system_message},
            {"role": "user", "content": content},
        ],
        "max_tokens": max_tokens,
        "temperature": temperature,
        "top_p": top_p,
    }

    # openai>=1.0 removed ``openai.ChatCompletion`` (calling it raises); that
    # release line is recognisable by the ``OpenAI`` client class and exposes
    # the same request through the module-level ``openai.chat.completions``.
    if hasattr(openai, "OpenAI"):
        response = openai.chat.completions.create(**request)
        return response.choices[0].message.content

    # Legacy (pre-1.0) SDK path, unchanged.
    response = openai.ChatCompletion.create(**request)
    return response.choices[0].message['content']
##############################
# End of the code provided by the [LLM reference material]
##############################
##############################
# [Base code] start (must not be modified or deleted)
##############################
def read_excel_data(file):
    """Load the review sheet from an Excel workbook and add derived columns.

    Columns A-E are read with ``skiprows=1`` and named ``ID``,
    ``Review Date``, ``Option``, ``Review`` and ``ReviewScore``.

    Args:
        file: Anything ``pandas.read_excel`` accepts (read with openpyxl).

    Returns:
        The DataFrame with ``Review Date`` converted to plain dates and the
        extra columns ``Year``, ``Option1`` and ``Review Length``.
    """
    column_names = ["ID", "Review Date", "Option", "Review", "ReviewScore"]
    df = pd.read_excel(
        file,
        usecols="A, B, C, D, E",
        skiprows=1,
        names=column_names,
        engine='openpyxl',
    )

    # Drop any timezone first, then keep only the calendar date.
    parsed_dates = pd.to_datetime(df['Review Date'])
    df['Review Date'] = parsed_dates.dt.tz_localize(None).dt.date

    # First four characters of the date's string form.
    df['Year'] = df['Review Date'].astype(str).str[0:4]

    # Primary option: the part of ``Option`` before the first " / ".
    option_parts = df['Option'].astype(str).str.split(" / ")
    df['Option1'] = option_parts.str[0]

    # Character count of each review.
    df['Review Length'] = df['Review'].str.len()
    return df
def get_positive_reviews(df):
    """Format the longest positive reviews as a numbered Markdown list.

    Rows with ``ReviewScore >= 4`` are ordered by ``Review Length``
    (descending) and the first 20 are kept.

    Args:
        df: DataFrame providing the ``ReviewScore``, ``Review Length``,
            ``Review Date``, ``ID``, ``Option`` and ``Review`` columns.
            It is not modified.

    Returns:
        One string with an entry per selected review, numbered from 1 and
        separated by blank lines; ``""`` when no row qualifies.
    """
    top = (
        df[df['ReviewScore'] >= 4]
        .sort_values(by='Review Length', ascending=False)
        .head(20)  # keep the top 20 reviews
    )

    # Walk the needed columns directly rather than using
    # DataFrame.apply(axis=1) on a helper numbering column: for an empty
    # selection apply() can hand back an empty DataFrame (pandas-version
    # dependent), and joining that would emit the column names instead of "".
    rows = zip(top['Review Date'], top['ID'], top['Option'], top['Review'])
    return "\n\n".join(
        f"{rank}. **{date} / {reviewer} / {option}**\n\n{text}"
        for rank, (date, reviewer, option, text) in enumerate(rows, start=1)
    )
def get_negative_reviews(df):
    """Format the longest negative reviews as a numbered Markdown list.

    Rows with ``ReviewScore <= 2`` are ordered by ``Review Length``
    (descending) and the first 30 are kept.

    Args:
        df: DataFrame providing the ``ReviewScore``, ``Review Length``,
            ``Review Date``, ``ID``, ``Option`` and ``Review`` columns.
            It is not modified.

    Returns:
        One string with an entry per selected review, numbered from 1 and
        separated by blank lines; ``""`` when no row qualifies.
    """
    worst = (
        df[df['ReviewScore'] <= 2]
        .sort_values(by='Review Length', ascending=False)
        .head(30)  # keep the top 30 reviews
    )

    # Walk the needed columns directly rather than using
    # DataFrame.apply(axis=1) on a helper numbering column: for an empty
    # selection apply() can hand back an empty DataFrame (pandas-version
    # dependent), and joining that would emit the column names instead of "".
    rows = zip(worst['Review Date'], worst['ID'], worst['Option'], worst['Review'])
    return "\n\n".join(
        f"{rank}. **{date} / {reviewer} / {option}**\n\n{text}"
        for rank, (date, reviewer, option, text) in enumerate(rows, start=1)
    )
def process_reviews(file):
    """Read the workbook and return the formatted positive and negative review lists.

    Args:
        file: Excel source, passed unchanged to ``read_excel_data``.

    Returns:
        A ``(positive_reviews, negative_reviews)`` tuple as produced by
        ``get_positive_reviews`` and ``get_negative_reviews``.
    """
    reviews = read_excel_data(file)
    return get_positive_reviews(reviews), get_negative_reviews(reviews)
##############################
# [Base code] end (must not be modified or deleted)
##############################
# Helper function for the LLM analysis
def analyze_with_llm(review_content, system_prompt):
    """Run one LLM analysis over a block of review text.

    Args:
        review_content: Positive or negative review text (sent as the user
            message).
        system_prompt: System-role prompt describing the analysis to perform.

    Returns:
        Whatever ``call_api`` returns for this request.
    """
    # Fixed sampling settings used for every analysis request.
    sampling = {"max_tokens": 500, "temperature": 0.7, "top_p": 1.0}
    return call_api(
        content=review_content,
        system_message=system_prompt,
        **sampling,
    )
# After extracting the reviews, automatically run the LLM analysis as well
def process_reviews_and_analyze(file):
    """Extract the top positive/negative reviews and analyse each set with the LLM.

    Args:
        file: Excel file, passed unchanged to ``process_reviews``.

    Returns:
        A 4-tuple ``(positive_reviews, negative_reviews, positive_analysis,
        negative_analysis)``. An analysis is ``""`` when its review list is
        empty.
    """
    # Existing review extraction.
    positive_reviews, negative_reviews = process_reviews(file)

    # The prompt literals below were stored mis-encoded (UTF-8 bytes decoded
    # as a single-byte code page); they are restored to the intended Korean
    # so the model receives readable instructions.
    system_prompt_positive = (
        "당신은 전문 리뷰 분석가입니다. 아래에는 긍정 리뷰들이 나열되어 있습니다.\n"
        "이 리뷰들에서 고객이 만족해하는 주요 포인트와 특징을 요약해주고, "
        "추가적인 인사이트를 제시해 주세요."
    )
    system_prompt_negative = (
        "당신은 전문 리뷰 분석가입니다. 아래에는 부정 리뷰들이 나열되어 있습니다.\n"
        "이 리뷰들에서 고객이 불만을 가지는 주요 포인트와 특징을 요약해주고, "
        "개선점을 제시해 주세요."
    )

    # Skip the API round-trip when there is nothing to analyse: an empty user
    # message would only produce an answer about reviews that do not exist.
    positive_analysis = (
        analyze_with_llm(positive_reviews, system_prompt_positive)
        if positive_reviews
        else ""
    )
    negative_analysis = (
        analyze_with_llm(negative_reviews, system_prompt_negative)
        if negative_reviews
        else ""
    )

    return positive_reviews, negative_reviews, positive_analysis, negative_analysis
# Build the Gradio interface
def create_interface():
    """Build the Gradio Blocks UI for uploading a review workbook and analysing it.

    The page has a file upload, one button, and four read-only text boxes
    (positive reviews, negative reviews, and one LLM analysis for each).
    Clicking the button runs ``process_reviews_and_analyze`` on the uploaded
    file and fills the four boxes in that order.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    # NOTE: the user-facing labels below were stored mis-encoded (UTF-8 bytes
    # decoded as a single-byte code page); they are restored to the intended
    # Korean text.
    with gr.Blocks() as demo:
        gr.Markdown("### 리뷰 데이터 업로드")
        file_input = gr.File(label="엑셀 파일 업로드", file_types=[".xlsx"])
        # Button caption: "리뷰분석" (review analysis).
        analyze_button = gr.Button("리뷰분석")

        with gr.Column():
            gr.Markdown("### 긍정적인 주요 리뷰 (최대 20개)")
            positive_reviews_output = gr.Textbox(
                label="긍정적인 주요 리뷰", interactive=False, lines=20)
            gr.Markdown("### 부정적인 주요 리뷰 (최대 30개)")
            negative_reviews_output = gr.Textbox(
                label="부정적인 주요 리뷰", interactive=False, lines=30)

            # Area that displays the LLM analysis results.
            gr.Markdown("### 긍정 리뷰 분석 결과")
            positive_analysis_output = gr.Textbox(
                label="긍정 리뷰 분석", interactive=False, lines=7)
            gr.Markdown("### 부정 리뷰 분석 결과")
            negative_analysis_output = gr.Textbox(
                label="부정 리뷰 분석", interactive=False, lines=7)

        # One click performs both review extraction and LLM analysis; the
        # output order matches the tuple returned by the handler.
        result_boxes = [
            positive_reviews_output,
            negative_reviews_output,
            positive_analysis_output,
            negative_analysis_output,
        ]
        analyze_button.click(
            fn=process_reviews_and_analyze,
            inputs=[file_input],
            outputs=result_boxes,
        )
    return demo
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    create_interface().launch()