AD-Styles's picture
Rename app(1).py to app.py
0b61a2c verified
import os
import gradio as gr
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
# 1. ๋ฌธ์„œ ๋กœ๋“œ ๋ฐ ๋ฒกํ„ฐ DB ๊ตฌ์ถ• (์„œ๋ฒ„ ๊ตฌ๋™ ์‹œ 1ํšŒ ๊ณ ์ •)
loader = PyPDFLoader("Maximizing Muscle Hypertrophy.pdf")
pages = loader.load_and_split()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(pages)
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
# ๋ฏธ์…˜ 3: ๋„๋ฉ”์ธ ๋งž์ถค ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ
SYSTEM_PROMPT = """๋‹น์‹ ์€ ์Šคํฌ์ธ  ์˜์–‘ํ•™ ๋ฐ ๊ทผ๋น„๋Œ€(Muscle Hypertrophy) ํ›ˆ๋ จ ๋ถ„์•ผ์˜ ์ตœ๊ณ  ๊ถŒ์œ„์ž์ด์ž ๋…ผ๋ฌธ ๋ฆฌ๋ทฐ ์ „๋ฌธ๊ฐ€์ž…๋‹ˆ๋‹ค.
์ œ๊ณต๋œ [๋…ผ๋ฌธ ์ปจํ…์ŠคํŠธ]๋ฅผ ๋ฐ”ํƒ•์œผ๋กœ ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์— ์ „๋ฌธ์ ์ด๊ณ  ๋ช…ํ™•ํ•˜๋ฉฐ ๊ฐ๊ด€์ ์ธ ์–ด์กฐ๋กœ ๋‹ต๋ณ€ํ•˜์„ธ์š”.
[์ œ์•ฝ ์กฐ๊ฑด]
1. ๋ฐ˜๋“œ์‹œ ์ œ๊ณต๋œ ์ปจํ…์ŠคํŠธ ๋‚ด์˜ ์ •๋ณด๋งŒ์„ ์‚ฌ์šฉํ•˜์—ฌ ๋‹ต๋ณ€ํ•˜์„ธ์š”.
2. ๋…ผ๋ฌธ์— ์—†๋Š” ๋‚ด์šฉ์„ ์งˆ๋ฌธํ•˜๋ฉด "ํ•ด๋‹น ๋‚ด์šฉ์€ ์ œ๊ณต๋œ ๋…ผ๋ฌธ์—์„œ ํ™•์ธํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."๋ผ๊ณ  ๋ช…ํ™•ํžˆ ์„ ์„ ๊ทธ์œผ์„ธ์š”.
3. ๊ทผ์œก ์„ฑ์žฅ ๊ธฐ์ „์ด๋‚˜ ํ›ˆ๋ จ๋ฒ•์„ ์„ค๋ช…ํ•  ๋•Œ๋Š” ์ผ๋ฐ˜์ธ๋„ ์ดํ•ดํ•˜๊ธฐ ์‰ฝ๊ฒŒ ๋‹จ๊ณ„๋ณ„๋กœ ๊ตฌ์กฐํ™”ํ•˜์—ฌ ์„ค๋ช…ํ•˜์„ธ์š”.
4. ๋ชจ๋“  ๋‹ต๋ณ€์€ ํ•œ๊ตญ์–ด๋กœ ์ž‘์„ฑํ•˜๋ฉฐ, ์ฃผ์š” ์˜ํ•™ ๋ฐ ์šด๋™ํ•™ ์ „๋ฌธ ์šฉ์–ด๋Š” ๊ด„ํ˜ธ ์•ˆ์— ์˜๋ฌธ์„ ๋ณ‘๊ธฐํ•˜์„ธ์š” (์˜ˆ: ๋‹จ๋ฐฑ์งˆ ํ•ฉ์„ฑ(Protein Synthesis)).
[๋…ผ๋ฌธ ์ปจํ…์ŠคํŠธ]
{context}"""
qa_prompt = ChatPromptTemplate.from_messages([
("system", SYSTEM_PROMPT),
("placeholder", "{chat_history}"),
("human", "{input}"),
])
# Gradio์˜ ๋Œ€ํ™” ๊ธฐ๋ก ํ˜•์‹์„ LangChain์ด ์ดํ•ดํ•  ์ˆ˜ ์žˆ๊ฒŒ ๋ณ€ํ™˜ํ•˜๋Š” ํ—ฌํผ ํ•จ์ˆ˜
def format_history(history):
formatted = []
for user_msg, ai_msg in history:
formatted.append(("human", user_msg))
formatted.append(("ai", ai_msg))
return formatted
# ๋ฏธ์…˜ 1, 2, 5 ํ†ตํ•ฉ: ์ŠคํŠธ๋ฆฌ๋ฐ, ๋™์  ์„ค์ •, ์ถœ์ฒ˜ ํŒŒ์‹ฑ
def chat_response(message, history, temperature, k, model_name):
# ๋ฏธ์…˜ 2: UI์—์„œ ๋„˜๊ฒจ๋ฐ›์€ k ๊ฐ’์œผ๋กœ ๊ฒ€์ƒ‰ ๋ฒ”์œ„ ๋™์  ์กฐ์ ˆ
docs = vectorstore.similarity_search(message, k=k)
context = "\n\n".join(doc.page_content for doc in docs)
# ๋ฏธ์…˜ 2: UI์—์„œ ๋„˜๊ฒจ๋ฐ›์€ ๋ชจ๋ธ๊ณผ ์˜จ๋„๋กœ LLM ๋™์  ์ƒ์„ฑ
llm = ChatGoogleGenerativeAI(model=model_name, temperature=temperature)
# ํ”„๋กฌํ”„ํŠธ ์กฐ๋ฆฝ
prompt_value = qa_prompt.invoke({
"context": context,
"chat_history": format_history(history),
"input": message
})
partial_message = ""
# ๋ฏธ์…˜ 5: llm.stream()์„ ํ™œ์šฉํ•œ ์‹ค์‹œ๊ฐ„ ์ŠคํŠธ๋ฆฌ๋ฐ ์ถœ๋ ฅ
for chunk in llm.stream(prompt_value):
partial_message += chunk.content
yield partial_message # ๊ธ€์ž๊ฐ€ ์ƒ์„ฑ๋  ๋•Œ๋งˆ๋‹ค UI๋กœ ๋ฐ€์–ด๋ƒ„
# ๋ฏธ์…˜ 1: PyPDFLoader ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ์—์„œ ์ถœ์ฒ˜ ๋ฐ ํŽ˜์ด์ง€ ์ถ”์ถœ (page๋Š” 0๋ถ€ํ„ฐ ์‹œ์ž‘ํ•˜๋ฏ€๋กœ +1)
sources = []
for doc in docs:
source_file = os.path.basename(doc.metadata.get('source', 'Unknown'))
page_num = doc.metadata.get('page', 0) + 1
sources.append(f"{source_file} (p.{page_num})")
# ๋ฆฌ์ŠคํŠธ ์ค‘๋ณต ์ œ๊ฑฐ ํ›„ ์ตœ์ข… ํ…์ŠคํŠธ ์กฐ๋ฆฝ
unique_sources = list(dict.fromkeys(sources))
source_str = "\n\n๐Ÿ“Ž **์ถœ์ฒ˜:** " + ", ".join(unique_sources)
# ์ตœ์ข…์ ์œผ๋กœ ๋‹ต๋ณ€ ๋์— ์ถœ์ฒ˜๋ฅผ ๋ง๋ถ™์—ฌ์„œ ์ „์†ก
yield partial_message + source_str
# ๋ฏธ์…˜ 4: ๋Œ€ํ™” ๋‚ด์—ญ ๋‹ค์šด๋กœ๋“œ ํŒŒ์ผ ์ƒ์„ฑ ํ•จ์ˆ˜
def download_chat_history(history):
file_path = "chat_history.txt"
with open(file_path, "w", encoding="utf-8") as f:
for user_msg, ai_msg in history:
f.write(f"๐Ÿง‘โ€๐Ÿ’ป ์‚ฌ์šฉ์ž: {user_msg}\n")
f.write(f"๐Ÿค– AI: {ai_msg}\n")
f.write("-" * 50 + "\n")
return file_path
# UI ๋ ˆ์ด์•„์›ƒ ๊ตฌ์„ฑ
with gr.Blocks(theme=gr.themes.Soft()) as demo:
gr.Markdown("## ๐Ÿ’ช ๊ทผ๋น„๋Œ€ ๊ทน๋Œ€ํ™” ๋…ผ๋ฌธ Q&A ๋ด‡ (Pro Version)")
# ๋ฏธ์…˜ 2: ์ ‘์„ ์ˆ˜ ์žˆ๋Š” ์„ค์ • ํŒจ๋„
with gr.Accordion("โš™๏ธ ์ฑ—๋ด‡ ์ƒ์„ธ ์„ค์ •", open=False):
with gr.Row():
model_dd = gr.Dropdown(choices=["gemini-2.0-flash", "gemini-1.5-pro", "gemini-1.5-flash"], value="gemini-2.0-flash", label="๐Ÿค– ๋ชจ๋ธ ์„ ํƒ")
temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, label="๐ŸŒก๏ธ Temperature (์ฐฝ์˜์„ฑ/ํ™˜๊ฐ ์กฐ์ ˆ)")
k_slider = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="๐Ÿ“š ์ฐธ๊ณ ํ•  ๋ฌธ์„œ ์กฐ๊ฐ ์ˆ˜ (k)")
# ํ•ต์‹ฌ ์ฑ—๋ด‡ ์ธํ„ฐํŽ˜์ด์Šค (์„ค์ • ํŒจ๋„์˜ ๊ฐ’๋“ค์„ additional_inputs๋กœ ์—ฐ๊ฒฐ)
chat_interface = gr.ChatInterface(
fn=chat_response,
additional_inputs=[temp_slider, k_slider, model_dd],
chatbot=gr.Chatbot(height=500),
title="",
description="'Maximizing Muscle Hypertrophy' ๋…ผ๋ฌธ ๋‚ด์šฉ์„ ๋ฐ”ํƒ•์œผ๋กœ ๊ทผ์„ฑ์žฅ ๋ฉ”์ปค๋‹ˆ์ฆ˜์„ ์งˆ๋ฌธํ•ด ๋ณด์„ธ์š”."
)
# ๋ฏธ์…˜ 4: ๋Œ€ํ™” ๋‚ด์—ญ ๋‹ค์šด๋กœ๋“œ ์˜์—ญ
with gr.Row():
download_btn = gr.Button("๐Ÿ’พ ํ˜„์žฌ ๋Œ€ํ™” ๋‚ด์—ญ ์ €์žฅ ๋ฐ ๋‹ค์šด๋กœ๋“œ", variant="primary")
download_file = gr.File(label="๋‹ค์šด๋กœ๋“œ ์ค€๋น„ ์™„๋ฃŒ (๋ฒ„ํŠผ์„ ๋ˆ„๋ฅด์„ธ์š”)")
# ๋ฒ„ํŠผ ํด๋ฆญ ์ด๋ฒคํŠธ (์ฑ„ํŒ…์ฐฝ์˜ ํžˆ์Šคํ† ๋ฆฌ๋ฅผ ๊ฐ€์ ธ์™€ ํŒŒ์ผ๋กœ ๋ณ€ํ™˜)
download_btn.click(
fn=download_chat_history,
inputs=[chat_interface.chatbot],
outputs=[download_file]
)
if __name__ == "__main__":
demo.launch()