# File size: 5,925 Bytes
# 0668b2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import os
import gradio as gr
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate

# 1. Load the document and build the vector DB (done once, when the server starts).
# load() is used instead of load_and_split(): load_and_split() already runs a
# default text splitter over the pages, so following it with our own splitter
# chunked the text twice. Here the PDF is chunked exactly once, with the
# chunk_size / chunk_overlap chosen below.
loader = PyPDFLoader("Maximizing Muscle Hypertrophy.pdf")
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(pages)

# Embed every chunk and index it in a Chroma collection. No persist directory
# is passed, so this code does not ask Chroma to store the index on disk.
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

# Mission 3: domain-specific system prompt.
# The prompt text is Korean (the bot is instructed to answer in Korean). It was
# stored mis-encoded (UTF-8 bytes rendered as Thai/Latin characters); the text
# below is the repaired original. `{context}` is the only template slot and is
# filled with the retrieved paper excerpts.
SYSTEM_PROMPT = """당신은 스포츠 영양학 및 근비대(Muscle Hypertrophy) 훈련 분야의 최고 권위자이자 논문 리뷰 전문가입니다.

제공된 [논문 컨텍스트]를 바탕으로 사용자의 질문에 전문적이고 명확하며 객관적인 어조로 답변하세요.



[제약 조건]

1. 반드시 제공된 컨텍스트 내의 정보만을 사용하여 답변하세요.

2. 논문에 없는 내용을 질문하면 "해당 내용은 제공된 논문에서 확인할 수 없습니다."라고 명확히 선을 그으세요.

3. 근육 성장 기전이나 훈련법을 설명할 때는 일반인도 이해하기 쉽게 단계별로 구조화하여 설명하세요.

4. 모든 답변은 한국어로 작성하며, 주요 의학 및 운동학 전문 용어는 괄호 안에 영문을 병기하세요 (예: 단백질 합성(Protein Synthesis)).



[논문 컨텍스트]

{context}"""

# Chat prompt layout, in order: the system instructions, the earlier turns of
# the conversation (filled in from `chat_history`), then the newest question.
_QA_PROMPT_MESSAGES = [
    ("system", SYSTEM_PROMPT),
    ("placeholder", "{chat_history}"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(_QA_PROMPT_MESSAGES)

# Helper that converts Gradio's chat history into a form LangChain understands
def format_history(history):
    """Convert a Gradio chat history into LangChain ``(role, content)`` tuples.

    Two history layouts are accepted:

    * pair style: a sequence of ``(user_msg, ai_msg)`` pairs (tuples or lists);
    * message style: a sequence of dicts with ``"role"`` and ``"content"``
      keys, where role ``"user"`` becomes ``"human"`` and ``"assistant"``
      becomes ``"ai"``. Dicts with any other role are skipped.

    Entries whose content is ``None`` (for example a turn that has no answer
    yet) are left out instead of being forwarded as ``None`` message content.

    Args:
        history: The chat history, or ``None`` / empty when there is none.

    Returns:
        A list of ``("human" | "ai", content)`` tuples in conversation order.

    Raises:
        ValueError: If a pair-style entry does not hold exactly two items.
    """
    role_names = {"user": "human", "assistant": "ai"}
    formatted = []
    for turn in history or []:
        if isinstance(turn, dict):
            role = role_names.get(turn.get("role"))
            content = turn.get("content")
            if role is not None and content is not None:
                formatted.append((role, content))
            continue
        user_msg, ai_msg = turn
        if user_msg is not None:
            formatted.append(("human", user_msg))
        if ai_msg is not None:
            formatted.append(("ai", ai_msg))
    return formatted

# Missions 1, 2 and 5 combined: streaming, runtime settings and source citations
def _format_sources(docs):
    """Return the de-duplicated source footer for the retrieved chunks.

    Each chunk contributes ``"<file name> (p.<page>)"``, built from its
    ``source`` and ``page`` metadata. Duplicates are dropped while keeping
    first-seen order. An empty string is returned when there is nothing to
    cite, so no bare "sources" label is shown.
    """
    labels = []
    for doc in docs:
        source_file = os.path.basename(doc.metadata.get("source", "Unknown"))
        page = doc.metadata.get("page")
        if page is None:
            # No page recorded: cite the file alone rather than inventing "p.1"
            labels.append(source_file)
        else:
            # Mission 1: the stored page index starts at 0, hence the +1
            labels.append(f"{source_file} (p.{page + 1})")

    # dict.fromkeys removes duplicates and preserves insertion order
    unique_sources = list(dict.fromkeys(labels))
    if not unique_sources:
        return ""
    # Footer label is Korean for "Sources" (repaired from mis-encoded text)
    return "\n\n📎 **출처:** " + ", ".join(unique_sources)


def chat_response(message, history, temperature, k, model_name):
    """Stream an answer to ``message``, then append the source footer.

    This is a generator. Every value yielded while the model is streaming is
    the whole answer produced so far (not just the newest piece). The final
    yielded value is the complete answer followed by the footer from
    ``_format_sources``.

    Args:
        message: The user's question; also used as the similarity-search query.
        history: Earlier turns of the chat, converted with ``format_history``.
        temperature: Temperature passed to the chat model.
        k: Number of chunks to retrieve from the vector store.
        model_name: Model identifier passed to ``ChatGoogleGenerativeAI``.

    Yields:
        str: The accumulated answer text.
    """
    # Mission 2: the retrieval breadth follows the k value chosen in the UI.
    # k comes from a slider widget, so it is coerced to int defensively in
    # case it arrives as a float such as 3.0.
    docs = vectorstore.similarity_search(message, k=int(k))
    context = "\n\n".join(doc.page_content for doc in docs)

    # Mission 2: build the LLM per request from the model and temperature
    # chosen in the UI
    llm = ChatGoogleGenerativeAI(model=model_name, temperature=temperature)

    # Assemble the prompt
    prompt_value = qa_prompt.invoke({
        "context": context,
        "chat_history": format_history(history),
        "input": message,
    })

    partial_message = ""
    # Mission 5: real-time streaming output via llm.stream()
    for chunk in llm.stream(prompt_value):
        partial_message += chunk.content
        yield partial_message  # push the text so far to the UI as it grows

    # Finally, send the answer once more with the sources appended at the end
    yield partial_message + _format_sources(docs)

# Mission 4: build the downloadable chat-history file
def download_chat_history(history):
    """Write the conversation to ``chat_history.txt`` and return that path.

    The file is UTF-8 text. Each user message is written as a "User" line and
    each AI message as an "AI" line followed by a 50-dash separator line. The
    line labels are Korean/emoji text (repaired from mis-encoded literals).

    Two history layouts are accepted: a sequence of ``(user_msg, ai_msg)``
    pairs, or a sequence of dicts with ``"role"`` and ``"content"`` keys, where
    role ``"user"`` is written as a user line and any other role as an AI line.
    A missing (``None``) message is written as empty text rather than the word
    "None". A ``None`` or empty history produces an empty file.

    Args:
        history: The chat history taken from the chatbot component.

    Returns:
        The path of the written file, ``"chat_history.txt"``.

    Raises:
        ValueError: If a pair-style entry does not hold exactly two items.
        OSError: If the file cannot be written.
    """
    file_path = "chat_history.txt"
    # Escapes keep the invisible zero-width joiner in the emoji explicit:
    # person + ZWJ + laptop, followed by the Korean word for "User".
    user_label = "\U0001F9D1\u200D\U0001F4BB 사용자"
    ai_label = "\U0001F916 AI"  # robot-face emoji
    separator = "-" * 50 + "\n"

    def as_text(value):
        return "" if value is None else value

    lines = []
    for turn in history or []:
        if isinstance(turn, dict):
            text = as_text(turn.get("content"))
            if turn.get("role") == "user":
                lines.append(f"{user_label}: {text}\n")
            else:
                lines.append(f"{ai_label}: {text}\n")
                lines.append(separator)
            continue
        user_msg, ai_msg = turn
        lines.append(f"{user_label}: {as_text(user_msg)}\n")
        lines.append(f"{ai_label}: {as_text(ai_msg)}\n")
        lines.append(separator)

    with open(file_path, "w", encoding="utf-8") as f:
        f.writelines(lines)
    return file_path

# UI layout.
# All user-facing labels are Korean. They were stored mis-encoded (UTF-8 bytes
# rendered as Thai/Latin characters) and are repaired here; the component
# wiring is unchanged.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 💪 근비대 극대화 논문 Q&A 봇 (Pro Version)")

    # Mission 2: collapsible settings panel
    with gr.Accordion("⚙️ 챗봇 상세 설정", open=False):
        with gr.Row():
            model_dd = gr.Dropdown(choices=["gemini-2.0-flash", "gemini-1.5-pro", "gemini-1.5-flash"], value="gemini-2.0-flash", label="🤖 모델 선택")
            temp_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.0, step=0.1, label="🌡️ Temperature (창의성/환각 조절)")
            k_slider = gr.Slider(minimum=1, maximum=10, value=3, step=1, label="📚 참고할 문서 조각 수 (k)")

    # Main chatbot interface. The settings-panel values are wired in through
    # additional_inputs, in the order chat_response expects them after
    # (message, history): temperature, k, model_name.
    chat_interface = gr.ChatInterface(
        fn=chat_response,
        additional_inputs=[temp_slider, k_slider, model_dd],
        chatbot=gr.Chatbot(height=500),
        title="",
        description="'Maximizing Muscle Hypertrophy' 논문 내용을 바탕으로 근성장 메커니즘을 질문해 보세요."
    )

    # Mission 4: chat-history download area
    with gr.Row():
        download_btn = gr.Button("💾 현재 대화 내역 저장 및 다운로드", variant="primary")
        download_file = gr.File(label="다운로드 준비 완료 (버튼을 누르세요)")

    # Button click event: take the history from the chat window and turn it
    # into a file
    download_btn.click(
        fn=download_chat_history,
        inputs=[chat_interface.chatbot],
        outputs=[download_file]
    )

def main():
    """Start the Gradio app defined by ``demo``."""
    demo.launch()


# Launch only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()