hallu11 committed on
Commit
b4f2bc7
·
verified ·
1 Parent(s): 19f31bf

Upload 2 files

Browse files
Files changed (2) hide show
  1. app (2).py +136 -0
  2. requirements (2).txt +9 -0
app (2).py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import fitz # PyMuPDF
4
+ import gradio as gr
5
+ from groq import Groq
6
+ from langchain_core.documents import Document
7
+ from langchain_huggingface import HuggingFaceEmbeddings # ✅ 수정된 임포트
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain_community.vectorstores.utils import DistanceStrategy
10
+ from langchain_groq import ChatGroq
11
+ from langchain.chains import RetrievalQA
12
+ from langchain.prompts import PromptTemplate
13
+
14
# 1. Build the Groq client; the API key is read from the GROQ_API_KEY
#    environment variable (None is passed through when it is unset).
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# 2. PDF loading and text extraction.
# Module-level store of the Documents built from the latest upload; it is
# rebound by load_and_extract() and read by handle_chat().
all_documents = []
19
+
20
def load_and_extract(file):
    """Extract the text of the uploaded PDF(s) into the global ``all_documents``.

    Args:
        file: The value delivered by the upload widget. May be ``None``
            (e.g. when the upload is cleared), a path (``str`` or
            ``os.PathLike``), an object exposing the path as ``.name``
            (temp-file wrapper), or a list/tuple of any of these.

    Returns:
        None. ``all_documents`` is rebound to one ``Document`` per PDF, with
        the full text as ``page_content`` and the file path under the
        ``"source"`` metadata key.

    Side effects:
        Resets ``current_chain`` and ``current_keyword`` so that the next
        question rebuilds the index. Without this, asking again with the
        same keyword after uploading a different PDF would keep querying
        the index built from the previous upload.
    """
    global all_documents, current_chain, current_keyword

    if file is None:
        uploads = []
    elif isinstance(file, (list, tuple)):
        uploads = list(file)
    else:
        uploads = [file]

    documents = []
    for upload in uploads:
        # Plain paths are used as-is; anything else is expected to carry the
        # path in ``.name`` (as the original code assumed).
        if isinstance(upload, (str, os.PathLike)):
            path = os.fspath(upload)
        else:
            path = upload.name

        with fitz.open(path) as pdf:
            text = "".join(page.get_text() for page in pdf)
        documents.append(Document(page_content=text, metadata={"source": path}))

    all_documents = documents

    # Invalidate whatever handle_chat() cached for the previous upload.
    current_chain = None
    current_keyword = ""
32
+
33
# 3. Embedding model configuration.
# Loads the "jhgan/ko-sbert-nli" model on the CPU and asks the encoder to
# return normalized embeddings.
embedding_model = HuggingFaceEmbeddings(
    model_name="jhgan/ko-sbert-nli",
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=True),
)
39
+
40
# 4. Document filtering.
def filter_documents_by_keyword(docs, keyword):
    """Return the documents whose text contains ``keyword``.

    Matching is a caseless substring test against ``doc.page_content``.
    Leading and trailing whitespace in ``keyword`` is ignored, so a value
    typed as ``" festival "`` matches the same documents as ``"festival"``.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string.
        keyword: The search term. An empty (or whitespace-only) keyword
            matches every document, because the empty string is a substring
            of any text.

    Returns:
        A new list with the matching documents, in their original order.
    """
    needle = keyword.strip().casefold()
    return [doc for doc in docs if needle in doc.page_content.casefold()]
44
+
45
+ # ✅ 5. QA 체인 빌더
46
+ def build_qa_chain(filtered_docs):
47
+ if not filtered_docs:
48
+ return None
49
+
50
+ vectorstore = FAISS.from_documents(
51
+ documents=filtered_docs,
52
+ embedding=embedding_model,
53
+ distance_strategy=DistanceStrategy.COSINE
54
+ )
55
+
56
+ retriever = vectorstore.as_retriever(
57
+ search_type="mmr",
58
+ search_kwargs={"k": 5, "lambda_mult": 0.2}
59
+ )
60
+
61
+ llm = ChatGroq(model_name="llama3-8b-8192", temperature=0.1)
62
+
63
+ prompt = PromptTemplate(
64
+ input_variables=["context", "question"],
65
+ template="""
66
+ 당신은 문화 프로그램에 대해 친절하고 정확하게 설명하는 한국어 도우미입니다.
67
+
68
+ 문서 내용:
69
+ {context}
70
+
71
+ 질문: {question}
72
+
73
+ 지침:
74
+ - 반드시 한국어로 답변해주세요
75
+ - 문서에 없으면 "죄송하지만 해당 정보는 찾을 수 없습니다"라고 답변하세요
76
+ """
77
+ )
78
+
79
+ return RetrievalQA.from_chain_type(
80
+ llm=llm,
81
+ chain_type="stuff",
82
+ retriever=retriever,
83
+ chain_type_kwargs={"prompt": prompt},
84
+ return_source_documents=False
85
+ )
86
+
87
# 6. Gradio interface state.
# chat_history: list of (user_text, bot_text) pairs shown in the Chatbot.
# current_chain / current_keyword: the QA chain most recently built by
# handle_chat() and the keyword it was built for.
chat_history = []
current_chain, current_keyword = None, ""
91
+
92
def handle_chat(message, keyword):
    """Answer ``message`` from the uploaded PDFs that mention ``keyword``.

    The QA chain is cached in the ``current_chain`` global and rebuilt only
    when the (whitespace-stripped) keyword differs from ``current_keyword``.

    Args:
        message: The user's question.
        keyword: Keyword used to select which uploaded documents to index.

    Returns:
        A ``(textbox_value, chat_pairs)`` tuple for the question textbox and
        the Chatbot component. On success the textbox is cleared and the
        updated ``chat_history`` is returned. On a validation problem the
        typed question is kept, and the existing transcript is returned with
        a one-off bot notice appended (the notice is not stored in
        ``chat_history``).

    NOTE(review): ``chat_history`` and the cached chain are module globals,
    so they are shared by everything that calls this handler in the same
    process; per-session state would need ``gr.State``.
    """
    global current_chain, current_keyword

    def _notice(text):
        # Keep the user's input and the visible conversation; show the
        # notice on the bot side (None hides the user bubble).
        return message, chat_history + [(None, text)]

    if not all_documents:
        return _notice("❗ PDF 파일을 먼저 업로드해주세요.")

    # Normalize once so that validation, filtering and the cache comparison
    # all see the same value.
    keyword = (keyword or "").strip()
    if not keyword:
        return _notice("❗ 키워드를 입력해주세요.")

    question = (message or "").strip()
    if not question:
        # Do not spend an LLM call on an empty question.
        return _notice("❗ 질문을 입력해주세요.")

    if keyword != current_keyword:
        matches = filter_documents_by_keyword(all_documents, keyword)
        current_chain = build_qa_chain(matches)
        current_keyword = keyword

    if current_chain is None:
        return _notice(f"'{keyword}' 관련 문서를 찾을 수 없습니다.")

    # `invoke` replaces the deprecated direct call on the chain object.
    response = current_chain.invoke({"query": question})
    answer = response["result"]
    chat_history.append((f"🙋‍♂️ {question}", f"🤖 {answer}"))
    return "", chat_history
113
+
114
def clear_history():
    """Rebind the global ``chat_history`` to a new empty list and return it.

    The returned list is what the Chatbot component displays after the
    "clear" button is pressed.
    """
    global chat_history
    chat_history = emptied = []
    return emptied
118
+
119
# Gradio UI: PDF upload, conversation view, keyword + question inputs, and
# the submit / clear buttons, wired to the handlers defined above.
with gr.Blocks(title="오아시스 챗봇 Musesis") as demo:
    gr.Markdown("### 📚 오아시스 PDF 기반 문화 Q&A 챗봇 (Musesis)")

    # `type` is deliberately left at the library default: the explicit
    # type="file" is rejected by Gradio 4+ (which accepts only "filepath" or
    # "binary"), and the requirements file does not pin a Gradio version.
    file_upload = gr.File(label="📎 PDF 업로드", file_types=[".pdf"])
    chatbot = gr.Chatbot(label="대화", height=400)
    keyword_input = gr.Textbox(label="🔍 키워드", placeholder="예: 단오축제, 문화학교")
    question_input = gr.Textbox(label="✉️ 질문", placeholder="질문을 입력하세요", lines=2)

    with gr.Row():
        submit_btn = gr.Button("질문하기 💬")
        clear_btn = gr.Button("대화 초기화 🧹")

    # (Re)load the documents whenever the uploaded file changes.
    file_upload.change(fn=load_and_extract, inputs=file_upload)

    # The button and pressing Enter in the question box run the same handler.
    ask_event = dict(
        fn=handle_chat,
        inputs=[question_input, keyword_input],
        outputs=[question_input, chatbot],
    )
    submit_btn.click(**ask_event)
    question_input.submit(**ask_event)

    clear_btn.click(fn=clear_history, outputs=chatbot)

demo.launch()
requirements (2).txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ groq
3
+ PyMuPDF
4
+ langchain
5
+ langchain-community
6
+ langchain-groq
7
+ langchain-huggingface
8
+ faiss-cpu
9
+ sentence-transformers