Song committed on
Commit
a44c13d
·
1 Parent(s): 656d2f6
Files changed (3) hide show
  1. README.md +5 -8
  2. app.py +169 -0
  3. requirements.txt +13 -0
README.md CHANGED
@@ -1,12 +1,9 @@
1
  ---
2
- title: SimRAG
3
- emoji: 🚀
4
- colorFrom: green
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 6.2.0
8
  app_file: app.py
9
  pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Simple RAG System
3
+ emoji: 🤖
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
 
7
  app_file: app.py
8
  pinned: false
9
+ ---
 
 
app.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ from typing import List, Dict, Any
4
+
5
+ import gradio as gr
6
+ from langchain_community.document_loaders import TextLoader, PyPDFLoader, Docx2txtLoader
7
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
+ from langchain_core.prompts import PromptTemplate
11
+ from langchain_core.output_parsers import StrOutputParser
12
+ from langchain_core.runnables import RunnablePassthrough
13
+ from langchain_openai import ChatOpenAI
14
+
15
+
16
# LLM configuration. Every connection setting is read from environment
# variables (strongly recommended) so that no endpoint or credential is
# hard-coded; only the model name has a fallback default.
_llm_settings = {
    "base_url": os.getenv("LITELLM_BASE_URL"),
    "api_key": os.getenv("OPENAI_API_KEY"),
    "model": os.getenv("LLM_MODEL", "azure-gpt-4.1"),
    "temperature": 0.3,
}
llm = ChatOpenAI(**_llm_settings)

# Embedding model (a small model that works well for Chinese text).
_EMBEDDING_MODEL_NAME = "BAAI/bge-small-zh-v1.5"
embeddings = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
26
+
27
+
28
def format_docs(docs):
    """Concatenate the ``page_content`` of each document.

    Consecutive documents are separated by one blank line; an empty
    iterable yields an empty string.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)
30
+
31
+
32
# Prompt template with two placeholders, {context} and {question}. It tells
# the assistant to answer from the supplied context and to reply with a fixed
# "cannot answer" sentence instead of inventing an answer.
_RAG_PROMPT_LINES = [
    "你是一個有幫助且誠實的助手,請根據以下提供的上下文來回答問題。",
    "如果上下文不足以回答,請直接說「根據提供的文件,我無法回答這個問題。」,不要編造答案。",
    "",
    "上下文:",
    "{context}",
    "",
    "問題:{question}",
    "回答:",
]
prompt = PromptTemplate.from_template("\n".join(_RAG_PROMPT_LINES))
42
+
43
+
44
# Upload handler: load the uploaded files and build the vector store.
def upload_and_build_db(files: List[Any], vectorstore_state: "FAISS | None"):
    """Load the uploaded files, split them into chunks and index them in FAISS.

    Args:
        files: Uploaded files. Each entry may be a filesystem path (``str``
            or ``os.PathLike``) or an object that exposes its path as
            ``.name``.
        vectorstore_state: The vector store currently held in session state.
            It is returned unchanged whenever no new store is built.

    Returns:
        A ``(status_message, file_input_value, vectorstore)`` tuple. The
        second item is always ``None`` so the bound file component is
        cleared; the third is the newly built store on success, otherwise
        the incoming ``vectorstore_state``.

    NOTE(review): ``Docx2txtLoader`` presumably requires the ``docx2txt``
    package at runtime - confirm it is installed alongside this app.
    """
    if not files:
        return "請上傳至少一個文件。", None, vectorstore_state

    docs = []
    for file in files:
        # The File component is declared with type="filepath", so entries are
        # plain path strings; reading ``.name`` on a str raises
        # AttributeError. File-like wrappers exposing ``.name`` still work.
        if isinstance(file, (str, os.PathLike)):
            file_path = os.fspath(file)
        else:
            file_path = file.name

        lowered_path = file_path.lower()
        try:
            if lowered_path.endswith(".pdf"):
                loader = PyPDFLoader(file_path)
            elif lowered_path.endswith(".docx"):
                loader = Docx2txtLoader(file_path)
            else:
                # Anything else is treated as UTF-8 plain text.
                loader = TextLoader(file_path, encoding="utf-8")

            docs.extend(loader.load())
        except Exception as e:
            # UI boundary: report the failing file to the user instead of
            # letting the exception escape the event handler.
            return f"載入檔案失敗:{os.path.basename(file_path)},錯誤:{str(e)}", None, vectorstore_state

    if not docs:
        return "沒有成功載入任何文件內容。", None, vectorstore_state

    # Split into overlapping chunks.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)

    # Build a fresh vector store from the chunks.
    new_vectorstore = FAISS.from_documents(splits, embeddings)

    # Report the number of uploaded files; ``len(docs)`` counts the loaded
    # document objects, and one file can load as several of them.
    success_msg = f"成功載入 {len(files)} 個文件,共 {len(splits)} 個區塊,已建立專屬知識庫!現在可以開始提問。"
    return success_msg, None, new_vectorstore
79
+
80
+
81
# RAG answer handler (chat history uses the role/content "messages" format).
def rag_answer(question: str, history: List[Dict], vectorstore_state: "FAISS | None"):
    """Answer ``question`` from the knowledge base and extend the chat history.

    Args:
        question: The text the user submitted.
        history: Previous chat messages as ``{"role", "content"}`` dicts;
            ``None`` is treated as an empty history. The list passed in is
            never mutated.
        vectorstore_state: The vector store built from the uploaded
            documents, or ``None`` when none has been built yet.

    Returns:
        A ``("", new_history)`` tuple: the empty string clears the input box
        and ``new_history`` is the updated message list.
    """
    history = list(history or [])

    # Ignore blank submissions rather than sending them to the LLM.
    if not question or not question.strip():
        return "", history

    user_message = {"role": "user", "content": question}

    if vectorstore_state is None:
        # Keep the user's question in the transcript: the input box is
        # cleared on return, so otherwise the question would be lost.
        reminder = {"role": "assistant", "content": "請先上傳文件並點擊「建立知識庫」。"}
        return "", history + [user_message, reminder]

    retriever = vectorstore_state.as_retriever(search_kwargs={"k": 4})

    # Retrieved documents are formatted into {context}; the raw question is
    # passed through unchanged as {question}.
    chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    try:
        response = chain.invoke(question)
    except Exception as e:
        # UI boundary: show the failure in the chat instead of raising.
        response = f"回答時發生錯誤:{str(e)}"

    # Messages format: append both the user turn and the assistant turn.
    new_history = history + [
        user_message,
        {"role": "assistant", "content": response},
    ]

    return "", new_history
108
+
109
+
110
# Reset the conversation.
def clear_chat():
    """Return an empty input value and a fresh, empty chat history."""
    cleared_input = ""
    cleared_history = []
    return cleared_input, cleared_history
113
+
114
+
115
# Gradio interface
_APP_TITLE = "個人 RAG 問答系統"
_AVATAR_IMAGES = (
    "https://em-content.zobj.net/source/apple/391/man-technologist_1f468-200d-1f4bb.png",
    "https://em-content.zobj.net/source/apple/391/robot_1f916.png",
)

with gr.Blocks(title=_APP_TITLE, theme=gr.themes.Soft()) as demo:
    gr.Markdown(value="# 📚 個人 RAG 問答系統\n上傳你的 TXT、PDF、DOCX 文件,建立專屬知識庫,然後向它提問!")

    # Session state holding the vector store; None until one is built.
    vectorstore_state = gr.State(value=None)

    with gr.Row():
        file_input = gr.File(
            label="上傳文件(支援 .txt、.pdf、.docx,可多檔)",
            file_count="multiple",
            type="filepath",
        )

    with gr.Row():
        build_btn = gr.Button(value="建立知識庫", variant="primary", scale=1)
        clear_btn = gr.Button(value="清除對話", variant="secondary", scale=1)

    status = gr.Textbox(label="狀態訊息", interactive=False)

    # Chat transcript, explicitly initialised to an empty list; the handlers
    # feed it role/content message dicts.
    chatbot = gr.Chatbot(
        height=500,
        value=[],
        label="對話紀錄",
        avatar_images=_AVATAR_IMAGES,
    )

    msg = gr.Textbox(
        label="你的問題",
        placeholder="在這裡輸入問題,按 Enter 送出...",
        scale=7,
    )

    # Event wiring.
    # The handler returns None for file_input, which clears the file list so
    # the same files are not uploaded again.
    build_btn.click(
        upload_and_build_db,
        [file_input, vectorstore_state],
        [status, file_input, vectorstore_state],
    )

    # Submitting the textbox asks the question and clears the textbox.
    msg.submit(
        rag_answer,
        [msg, chatbot, vectorstore_state],
        [msg, chatbot],
    )

    # Reset both the textbox and the chat transcript.
    clear_btn.click(
        clear_chat,
        None,
        [msg, chatbot],
    )

# Start the app with Gradio's default launch settings.
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio
2
+ langchain
3
+ langchain-huggingface
4
+ langchain_community
5
+ langchain-openai
6
+ langchain-core
7
+ langchain-text-splitters # 新增這行
8
+ faiss-cpu
9
+ pypdf
10
+ python-docx
11
+ sentence-transformers
12
+ huggingface-hub
13
+ openai