Spaces:
Sleeping
Sleeping
| import os, re, requests, base64 | |
| from langchain_core.documents import Document | |
| from langchain_chroma import Chroma | |
| from openai import OpenAI | |
| from langchain.embeddings.base import Embeddings | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| import chromadb | |
| import gradio as gr | |
| from langchain.memory import ConversationBufferMemory | |
| from langchain.chains import LLMChain | |
| from langchain.prompts import ChatPromptTemplate | |
| # ============================================= | |
| # 1️⃣ 自訂 LM Studio Embedding 類別 | |
| # ============================================= | |
class LmStudioEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter backed by an OpenAI-compatible API.

    All requests go through an ``openai.OpenAI`` client pointed at ``url``
    and ask for embeddings from ``model_name``.
    """

    def __init__(self, model_name: str, url: str):
        """Remember the model name and create the API client.

        Args:
            model_name: Identifier of the embedding model to request.
            url: Base URL of the OpenAI-compatible server.
        """
        self.model_name = model_name
        # The key is a fixed placeholder string; presumably the local server
        # does not validate it -- confirm before pointing at a hosted API.
        self.client = OpenAI(base_url=url, api_key="lm-studio")

    def embed_query(self, text: str) -> list[float]:
        """Return the embedding vector for a single query string."""
        res = self.client.embeddings.create(input=text, model=self.model_name)
        return res.data[0].embedding

    def embed_documents(self, texts: list[str]) -> list[list[float]]:
        """Return one embedding vector per entry of ``texts``.

        An empty ``texts`` yields an empty list without contacting the
        server, so callers never send a request with empty input.
        """
        if not texts:
            return []
        res = self.client.embeddings.create(input=texts, model=self.model_name)
        return [item.embedding for item in res.data]
| # ============================================= | |
| # 2️⃣ 載入 QA 檔案並分類 | |
| # ============================================= | |
# Read the whole QA file into memory.
path = "/Users/adamlin/Library/CloudStorage/OneDrive-個人/QA/QA_v2.txt"
with open(path, "r", encoding="utf-8") as f:
    text = f.read()

# Each match starts at a "Q:" marker and runs lazily up to the next marker or
# the end of the text; re.S lets "." span line breaks.
pattern = r"(Q[::].*?)(?=Q[::]|$)"
qas = re.findall(pattern, text, flags=re.S)

# Bucket each QA entry under the first category name that appears in it.
# Categories are tried in dict order (證券, 期貨, 複委託); entries that mention
# none of them are skipped.
qa_docs = {"證券": [], "期貨": [], "複委託": []}
for qa in qas:
    label = next((name for name in qa_docs if name in qa), None)
    if label is None:
        continue
    qa_docs[label].append(
        Document(page_content=qa.strip(), metadata={"source": path})
    )

# Report how many entries landed in each category.
print("✅ 已成功讀取 QA 並完成分類:")
for k, v in qa_docs.items():
    print(f" {k}:{len(v)} 筆")
| # ============================================= | |
| # 3️⃣ 建立三個獨立向量資料庫 | |
| # ============================================= | |
# Embedding backend shared by every collection.
embedding = LmStudioEmbeddings(
    model_name="text-embedding-bge-large-zh-v1.5",
    url="http://127.0.0.1:1234/v1",
)

# Collections are persisted on disk under ./chroma_db.
client = chromadb.PersistentClient(path="./chroma_db")

# Category -> collection name. ASCII names are used for the collections;
# presumably because Chroma restricts collection-name characters -- confirm.
collection_names = {"證券": "stocks", "期貨": "futures", "複委託": "overseas"}

vectordbs = {}
for cat, docs in qa_docs.items():
    eng_name = collection_names[cat]
    vectordbs[cat] = Chroma(
        client=client,
        collection_name=eng_name,
        embedding_function=embedding,
    )
    # A category may have no QA entries at all; there is nothing to index
    # then, and adding an empty batch would only produce a failing request.
    if not docs:
        continue
    # Only seed a collection that is still empty. limit=1 is enough to tell
    # whether anything is stored, without loading the whole collection.
    # NOTE(review): once seeded, later edits to the QA file are not re-indexed.
    if not vectordbs[cat].get(limit=1)["documents"]:
        vectordbs[cat].add_documents(docs)
print("✅ 各類別向量資料庫建立完成")
| # ============================================= | |
| # 4️⃣ 初始化 Gemini LLM + 記憶模組 | |
| # ============================================= | |
# Read the Gemini API key from the environment so that the secret never
# lives in the source file (on Hugging Face, set it under Space secrets).
# NOTE(review): the key that used to be hard-coded here has been published
# with this file and should be revoked / rotated.
API_KEY = os.getenv("GOOGLE_API_KEY")
if not API_KEY:
    raise ValueError("⚠️ 未設定 GOOGLE_API_KEY,請在 Hugging Face Secrets 中新增。")

llm = ChatGoogleGenerativeAI(model='gemini-2.5-flash', google_api_key=API_KEY)

# Conversation buffer attached to the chain under the key "chat_history".
# NOTE(review): the prompt below has no "chat_history" placeholder, so the
# stored history does not appear to reach the model -- confirm intent.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# The template exposes a single variable, `input`; the caller splices the
# retrieved context into that text before invoking the chain.
prompt = ChatPromptTemplate.from_messages([
    ("system", "你是一位金融客服人員,請根據下列公司規章內容回答使用者問題。若內容不足,也請根據既有資訊給出合理說明,並建議洽營業員了解詳情。"),
    ("human", "{input}"),
])

chain = LLMChain(
    llm=llm,
    prompt=prompt,
    memory=memory,
)
| # ============================================= | |
| # 5️⃣ 自動分類 + 對話主邏輯 | |
| # ============================================= | |
def auto_detect_category(text):
    """Pick the knowledge-base category for a user question by keyword.

    Keyword groups are tried in order and the first group with any keyword
    contained in ``text`` wins. The product-specific groups (期貨, 複委託) are
    tried before 證券 because the 證券 group contains generic actions such as
    開戶 / 下單 / 交割 that also occur in futures and overseas questions;
    checking them first would route e.g. "複委託要如何下單?" to 證券.

    Args:
        text: The user's question.

    Returns:
        One of "證券", "期貨" or "複委託"; "證券" when nothing matches.
    """
    keyword_groups = (
        ("期貨", ("期貨", "選擇權", "結算", "保證金", "契約")),
        ("複委託", ("複委託", "海外", "美股", "港股", "國外")),
        ("證券", ("股票", "證券", "開戶", "下單", "交割", "現股")),
    )
    for category, keywords in keyword_groups:
        if any(k in text for k in keywords):
            return category
    # Default category when no keyword is present.
    return "證券"
def chat_fn(message, history):
    """Answer one user message using retrieval plus the LLM chain.

    Args:
        message: The user's question.
        history: Chat history supplied by the UI; not used here.

    Returns:
        The reply text. Retrieval or generation failures are reported as a
        reply string instead of being raised, so the UI handler never
        crashes on a backend outage.
    """
    print(f"[DEBUG] 問題:{message}")

    # Canned reply that bypasses retrieval and the LLM entirely.
    if "午餐吃什麼" in message:
        return "還在盤中交易無法離開,還是我們約下午茶如何?"

    category = auto_detect_category(message)
    vectordb = vectordbs.get(category)
    # Explicit None test: only a missing entry means "no knowledge base",
    # independent of how the store object evaluates in a boolean context.
    if vectordb is None:
        return "目前尚無此類別的知識庫,請洽營業員。"

    # Vector retrieval. This calls the embedding backend, which can fail
    # independently of the LLM, so it gets its own error report.
    try:
        docs = vectordb.similarity_search(message, k=2)
    except Exception as e:
        return f"⚠️ 檢索錯誤:{e}"
    context = "\n\n".join([d.page_content for d in docs]) if docs else "目前查無相關內容。"

    # The retrieved context is merged into the single `input` variable.
    full_input = f"公司規章內容如下:\n{context}\n\n使用者問題:{message}"
    try:
        response = chain.invoke({"input": full_input})
        reply = response["text"].strip()
    except Exception as e:
        reply = f"⚠️ 生成錯誤:{e}"

    # Fall back to a referral when the model returned only whitespace.
    return reply or "請洽營業員"
| # ============================================= | |
| # 6️⃣ Gradio 介面 + 左上角 logo | |
| # ============================================= | |
| """ | |
| #要在HF上部署的話需要改一下api,把它藏起來 | |
| import os | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
| API_KEY = os.getenv("GOOGLE_API_KEY") | |
| if not API_KEY: | |
| raise ValueError("⚠️ 未設定 GOOGLE_API_KEY,請在 Hugging Face Secrets 中新增。") | |
| llm = ChatGoogleGenerativeAI(model='gemini-2.5-flash', google_api_key=API_KEY) | |
| """ | |
| # ============================================= | |
# Load the logo image and base64-encode it so it can be embedded in the page
# as a data URI.
logo_path = r"/Users/adamlin/Library/CloudStorage/OneDrive-個人/QA/mega.png"  # <- change to your actual path
with open(logo_path, "rb") as f:
    logo_bytes = f.read()
logo_base64 = base64.b64encode(logo_bytes).decode("utf-8")
# Gradio UI: a chat panel on the left and quick-question buttons on the right.
# NOTE(review): the nesting of widgets below was reconstructed from a source
# whose indentation was lost -- confirm the layout against the running app.
with gr.Blocks(
    theme="Taithrah/Minimal",
    css="""
    /* 固定 logo 在左上角 */
    #logo-top {
        position: fixed;
        top: 12px;
        left: 18px;
        z-index: 1000;
        background-color: white;
        border-radius: 10px;
        padding: 6px 8px;
        box-shadow: 0 0 8px rgba(0,0,0,0.15);
    }
    #logo-top img {
        width: 120px;
        height: auto;
        display: block;
    }
    """
) as demo:
    # Insert the logo as an inline data URI.
    gr.HTML(f"""
    <div id="logo-top">
    <img src="data:image/png;base64,{logo_base64}" alt="logo">
    </div>
    """)
    gr.Markdown("<h1 style='text-align:center'>👨💼 我是小智,您的金融好幫手🫰</h1>")

    with gr.Row():
        with gr.Column(scale=4):
            chatbox = gr.Chatbot(label="💬 對話紀錄", type="messages")
            user_input = gr.Textbox(label="輸入訊息", placeholder="請輸入問題...")

            def handle_input(message, history):
                """Append one question/answer turn to the chat history.

                Returns the updated history and an update that clears the
                text box. A blank message leaves the history unchanged, so
                no retrieval or LLM call is made for it.
                """
                history = history or []
                if not message or not message.strip():
                    return history, gr.update(value="")
                reply = chat_fn(message, history)
                history = history + [
                    {"role": "user", "content": message},
                    {"role": "assistant", "content": reply},
                ]
                return history, gr.update(value="")

            user_input.submit(handle_input, [user_input, chatbox], [chatbox, user_input])

        with gr.Column(scale=1):
            gr.Markdown("### 👇 快速提問")
            # (button label, question sent to the bot)
            btns = [
                ("未成年可以開戶嗎?", "未成年可以開戶嗎?"),
                ("法人開戶要準備什麼?", "法人開戶要準備什麼?"),
                ("期貨交易保證金是什麼?", "期貨交易保證金是什麼?"),
                ("複委託要如何下單?", "複委託要如何下單?"),
                ("美股交易時間?", "美股交易時間?"),
                ("美股可以定期定額嗎?", "美股可以定期定額嗎?"),
            ]
            for label, q in btns:
                # q=q binds the current question; a plain closure would see
                # only the last value of the loop variable.
                gr.Button(label).click(lambda h, q=q: handle_input(q, h), [chatbox], [chatbox, user_input])

            # Button that clears the conversation memory and the screen.
            def clear_memory():
                """Reset the chain memory, the chat panel and the text box."""
                memory.clear()
                return [], gr.update(value="", placeholder="請輸入問題...")

            gr.Button("🧹 整理畫面").click(clear_memory, outputs=[chatbox, user_input])

demo.launch()