Spaces:

PBThuong96
/

chatbot

Sleeping

App Files Files Community

PBThuong96 committed on Dec 2, 2025

Commit

927ef83

verified ·

1 Parent(s): 1ac8620

Update app.py

Browse files

Files changed (1) hide show

app.py +267 -458

app.py CHANGED Viewed

@@ -1,6 +1,8 @@
-# app.py - DeepMed AI - Fixed Version
-import os
 import sys
 import logging
 import traceback
 import gradio as gr
@@ -9,513 +11,320 @@ import docx2txt
 import chromadb
 from chromadb.config import Settings
 from shutil import rmtree
-import gc
-# Fix SQLite for Hugging Face
-try:
-    __import__('pysqlite3')
-    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
-except ImportError:
-    logging.warning("pysqlite3 not found, using default sqlite3")
-# LangChain imports
-try:
-    from langchain_google_genai import ChatGoogleGenerativeAI
-    from langchain_chroma import Chroma
-    from langchain_community.document_loaders import PyPDFLoader
-    from langchain_text_splitters import RecursiveCharacterTextSplitter
-    from langchain_community.retrievers import BM25Retriever
-    from langchain.retrievers.ensemble import EnsembleRetriever
-    from langchain.chains import create_retrieval_chain, create_history_aware_retriever
-    from langchain.chains.combine_documents import create_stuff_documents_chain
-    from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
-    from langchain_core.messages import HumanMessage, AIMessage
-    from langchain_core.documents import Document
-    from langchain_huggingface import HuggingFaceEmbeddings
-    logging.info("✅ All LangChain imports successful")
-except ImportError as e:
-    logging.error(f"❌ Import error: {e}")
-    sys.exit(1)
-# Configuration
 GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 DATA_PATH = "medical_data"
 DB_PATH = "chroma_db"
-MAX_HISTORY_TURNS = 5  # Reduced for stability
 FORCE_REBUILD_DB = False
-# Setup logging
-logging.basicConfig(
-    level=logging.INFO,
-    format="%(asctime)s [%(levelname)s] %(message)s",
-    handlers=[
-        logging.StreamHandler(),
-        logging.FileHandler("deepmed.log", encoding='utf-8')
-    ]
-)
-def safe_process_excel(file_path: str, filename: str) -> list:
-    """Safely process Excel files with error handling"""
     docs = []
     try:
-        logging.info(f"Processing Excel file: {filename}")
         if file_path.endswith(".csv"):
-            df = pd.read_csv(file_path, encoding='utf-8', on_bad_lines='skip')
         else:
             df = pd.read_excel(file_path)
-        # Clean dataframe
-        df = df.dropna(how='all')
-        df = df.fillna("Không có thông tin")
         for idx, row in df.iterrows():
-            try:
-                content_parts = []
-                for col in df.columns:
-                    if pd.notna(row[col]):
-                        value = str(row[col]).strip()
-                        if value and value.lower() not in ['nan', 'none', '']:
-                            content_parts.append(f"{col}: {value}")
-                if content_parts:
-                    page_content = f"Tài liệu: {filename} (Dòng {idx+1}):\n" + "\n".join(content_parts)
-                    metadata = {
-                        "source": filename,
-                        "row": idx+1,
-                        "type": "excel",
-                        "doc_id": f"{filename}_row_{idx+1}"
-                    }
-                    docs.append(Document(page_content=page_content, metadata=metadata))
-            except Exception as e:
-                logging.warning(f"Error processing row {idx+1} in {filename}: {e}")
-                continue
     except Exception as e:
-        logging.error(f"Failed to process Excel {filename}: {e}")
     return docs
-def load_documents_safely() -> list:
-    """Load documents with comprehensive error handling"""
-    documents = []
-    # Create data directory if not exists
-    if not os.path.exists(DATA_PATH):
-        os.makedirs(DATA_PATH, exist_ok=True)
-        logging.info(f"Created data directory: {DATA_PATH}")
-        return documents
-    # Get all files
-    all_files = []
-    for root, _, files in os.walk(DATA_PATH):
-        for file in files:
-            if file.lower().endswith(('.pdf', '.docx', '.xlsx', '.xls', '.csv', '.txt', '.md')):
-                all_files.append(os.path.join(root, file))
-    if not all_files:
-        logging.warning(f"No documents found in {DATA_PATH}")
-        return documents
-    logging.info(f"Found {len(all_files)} files to process")
-    # Process each file
-    for file_path in all_files:
-        filename = os.path.basename(file_path)
-        file_ext = os.path.splitext(filename)[1].lower()
-        try:
-            if file_ext == '.pdf':
-                loader = PyPDFLoader(file_path)
-                docs = loader.load()
-                for doc in docs:
-                    doc.metadata.update({
-                        "source": filename,
-                        "file_type": "pdf"
-                    })
-                documents.extend(docs)
-                logging.info(f"✓ Loaded PDF: {filename} ({len(docs)} pages)")
-            elif file_ext == '.docx':
-                text = docx2txt.process(file_path)
-                if text.strip():
-                    doc = Document(
-                        page_content=text,
-                        metadata={"source": filename, "file_type": "docx"}
-                    )
-                    documents.append(doc)
-                    logging.info(f"✓ Loaded DOCX: {filename}")
-            elif file_ext in ['.xlsx', '.xls', '.csv']:
-                excel_docs = safe_process_excel(file_path, filename)
-                documents.extend(excel_docs)
-                logging.info(f"✓ Loaded Excel: {filename} ({len(excel_docs)} rows)")
-            elif file_ext in ['.txt', '.md']:
-                with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
-                    text = f.read()
-                if text.strip():
-                    doc = Document(
-                        page_content=text,
-                        metadata={"source": filename, "file_type": "txt"}
-                    )
-                    documents.append(doc)
-                    logging.info(f"✓ Loaded TXT: {filename}")
-        except Exception as e:
-            logging.error(f"✗ Failed to load {filename}: {e}")
-            continue
-    logging.info(f"Total documents loaded: {len(documents)}")
     return documents
-def create_simple_retriever():
-    """Create a simplified retriever for Hugging Face"""
-    try:
-        logging.info("Initializing embedding model...")
-        # Use lightweight model
-        embedding_model = HuggingFaceEmbeddings(
-            model_name="sentence-transformers/all-MiniLM-L6-v2",
-            model_kwargs={'device': 'cpu'},
-            encode_kwargs={'normalize_embeddings': True}
-        )
-        # Check if DB exists
-        if os.path.exists(DB_PATH) and not FORCE_REBUILD_DB:
-            try:
-                logging.info(f"Loading existing vector store from {DB_PATH}")
-                vectorstore = Chroma(
-                    persist_directory=DB_PATH,
-                    embedding_function=embedding_model,
-                    client_settings=Settings(anonymized_telemetry=False)
-                )
-                # Test if vectorstore works
-                test_results = vectorstore.similarity_search("test", k=1)
-                logging.info(f"Vector store loaded successfully with {vectorstore._collection.count()} documents")
-                return vectorstore.as_retriever(search_kwargs={"k": 8})
-            except Exception as e:
-                logging.warning(f"Failed to load existing DB: {e}, creating new...")
-                if os.path.exists(DB_PATH):
-                    rmtree(DB_PATH, ignore_errors=True)
-        # Create new vector store
-        logging.info("Creating new vector store...")
-        documents = load_documents_safely()
-        if not documents:
-            logging.warning("No documents to index")
             return None
-        # Simple text splitting
-        text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=600,
-            chunk_overlap=100,
-            length_function=len,
-            separators=["\n\n", "\n", " ", ""]
-        )
-        splits = text_splitter.split_documents(documents)
-        logging.info(f"Split into {len(splits)} chunks")
-        # Create vector store
         vectorstore = Chroma.from_documents(
-            documents=splits,
-            embedding=embedding_model,
             persist_directory=DB_PATH,
-            client_settings=Settings(anonymized_telemetry=False)
         )
-        logging.info(f"Created vector store with {len(splits)} chunks")
-        return vectorstore.as_retriever(search_kwargs={"k": 8})
-    except Exception as e:
-        logging.error(f"Failed to create retriever: {e}")
-        return None
-class MedicalAssistant:
-    def __init__(self):
-        self.llm = None
-        self.retriever = None
-        self.chain = None
-        self.initialized = False
-    def initialize(self):
-        """Initialize the assistant"""
-        try:
-            # Check API key
-            if not GOOGLE_API_KEY:
-                logging.error("GOOGLE_API_KEY environment variable is not set")
-                self.llm = self.create_fallback_llm()
-            else:
-                self.llm = ChatGoogleGenerativeAI(
-                    model="gemini-2.5-flash",
-                    temperature=0.1,
-                    google_api_key=GOOGLE_API_KEY,
-                    max_output_tokens=1000,
-                    timeout=30
-                )
-                logging.info("✅ Gemini LLM initialized")
-            # Create retriever
-            self.retriever = create_simple_retriever()
-            # Build chain
-            self._build_chain()
-            self.initialized = True
-            logging.info("✅ Medical Assistant initialized successfully")
-            return True
-        except Exception as e:
-            logging.error(f"❌ Failed to initialize: {e}")
-            self.llm = self.create_fallback_llm()
-            self._build_chain()
-            return False
-    def create_fallback_llm(self):
-        """Create a fallback LLM when Gemini fails"""
-        from langchain.llms.fake import FakeListLLM
-        responses = [
-            "Xin lỗi, tôi đang gặp sự cố kết nối với hệ thống AI. Vui lòng thử lại sau.",
-            "Hệ thống tạm thời không khả dụng. Vui lòng kiểm tra kết nối internet.",
-            "Tôi không thể xử lý yêu cầu của bạn ngay lúc này."
-        ]
-        return FakeListLLM(responses=responses)
-    def _build_chain(self):
-        """Build the RAG chain"""
-        try:
-            # System prompt
-            system_prompt = """Bạn là DeepMed AI, trợ lý y tế thông minh.
-            Trả lời câu hỏi dựa trên thông tin được cung cấp.
-            Nếu không có thông tin, hãy nói rõ.
-            Luôn trả lời bằng tiếng Việt.
-            Context: {context}
-            Câu hỏi: {input}"""
-            prompt = ChatPromptTemplate.from_messages([
-                ("system", system_prompt),
-                MessagesPlaceholder("chat_history"),
-                ("human", "{input}"),
-            ])
-            if self.retriever and self.llm:
-                # Create RAG chain
-                question_answer_chain = create_stuff_documents_chain(self.llm, prompt)
-                self.chain = create_retrieval_chain(self.retriever, question_answer_chain)
-                logging.info("✅ RAG chain built with retriever")
-            else:
-                # Simple chain without retrieval
-                self.chain = prompt | self.llm
-                logging.info("✅ Simple LLM chain built")
-        except Exception as e:
-            logging.error(f"Failed to build chain: {e}")
-            # Create a minimal working chain
-            self.chain = lambda x: {"answer": "Xin lỗi, hệ thống đang bảo trì."}
-    def chat(self, message: str, history: list):
-        """Process chat message"""
-        if not self.initialized:
-            if not self.initialize():
-                yield "❌ Hệ thống chưa thể khởi động. Vui lòng thử lại sau."
-                return
         try:
-            # Prepare chat history
-            chat_history = []
-            for user_msg, bot_msg in history[-MAX_HISTORY_TURNS:]:
-                chat_history.append(HumanMessage(content=str(user_msg)))
-                chat_history.append(AIMessage(content=str(bot_msg)))
-            # Create input
-            inputs = {
-                "input": message,
-                "chat_history": chat_history
-            }
-            # Get response
-            if hasattr(self.chain, 'invoke'):
-                response = self.chain.invoke(inputs)
-                if isinstance(response, dict) and "answer" in response:
-                    answer = response["answer"]
-                elif hasattr(response, 'content'):
-                    answer = response.content
-                else:
-                    answer = str(response)
-                yield answer
-            else:
-                yield "Xin chào! Tôi là DeepMed AI. Tôi có thể giúp gì cho bạn về y tế?"
         except Exception as e:
-            logging.error(f"Chat error: {e}")
-            yield f"⚠️ Đã xảy ra lỗi: {str(e)[:100]}"
-# Create assistant instance
-assistant = MedicalAssistant()
-# Initialize on startup (but don't block)
-import threading
-def init_in_background():
-    assistant.initialize()
-threading.Thread(target=init_in_background, daemon=True).start()
-# Gradio Interface
-def gradio_chat(message, history):
-    """Wrapper for Gradio chat"""
-    for response in assistant.chat(message, history):
-        yield response
-# Custom CSS
-css = """
-.gradio-container {
-    font-family: 'Arial', sans-serif;
-    max-width: 800px;
-    margin: 0 auto;
-}
-#chatbot {
-    border: 1px solid #e0e0e0;
-    border-radius: 10px;
-    padding: 15px;
-    min-height: 400px;
-    max-height: 500px;
-    overflow-y: auto;
-    background: #f9f9f9;
-}
-.user, .assistant {
-    padding: 10px 15px;
-    margin: 8px 0;
-    border-radius: 15px;
-    max-width: 80%;
-}
-.user {
-    background: #e3f2fd;
-    margin-left: auto;
-    border-bottom-right-radius: 5px;
-}
-.assistant {
-    background: #f5f5f5;
-    margin-right: auto;
-    border-bottom-left-radius: 5px;
-}
-input {
-    border-radius: 20px;
-    padding: 12px 20px;
-    border: 2px solid #4a90e2;
-}
-button {
-    border-radius: 20px;
-    padding: 10px 20px;
-    background: #4a90e2;
-    color: white;
-    border: none;
-}
-button:hover {
-    background: #357abd;
-}
-@media (max-width: 768px) {
-    .gradio-container {
-        padding: 10px;
-    }
-    #chatbot {
-        min-height: 300px;
-        max-height: 400px;
-    }
-    .user, .assistant {
-        max-width: 90%;
-    }
-}
 """
-# Create Gradio interface
-with gr.Blocks(css=css, title="DeepMed AI - Trợ lý Y tế") as demo:
-    gr.Markdown("# 🏥 DeepMed AI - Trợ lý Y tế Thông minh")
-    gr.Markdown("Hỏi đáp về thuốc, bệnh lý và hướng dẫn y tế")
-    chatbot = gr.Chatbot(
-        height=400,
-        label="Hội thoại",
-        placeholder="Xin chào! Tôi có thể giúp gì cho bạn?"
-    )
-    with gr.Row():
-        msg = gr.Textbox(
-            label="Câu hỏi của bạn",
-            placeholder="Nhập câu hỏi về y tế...",
-            scale=4
-        )
-        submit_btn = gr.Button("Gửi", variant="primary", scale=1)
-        clear_btn = gr.Button("Xóa", variant="secondary", scale=1)
-    # Footer
-    gr.Markdown("---")
-    gr.Markdown("⚠️ **Lưu ý:** Thông tin chỉ mang tính tham khảo. Vui lòng tham khảo ý kiến bác sĩ trước khi áp dụng.")
-    # Event handlers
-    def clear_chat():
-        return None
-    # Submit function
-    def respond(message, chat_history):
-        chat_history.append((message, ""))
-        yield chat_history
-        response = ""
-        for chunk in gradio_chat(message, chat_history[:-1]):
-            response = chunk
-            chat_history[-1] = (message, response)
-            yield chat_history
-    # Connect events
-    msg.submit(
-        respond,
-        [msg, chatbot],
-        [chatbot]
-    ).then(lambda: "", outputs=[msg])
-    submit_btn.click(
-        respond,
-        [msg, chatbot],
-        [chatbot]
-    ).then(lambda: "", outputs=[msg])
-    clear_btn.click(
-        clear_chat,
-        outputs=[chatbot]
     )
-# Launch with error handling
 if __name__ == "__main__":
-    try:
-        logging.info("🚀 Starting DeepMed AI...")
-        demo.queue(max_size=10)
-        demo.launch(
-            server_name="0.0.0.0",
-            server_port=7860,
-            show_error=True,
-            debug=False,
-            share=False
-        )
-    except Exception as e:
-        logging.error(f"Failed to launch app: {e}")
-        print(f"Error: {e}")
-        sys.exit(1)

import logging
import sys

# Swap the standard-library sqlite3 module for pysqlite3 when it is installed
# (presumably because the host's bundled SQLite is too old for Chroma --
# TODO confirm the minimum version required).
# The import is guarded so that an environment without pysqlite3 falls back
# to the stock sqlite3 module instead of crashing before the app even starts.
try:
    __import__("pysqlite3")
except ImportError:
    # Use a named logger here: the module-level logging.warning() helper would
    # implicitly call basicConfig() and turn the app's own basicConfig() call
    # further down into a no-op.
    logging.getLogger(__name__).warning(
        "pysqlite3 not found, using default sqlite3"
    )
else:
    sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")
+import os
 import logging
 import traceback
 import gradio as gr
 import chromadb
 from chromadb.config import Settings
 from shutil import rmtree
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_chroma import Chroma
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+from langchain_community.retrievers import BM25Retriever
+from langchain.retrievers.ensemble import EnsembleRetriever
+from langchain.chains import create_retrieval_chain, create_history_aware_retriever
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+from langchain_core.messages import HumanMessage, AIMessage
+from langchain_core.documents import Document
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain.retrievers import ContextualCompressionRetriever
+from langchain.retrievers.document_compressors import CrossEncoderReranker
+from langchain_community.cross_encoders import HuggingFaceCrossEncoder
# API key for the Gemini chat model, read from the environment; the bot
# refuses to start without it.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
# Folder that is scanned for source documents (PDF, DOCX, Excel/CSV, TXT/MD).
DATA_PATH = "medical_data"
# Directory where the Chroma vector store is persisted between runs.
DB_PATH = "chroma_db"
# Number of most recent (user, bot) turns forwarded to the model as history.
MAX_HISTORY_TURNS = 6
# When True, the persisted vector store is deleted and rebuilt at startup.
FORCE_REBUILD_DB = False
# Root logger configuration: INFO and above, timestamped.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
def process_excel_file(file_path: str, filename: str) -> list[Document]:
    """Convert each row of a spreadsheet or CSV file into its own ``Document``.

    One document per row keeps every record (a drug, a patient, ...)
    individually retrievable instead of burying it inside a large text blob.

    Args:
        file_path: Path of the file on disk. A ``.csv`` suffix (any letter
            case) is read with ``pandas.read_csv``; everything else goes
            through ``pandas.read_excel``.
        filename: Display name stored in the ``source`` metadata and quoted
            in the page content.

    Returns:
        One ``Document`` per non-empty row, carrying ``source``, ``row`` and
        ``type`` metadata. Failures are logged rather than raised so that a
        single broken file does not abort the whole import; whatever was
        converted before the failure is still returned.
    """
    docs: list[Document] = []
    try:
        # Case-insensitive on purpose: the caller dispatches on the lowercased
        # file name, so "DATA.CSV" reaches this function and must not be
        # handed to read_excel.
        if file_path.lower().endswith(".csv"):
            df = pd.read_csv(file_path)
        else:
            df = pd.read_excel(file_path)

        # Drop fully empty rows, then give remaining empty cells a readable
        # placeholder so they do not show up as "nan" in the indexed text.
        df.dropna(how='all', inplace=True)
        df.fillna("Không có thông tin", inplace=True)

        for idx, row in df.iterrows():
            content_parts = []
            for col_name, val in row.items():
                clean_val = str(val).strip()
                if clean_val and clean_val.lower() != "nan":
                    content_parts.append(f"{col_name}: {clean_val}")
            if not content_parts:
                continue
            # idx is the row's index label; +1 turns it into a 1-based data
            # row number for display and citation.
            page_content = (
                f"Dữ liệu từ file {filename} (Dòng {idx+1}):\n"
                + "\n".join(content_parts)
            )
            metadata = {"source": filename, "row": idx+1, "type": "excel_record"}
            docs.append(Document(page_content=page_content, metadata=metadata))
    except Exception as e:
        logging.error(f"Lỗi xử lý Excel {filename}: {e}")
    return docs
def load_documents_from_folder(folder_path: str) -> list[Document]:
    """Recursively load every supported file under ``folder_path``.

    Supported formats, matched on the lowercased file name:
    ``.pdf`` (one document per page via ``PyPDFLoader``), ``.docx``,
    ``.xlsx`` / ``.xls`` / ``.csv`` (one document per row via
    ``process_excel_file``) and ``.txt`` / ``.md``. Other files are ignored.

    Args:
        folder_path: Root directory to scan. It is created when missing.

    Returns:
        The loaded documents, each tagged with a ``source`` metadata entry
        holding the bare file name. Empty when the folder did not exist or
        holds nothing usable. A file that fails to load is logged and
        skipped; it never aborts the scan.
    """
    logging.info(f"--- Bắt đầu quét thư mục: {folder_path} ---")
    documents: list[Document] = []

    if not os.path.exists(folder_path):
        os.makedirs(folder_path, exist_ok=True)
        return documents

    for root, _, files in os.walk(folder_path):
        for filename in files:
            file_path = os.path.join(root, filename)
            filename_lower = filename.lower()
            try:
                if filename_lower.endswith(".pdf"):
                    docs = PyPDFLoader(file_path).load()
                    for doc in docs:
                        # Replace the loader's full path with the bare name
                        # so citations stay short.
                        doc.metadata["source"] = filename
                    documents.extend(docs)
                elif filename_lower.endswith(".docx"):
                    text = docx2txt.process(file_path)
                    if text.strip():
                        documents.append(Document(page_content=text, metadata={"source": filename}))
                elif filename_lower.endswith((".xlsx", ".xls", ".csv")):
                    documents.extend(process_excel_file(file_path, filename))
                elif filename_lower.endswith((".txt", ".md")):
                    # "utf-8-sig" strips a leading BOM, and errors="replace"
                    # keeps a file with a few undecodable bytes instead of
                    # discarding the whole document.
                    with open(file_path, "r", encoding="utf-8-sig", errors="replace") as f:
                        text = f.read()
                    if text.strip():
                        documents.append(Document(page_content=text, metadata={"source": filename}))
            except Exception as e:
                logging.error(f"Lỗi đọc file {filename}: {e}")

    logging.info(f"Tổng cộng đã load: {len(documents)} tài liệu gốc.")
    return documents
def get_retriever_chain():
    """Build the retrieval pipeline: hybrid search followed by reranking.

    Steps:
      1. Load the multilingual sentence-embedding model.
      2. Reopen the persisted Chroma store under ``DB_PATH`` and recover its
         chunks, or -- when the store is missing, empty, unreadable or
         ``FORCE_REBUILD_DB`` is set -- index the files under ``DATA_PATH``
         from scratch.
      3. Combine BM25 keyword search (weight 0.4) with vector search
         (weight 0.6), each returning 10 candidates.
      4. Rerank the candidates with a cross-encoder and keep the top 5.

    Returns:
        A ``ContextualCompressionRetriever``, or ``None`` when there are no
        documents to index.
    """
    logging.info("--- Tải Embedding Model ---")
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2")
    vectorstore = None
    splits = []
    chroma_settings = Settings(anonymized_telemetry=False)

    if FORCE_REBUILD_DB and os.path.exists(DB_PATH):
        logging.warning("Đang xóa DB cũ theo yêu cầu FORCE_REBUILD...")
        rmtree(DB_PATH, ignore_errors=True)

    if os.path.exists(DB_PATH) and os.listdir(DB_PATH):
        try:
            vectorstore = Chroma(
                persist_directory=DB_PATH,
                embedding_function=embedding_model,
                client_settings=chroma_settings
            )
            existing_data = vectorstore.get()
            texts = existing_data.get("documents") or []
            metadatas = existing_data.get("metadatas") or [None] * len(texts)
            if texts:
                # BM25 needs the raw chunks, so rebuild them from the store.
                # A chunk stored without metadata comes back as None; fall
                # back to {} so that one such chunk cannot raise here and
                # trigger the destructive reset below.
                splits = [
                    Document(page_content=text, metadata=meta or {})
                    for text, meta in zip(texts, metadatas)
                    if text is not None
                ]
                logging.info(f"Đã khôi phục {len(splits)} chunks từ DB.")
            else:
                logging.warning("DB rỗng, sẽ tạo mới.")
                vectorstore = None
        except Exception as e:
            logging.error(f"DB lỗi: {e}. Đang reset...")
            rmtree(DB_PATH, ignore_errors=True)
            vectorstore = None
            # Discard anything partially restored from the broken store.
            splits = []

    if vectorstore is None:
        logging.info("--- Tạo Index dữ liệu mới ---")
        raw_docs = load_documents_from_folder(DATA_PATH)
        if not raw_docs:
            logging.warning(f"Không có dữ liệu trong thư mục {DATA_PATH}.")
            return None
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = text_splitter.split_documents(raw_docs)
        vectorstore = Chroma.from_documents(
            documents=splits,
            embedding=embedding_model,
            persist_directory=DB_PATH,
            client_settings=chroma_settings
        )
        logging.info("Đã lưu VectorStore thành công.")

    vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
    if splits:
        bm25_retriever = BM25Retriever.from_documents(splits)
        bm25_retriever.k = 10
        ensemble_retriever = EnsembleRetriever(
            retrievers=[bm25_retriever, vector_retriever],
            weights=[0.4, 0.6]
        )
    else:
        # No chunks for BM25: fall back to pure vector search.
        ensemble_retriever = vector_retriever

    logging.info("--- Tải Reranker Model (BGE-M3) ---")
    reranker_model = HuggingFaceCrossEncoder(model_name="BAAI/bge-reranker-v2-m3")
    compressor = CrossEncoderReranker(model=reranker_model, top_n=5)
    return ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=ensemble_retriever
    )
class DeepMedBot:
    """Retrieval-augmented clinical assistant backed by Gemini.

    At construction time the bot builds the retriever (if any documents are
    available), creates the chat model and wires them into a chain.
    ``ready`` is True only when all of that succeeded; ``chat_stream`` answers
    with a fixed notice otherwise.
    """

    def __init__(self):
        # Every attribute is defined up front so that a bot whose start-up
        # failed can still be inspected without AttributeError.
        self.rag_chain = None
        self.retriever = None
        self.llm = None
        self.ready = False
        if not GOOGLE_API_KEY:
            logging.error("⚠️ Thiếu GOOGLE_API_KEY! Vui lòng thiết lập biến môi trường.")
            return
        try:
            self.retriever = get_retriever_chain()
            if not self.retriever:
                logging.warning("⚠️ Chưa có dữ liệu để Retreive. Bot sẽ chỉ trả lời bằng kiến thức nền.")
            self.llm = ChatGoogleGenerativeAI(
                model="gemini-2.5-flash",
                temperature=0.11,
                google_api_key=GOOGLE_API_KEY
            )
            self._build_chains()
            self.ready = True
            logging.info("✅ Bot DeepMed đã sẵn sàng phục vụ!")
        except Exception as e:
            # logging.exception records the traceback at ERROR level; a DEBUG
            # record would be filtered out by the INFO-level configuration.
            logging.exception(f"🔥 Lỗi khởi tạo bot: {e}")

    def _build_chains(self):
        """Assemble ``self.rag_chain`` from ``self.llm`` and ``self.retriever``.

        With a retriever: the question is first rewritten into a standalone
        query using the chat history, documents are retrieved for it, and the
        answer prompt is filled with them. Without a retriever: the answer
        prompt is piped straight into the model, and the caller must supply
        the ``context`` variable itself.
        """
        context_system_prompt = (
            "Dựa trên lịch sử chat và câu hỏi mới nhất của người dùng, "
            "hãy viết lại câu hỏi đó thành một câu đầy đủ ngữ cảnh để hệ thống có thể hiểu được. "
            "KHÔNG trả lời câu hỏi, chỉ viết lại nó."
        )
        context_prompt = ChatPromptTemplate.from_messages([
            ("system", context_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ])
        qa_system_prompt = (
            "Bạn là 'DeepMed-AI' - Trợ lý Dược lâm sàng tại Trung Tâm Y Tế. "
            "Sử dụng các thông tin được cung cấp trong phần Context dưới đây để trả lời câu hỏi về thuốc, bệnh học và y lệnh.\n"
            "Nếu Context có dữ liệu từ Excel, hãy trình bày dạng bảng hoặc gạch đầu dòng rõ ràng.\n"
            "Nếu không tìm thấy thông tin trong Context, hãy nói 'Tôi không tìm thấy thông tin trong dữ liệu nội bộ' và gợi ý dựa trên kiến thức y khoa chung của bạn.\n\n"
            "Context:\n{context}"
        )
        qa_prompt = ChatPromptTemplate.from_messages([
            ("system", qa_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ])
        if self.retriever:
            history_aware_retriever = create_history_aware_retriever(
                self.llm, self.retriever, context_prompt
            )
            question_answer_chain = create_stuff_documents_chain(self.llm, qa_prompt)
            self.rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)
        else:
            self.rag_chain = qa_prompt | self.llm

    @staticmethod
    def _convert_history(history) -> list:
        """Translate Gradio chat history into LangChain messages.

        Accepts both history layouts: ``(user, bot)`` pairs and dicts with
        ``role`` / ``content`` keys. Empty (``None``) messages are skipped
        rather than sent to the model as the text "None". Only the messages
        of the last ``MAX_HISTORY_TURNS`` turns are kept.
        """
        if not history:
            return []
        messages = []
        for turn in history:
            if isinstance(turn, dict):
                role = turn.get("role")
                content = turn.get("content")
                if content is None:
                    continue
                if role == "user":
                    messages.append(HumanMessage(content=str(content)))
                elif role == "assistant":
                    messages.append(AIMessage(content=str(content)))
            else:
                user_msg, bot_msg = turn
                if user_msg is not None:
                    messages.append(HumanMessage(content=str(user_msg)))
                if bot_msg is not None:
                    messages.append(AIMessage(content=str(bot_msg)))
        # One turn is a user message plus a bot message.
        return messages[-2 * MAX_HISTORY_TURNS:]

    def chat_stream(self, message: str, history: list):
        """Stream the answer to ``message`` as a growing string.

        Args:
            message: The user's latest question.
            history: Earlier conversation as supplied by Gradio.

        Yields:
            The accumulated answer after every received chunk, so the UI can
            redraw the full text each time. When documents were retrieved, a
            final value appends the list of cited sources. On failure a
            single error message is yielded instead of raising.
        """
        if not self.ready:
            yield "Hệ thống đang khởi động hoặc gặp lỗi cấu hình."
            return

        full_response = ""
        retrieved_docs = []
        try:
            stream_input = {
                "input": message,
                "chat_history": self._convert_history(history),
            }
            if not self.retriever:
                # The plain prompt-to-model chain has nothing to fill
                # {context} with; leaving it out makes the prompt reject the
                # input as missing a variable.
                stream_input["context"] = ""

            if self.rag_chain:
                for chunk in self.rag_chain.stream(stream_input):
                    if isinstance(chunk, dict):
                        # Retrieval chain: partial dicts carrying "answer"
                        # fragments and, once, the retrieved "context".
                        if "answer" in chunk:
                            full_response += chunk["answer"]
                            yield full_response
                        if "context" in chunk:
                            retrieved_docs = chunk["context"]
                    elif hasattr(chunk, 'content'):
                        # Plain model chain: message chunks.
                        full_response += chunk.content
                        yield full_response
                    elif isinstance(chunk, str):
                        full_response += chunk
                        yield full_response

                if retrieved_docs:
                    refs = self._build_references_text(retrieved_docs)
                    if refs:
                        full_response += f"\n\n---\n📚 **Nguồn tham khảo:**\n{refs}"
                        yield full_response
        except Exception as e:
            logging.exception(f"Lỗi khi chat: {e}")
            yield f"Đã xảy ra lỗi: {str(e)}"

    @staticmethod
    def _build_references_text(docs) -> str:
        """Render the sources of ``docs`` as a de-duplicated bullet list.

        Each line names the ``source`` metadata (``"Tài liệu"`` when absent)
        and, for row-based records, the row number. Order of first appearance
        is preserved.
        """
        lines = []
        seen = set()
        for doc in docs:
            src = doc.metadata.get("source", "Tài liệu")
            row_info = ""
            if "row" in doc.metadata:
                row_info = f"(Dòng {doc.metadata['row']})"
            # rstrip avoids a dangling space when there is no row number.
            ref_str = f"- {src} {row_info}".rstrip()
            if ref_str not in seen:
                lines.append(ref_str)
                seen.add(ref_str)
        return "\n".join(lines)
# Single bot instance shared by every chat session; built once at import.
bot = DeepMedBot()


def gradio_chat_stream(message, history):
    """Gradio callback: relay the bot's streamed answer for ``message``."""
    for partial_answer in bot.chat_stream(message, history):
        yield partial_answer
# Custom stylesheet for the Gradio page. CSS property names are hyphenated:
# "min_height" is not a valid property and would be ignored by the browser.
css = """
.gradio-container {min-height: 600px !important;}
h1 {text-align: center; color: #2E86C1;}
"""
# Page layout: a title, a one-line description and the chat widget.
with gr.Blocks(css=css, title="DeepMed AI") as demo:
    gr.Markdown("# 🏥 DeepMed AI - Trợ lý Lâm Sàng")
    gr.Markdown("Hệ thống hỗ trợ lâm sàng tại Trung Tâm Y Tế Khu Vực Thanh Ba.")
    # The chat widget streams its answers from gradio_chat_stream.
    chat_interface = gr.ChatInterface(fn=gradio_chat_stream)


# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()