Wenye He committed: Update app.py
app.py CHANGED
@@ -19,26 +19,26 @@ MODEL_CONFIG = {
     "phi-3": {
         "model_name": "microsoft/phi-3-mini-4k-instruct",
         "template": """<|user|>
-Using the following context, please answer the question. If the context doesn't contain relevant information, say so.
+Using only the following context, please provide a relevant answer to the question. If the context doesn't contain relevant information, please say so clearly.
 
 Context:
 {context}
 
 Question: {question}<|end|>
 <|assistant|>
-"""
+Based on the provided context, I'll answer your question:"""
     },
     "llama3-8b": {
         "model_name": "NousResearch/Meta-Llama-3-8B-Instruct",
         "template": """<|begin_of_text|><|start_header_id|>user<|end_header_id|>
-Using the following context, please answer the question. If the context doesn't contain relevant information, say so.
+Using only the following context, please provide a relevant answer to the question. If the context doesn't contain relevant information, please say so clearly.
 
 Context:
 {context}
 
 Question: {question}<|eot_id|>
 <|start_header_id|>assistant<|end_header_id|>
-"""
+Based on the provided context, I'll answer your question:"""
     }
 }
 
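For reference, a minimal sketch of how one of these templates is filled before generation; the template text is copied from the new "phi-3" entry, and the context/question values are illustrative:

# Minimal sketch: filling the updated phi-3 template with retrieved context.
MODEL_CONFIG = {
    "phi-3": {
        "template": """<|user|>
Using only the following context, please provide a relevant answer to the question. If the context doesn't contain relevant information, please say so clearly.

Context:
{context}

Question: {question}<|end|>
<|assistant|>
Based on the provided context, I'll answer your question:"""
    }
}

prompt = MODEL_CONFIG["phi-3"]["template"].format(
    context="Scoliosis is a sideways curvature of the spine.",  # illustrative
    question="What is scoliosis?",                              # illustrative
)
print(prompt)

Pre-filling the assistant turn with "Based on the provided context, I'll answer your question:" primes the model's reply, which tends to keep small instruct models anchored to the retrieved context rather than their parametric knowledge.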
@@ -53,7 +53,9 @@ class ChatModel:
     def __init__(self):
         self.models = {}
         self.tokenizers = {}
-        self.vectorstore = {}
+        self.current_store = None
+        self.current_vectorstore = None
+        # Use the same embedding model as in vector store creation
         self.embeddings = HuggingFaceEmbeddings(
             model_name="sentence-transformers/all-MiniLM-L6-v2"
         )
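The new comment about reusing the embedding model is load-bearing: a FAISS index built with one encoder cannot be queried meaningfully with another. A standalone check of the encoder used here (the import path is an assumption, since the diff does not show app.py's imports):

# Assumed import path; older code imports the same class from langchain.embeddings.
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
vector = embeddings.embed_query("test query")
print(len(vector))  # all-MiniLM-L6-v2 produces 384-dimensional vectors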
@@ -62,8 +64,8 @@ class ChatModel:
         """Load and cache the model and tokenizer"""
         if model_name not in self.models:
             logger.info(f"Loading model: {model_name}")
-            config = MODEL_CONFIG[model_name]
             try:
+                config = MODEL_CONFIG[model_name]
                 tokenizer = AutoTokenizer.from_pretrained(config["model_name"])
                 tokenizer.pad_token = tokenizer.eos_token
                 model = AutoModelForCausalLM.from_pretrained(
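Moving the MODEL_CONFIG lookup inside the try block means an unknown model key is logged and re-raised with the other load failures instead of escaping as a bare KeyError. A condensed, self-contained sketch of that pattern (the function name, logger setup, and the trimmed from_pretrained call are assumptions; the diff truncates the original):

import logging

from transformers import AutoModelForCausalLM, AutoTokenizer

logger = logging.getLogger(__name__)

def load_model(model_name, models, tokenizers, model_config):
    """Load and cache a model/tokenizer pair (hypothetical standalone version)."""
    if model_name not in models:
        try:
            config = model_config[model_name]  # a bad key now lands in except below
            tokenizer = AutoTokenizer.from_pretrained(config["model_name"])
            tokenizer.pad_token = tokenizer.eos_token  # reuse EOS when no pad token exists
            models[model_name] = AutoModelForCausalLM.from_pretrained(config["model_name"])
            tokenizers[model_name] = tokenizer
        except Exception as e:
            logger.error(f"Failed to load {model_name}: {e}")
            raise
    return models[model_name], tokenizers[model_name]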
@@ -80,32 +82,51 @@ class ChatModel:
                 raise
 
     def load_vector_store(self, store_name):
-        """Load
-
-
-
-
-
+        """Load vector store with cache invalidation"""
+        try:
+            # Check if we need to load a new store
+            if self.current_store != store_name:
+                logger.info(f"Loading new vector store: {store_name}")
+                vector_store_path = f"vector-stores/{store_name}"
+
+                if not os.path.exists(vector_store_path):
+                    raise ValueError(f"Vector store not found at: {vector_store_path}")
+
+                # Load new vector store
+                self.current_vectorstore = FAISS.load_local(
+                    vector_store_path,
                     self.embeddings,
                     allow_dangerous_deserialization=True
                 )
-
-
+                self.current_store = store_name
+
+                # Verify the new store
+                self.check_vectorstore()
                 logger.info(f"Successfully loaded vector store: {store_name}")
-
-
-            raise
-        return self.vectorstore[store_name]
+
+            return self.current_vectorstore
 
-
-
+        except Exception as e:
+            logger.error(f"Error loading vector store {store_name}: {str(e)}")
+            # Reset state on error
+            self.current_store = None
+            self.current_vectorstore = None
+            raise
+
+    def check_vectorstore(self):
+        """Verify current vector store content"""
         try:
-
-
-
-
+            if self.current_vectorstore is None:
+                raise ValueError("No vector store currently loaded")
+
+            # Use a generic query to test retrieval
+            sample_query = "what is this document about"
+            docs = self.current_vectorstore.similarity_search(sample_query, k=1)
+            logger.info(f"Vector store {self.current_store} content sample:")
+            logger.info(f"Document content: {docs[0].page_content[:200]}...")
+            logger.info(f"Document source: {docs[0].metadata.get('source', 'unknown')}")
         except Exception as e:
-            logger.error(f"Error checking vector store
+            logger.error(f"Error checking vector store: {str(e)}")
             raise
 
     def generate(self, message, model_name, vector_store_name, history):
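The rewrite replaces the old dict-based cache (self.vectorstore[store_name]) with a single current_store/current_vectorstore slot plus invalidation. A usage sketch, assuming the FAISS indexes exist on disk under vector-stores/:

# Usage sketch of the cache-invalidation behaviour introduced above.
chat_model = ChatModel()

store = chat_model.load_vector_store("scoliosis")  # loads from disk, then verifies
same = chat_model.load_vector_store("scoliosis")   # cache hit: current_store matches
other = chat_model.load_vector_store("llm")        # store changed: old index dropped, new one loaded
assert store is same

One trade-off of the single-slot cache versus the old dict: alternating between two knowledge bases reloads from disk each time, which seems acceptable with only two small stores.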
@@ -120,8 +141,14 @@ class ChatModel:
             # Retrieve relevant context
             logger.info(f"Retrieving context for query: {message}")
             docs = vectorstore.similarity_search(message, k=3)
+
+            # Log retrieved documents for debugging
+            for i, doc in enumerate(docs):
+                logger.info(f"Retrieved document {i + 1}:")
+                logger.info(f"Source: {doc.metadata.get('source', 'unknown')}")
+                logger.info(f"Content: {doc.page_content[:200]}...")
+
             context = "\n\n".join([d.page_content for d in docs])
-            logger.info(f"Retrieved context: {context[:200]}...")
 
             # Format prompt
             prompt = config["template"].format(
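A related debugging aid, not part of this diff: LangChain's FAISS wrapper can also return L2 distances, which makes misrouted retrievals easier to spot than content snippets alone. A hypothetical extension of the logging above:

# Hypothetical addition: log distances alongside sources (lower is closer).
docs_and_scores = vectorstore.similarity_search_with_score(message, k=3)
for doc, score in docs_and_scores:
    logger.info(f"{score:.3f}  {doc.metadata.get('source', 'unknown')}")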
@@ -129,6 +156,8 @@ class ChatModel:
                 question=message
             )
 
+            logger.info(f"Generated prompt: {prompt[:200]}...")
+
             # Generate response
             pipe = pipeline(
                 "text-generation",
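The diff shows only the head of the pipeline(...) call. A condensed sketch of the generation step; model, tokenizer, and prompt come from the surrounding code, and the keyword values here are assumptions, not the app's actual settings:

from transformers import pipeline

pipe = pipeline(
    "text-generation",
    model=model,            # loaded earlier by the load method
    tokenizer=tokenizer,
    max_new_tokens=256,     # assumed value
    return_full_text=False, # return only the completion, not the echoed prompt
)
response = pipe(prompt)[0]["generated_text"].strip()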
@@ -173,7 +202,7 @@ def chat(message, history, model_choice, vector_store_choice):
         history
     )
 
-    # Format response with metrics
+    # Format response with metrics and source context
     formatted_response = (
         f"{response}\n\n"
         f"⏱️ Response Time: {response_time:.2f}s | "
@@ -189,7 +218,10 @@ def chat(message, history, model_choice, vector_store_choice):
 
 # Gradio interface
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🚀 Enhanced RAG Chatbot with Performance Metrics")
+    gr.Markdown("""# 🚀 Enhanced RAG Chatbot with Performance Metrics
+
+    This chatbot uses Retrieval-Augmented Generation (RAG) to provide informed responses based on your documents.
+    """)
 
     with gr.Row():
         model_choice = gr.Dropdown(
@@ -198,9 +230,10 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             value="phi-3"
         )
         vector_store_choice = gr.Dropdown(
-            ["llm", "scoliosis"],
+            ["llm", "scoliosis"],  # Update these choices based on your vector stores
             value="scoliosis",
-            label="Knowledge Base"
+            label="Knowledge Base",
+            interactive=True
         )
 
     with gr.Row():
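The new inline comment asks future editors to keep the dropdown choices in sync with the stores on disk. A possible alternative, an assumption rather than what this app does, is to discover them at startup:

import os

def available_stores(root="vector-stores"):
    """List FAISS store directories on disk (hypothetical helper)."""
    if not os.path.isdir(root):
        return []
    return sorted(
        name for name in os.listdir(root)
        if os.path.isdir(os.path.join(root, name))
    )

# e.g. gr.Dropdown(available_stores(), value="scoliosis", label="Knowledge Base")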