Wenye He committed
Commit 7167bd9 · verified · 1 Parent(s): 2499b05

Update app.py

Files changed (1)
  1. app.py +118 -152
app.py CHANGED
@@ -1,178 +1,144 @@
-# app.py
 import gradio as gr
 import torch
-import time
-import os
-from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
-from langchain_community.document_loaders import PyPDFLoader, TextLoader
-from langchain_text_splitters import RecursiveCharacterTextSplitter
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain_community.vectorstores import FAISS

-# Configuration
 MODEL_CONFIG = {
-    "phi-3": {
-        "model_name": "microsoft/phi-3-mini-4k-instruct",
-        "template": "<|user|>\n{message}<|end|>\n<|assistant|>"
     },
-    "llama3-8b": {
-        "model_name": "NousResearch/Meta-Llama-3-8B-Instruct",
-        "template": """<|begin_of_text|><|start_header_id|>user<|end_header_id|>
-{message}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"""
     }
 }

-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_use_double_quant=True
-)

-class ChatModel:
-    def __init__(self):
-        self.models = {}
-        self.tokenizers = {}
-        self.vectorstore = None
-
-    def load_model(self, model_name):
-        if model_name not in self.models:
-            config = MODEL_CONFIG[model_name]
-            tokenizer = AutoTokenizer.from_pretrained(config["model_name"])
-            tokenizer.pad_token = tokenizer.eos_token
-            model = AutoModelForCausalLM.from_pretrained(
-                config["model_name"],
-                quantization_config=bnb_config,
-                device_map="auto",
-                torch_dtype=torch.float16,
-            )
-            self.models[model_name] = model
-            self.tokenizers[model_name] = tokenizer
-
-    def process_documents(self, files, progress=gr.Progress()):
-        """Process uploaded documents into vector embeddings"""
-        try:
-            progress(0, desc="Starting document processing")
-            documents = []
-
-            # Load documents
-            for file_path in progress.tqdm(files, desc="Loading files"):
-                if file_path.endswith(".pdf"):
-                    loader = PyPDFLoader(file_path)
-                elif file_path.endswith(".txt"):
-                    loader = TextLoader(file_path)
-                else:
-                    continue
-                documents.extend(loader.load())
-
-            # Split documents
-            progress(0.3, desc="Processing documents")
-            text_splitter = RecursiveCharacterTextSplitter(
-                chunk_size=512,
-                chunk_overlap=50
-            )
-            texts = text_splitter.split_documents(documents)
-
-            # Create embeddings
-            progress(0.6, desc="Generating embeddings")
-            embeddings = HuggingFaceEmbeddings(
-                model_name="BAAI/bge-small-en-v1.5"
-            )
-
-            # Create vector store
-            progress(0.8, desc="Building vector database")
-            self.vectorstore = FAISS.from_documents(texts, embeddings)
-
-            return "✅ Documents processed successfully! Ready for queries."
-
-        except Exception as e:
-            return f"❌ Error processing documents: {str(e)}"

-    def generate(self, message, model_name, history):
-        start_time = time.time()
-        self.load_model(model_name)
-        config = MODEL_CONFIG[model_name]
-
-        # Retrieve relevant context
-        context = ""
-        if self.vectorstore:
-            docs = self.vectorstore.similarity_search(message, k=3)
-            context = "\n\n".join([d.page_content for d in docs])

-        # Format prompt with context
-        prompt = config["template"].format(
-            message=f"Context:\n{context}\n\nQuestion: {message}"
         )

-        # Generate response
-        pipe = pipeline(
             "text-generation",
-            model=self.models[model_name],
-            tokenizer=self.tokenizers[model_name],
-            max_new_tokens=384,
-            temperature=0.7,
-            top_p=0.9,
-            repetition_penalty=1.1,
-            do_sample=True,
-            return_full_text=False
         )
-
-        response = pipe(prompt)[0]['generated_text']
-
-        # Calculate metrics
-        elapsed_time = time.time() - start_time
-        tokens = len(self.tokenizers[model_name].encode(response))
-        tokens_per_sec = tokens / elapsed_time if elapsed_time > 0 else 0
-
-        return response, elapsed_time, tokens_per_sec

-# Initialize model handler
-model_handler = ChatModel()

-def chat(message, history, model_choice):
     try:
-        response, response_time, token_speed = model_handler.generate(message, model_choice, history)
-        formatted_response = f"{response}\n\n⏱️ Response Time: {response_time:.2f}s | 🚀 Speed: {token_speed:.2f} tokens/s"
-        return [(message, formatted_response)]
     except Exception as e:
-        return [(message, f"Error: {str(e)}")]

-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🚀 LLM Chatbot with RAG & Performance Metrics")

-    with gr.Row():
-        model_choice = gr.Dropdown(
-            choices=["phi-3", "llama3-8b"],
-            label="Select Model",
-            value="phi-3"
-        )

-    with gr.Row():
-        with gr.Column(scale=1):
-            file_upload = gr.File(
-                label="Upload Documents (PDF/TXT)",
-                file_count="multiple",
-                file_types=[".pdf", ".txt"],
-                type="filepath"
-            )
-            status = gr.Textbox(label="Processing Status", interactive=False)
-        with gr.Column(scale=3):
-            chatbot = gr.Chatbot(height=500)
-            msg = gr.Textbox(label="Message", placeholder="Type your question here...")

-    with gr.Row():
-        submit_btn = gr.Button("Send", variant="primary")
-        clear_btn = gr.ClearButton([msg, chatbot, file_upload])
-
-    # Event handlers
-    file_upload.upload(
-        fn=model_handler.process_documents,
-        inputs=file_upload,
-        outputs=status,
-        show_progress="full"
     )

-    msg.submit(chat, [msg, chatbot, model_choice], chatbot)
-    submit_btn.click(chat, [msg, chatbot, model_choice], chatbot)

-demo.launch()
 import gradio as gr
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from langchain.chains import ConversationalRetrievalChain
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.vectorstores import FAISS
+from langchain.memory import ConversationBufferMemory
+from langchain.llms import HuggingFacePipeline
 import torch

+# Model Configuration
 MODEL_CONFIG = {
+    "phi-3-mini": {
+        "name": "microsoft/phi-3-mini-128k-instruct",
+        "max_tokens": 1024,
+        "temperature": 0.8
     },
+    "Mistral-7B": {
+        "name": "mistralai/Mistral-7B-Instruct-v0.3",
+        "max_tokens": 512,
+        "temperature": 0.7
     }
 }

+# Cache Stores
+vector_store_cache = {}
+model_pipeline_cache = {}
+embedder = HuggingFaceEmbeddings()

+def load_vector_store(store_name):
+    """Cache vector stores in memory"""
+    if store_name not in vector_store_cache:
+        vector_store_cache[store_name] = FAISS.load_local(
+            f"vector_stores/{store_name}",
+            embedder
+        )
+    return vector_store_cache[store_name]

+def get_model_pipeline(model_choice):
+    """Cache model pipelines in memory"""
+    if model_choice not in model_pipeline_cache:
+        cfg = MODEL_CONFIG[model_choice]
+
+        tokenizer = AutoTokenizer.from_pretrained(cfg["name"])
+        model = AutoModelForCausalLM.from_pretrained(
+            cfg["name"],
+            device_map="auto",
+            torch_dtype="auto" if "phi-3" in model_choice else torch.float16
         )
+
+        model_pipeline_cache[model_choice] = pipeline(
             "text-generation",
+            model=model,
+            tokenizer=tokenizer,
+            max_new_tokens=cfg["max_tokens"],
+            temperature=cfg["temperature"]
         )
+    return model_pipeline_cache[model_choice]

+class SessionChain:
+    """Per-session chain manager with memory"""
+    def __init__(self):
+        self.current_model = None
+        self.current_vector_store = None
+        self.chain = None
+
+    def get_chain(self, model_choice, vector_store_name):
+        """Get or create chain with proper configuration"""
+        if self.current_model != model_choice or self.current_vector_store != vector_store_name:
+            self._create_new_chain(model_choice, vector_store_name)
+        return self.chain
+
+    def _create_new_chain(self, model_choice, vector_store_name):
+        """Create new chain with updated configuration"""
+        vector_store = load_vector_store(vector_store_name)
+        pipe = get_model_pipeline(model_choice)
+
+        self.chain = ConversationalRetrievalChain.from_llm(
+            llm=HuggingFacePipeline(pipeline=pipe),
+            retriever=vector_store.as_retriever(),
+            memory=ConversationBufferMemory(),
+            verbose=False
+        )
+        self.current_model = model_choice
+        self.current_vector_store = vector_store_name

+def respond(message, history, model_choice, vector_store, session_state):
+    """Handle message with cached resources and session chain"""
+    # Initialize session chain if not exists
+    if session_state is None:
+        session_state = SessionChain()
+
+    # Get the appropriate chain for this session
+    chain = session_state.get_chain(model_choice, vector_store)
+
     try:
+        # Convert Gradio history to LangChain format
+        for human, ai in history[-5:]:  # Keep last 5 exchanges as memory
+            chain.memory.save_context({"input": human}, {"output": ai})
+
+        # Generate response
+        result = chain.invoke({"question": message})
+        response = result["answer"]
+
+        return "", history + [(message, response)], session_state
+
     except Exception as e:
+        return "", history + [(message, f"⚠️ Error: {str(e)}")], session_state

+with gr.Blocks() as demo:
+    gr.Markdown("# 🚀 Optimized Chat with Session Management")

+    # UI Components
+    model_dropdown = gr.Dropdown(
+        list(MODEL_CONFIG.keys()),
+        value="phi-3-mini",
+        label="Select Model"
+    )
+    vector_store_dropdown = gr.Dropdown(
+        ["legal_docs", "tech_docs"],
+        value="tech_docs",
+        label="Knowledge Base"
+    )

+    # Session state stored in the browser
+    session = gr.State()

+    chatbot = gr.Chatbot(height=400)
+    msg = gr.Textbox(label="Your Message")
+    clear = gr.Button("Clear History")
+
+    # Chat handlers
+    msg.submit(
+        respond,
+        [msg, chatbot, model_dropdown, vector_store_dropdown, session],
+        [msg, chatbot, session]
     )

+    clear.click(
+        lambda: ([], None),
+        [],
+        [chatbot, session]
+    )

+demo.launch()
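
Note: the updated app.py drops the in-app document upload path and instead loads prebuilt FAISS indexes from vector_stores/<name> (the dropdown offers legal_docs and tech_docs), using the same default HuggingFaceEmbeddings it instantiates at startup. The commit does not include an ingestion script, so the following is only a minimal sketch of how such an index could be produced, reusing the loaders and splitter from the removed version; the build_store helper and the sample file paths are hypothetical.

# build_vector_store.py: hypothetical helper, not part of this commit
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

def build_store(file_paths, store_name):
    # Load PDF and plain-text sources into LangChain documents
    documents = []
    for path in file_paths:
        if path.endswith(".pdf"):
            documents.extend(PyPDFLoader(path).load())
        elif path.endswith(".txt"):
            documents.extend(TextLoader(path).load())

    # Chunk the documents before embedding
    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
    chunks = splitter.split_documents(documents)

    # Embed with the same default HuggingFaceEmbeddings the app uses,
    # then save to the directory that load_vector_store() reads from
    embedder = HuggingFaceEmbeddings()
    FAISS.from_documents(chunks, embedder).save_local(f"vector_stores/{store_name}")

if __name__ == "__main__":
    # Example: index two local files (illustrative paths) as the "tech_docs" knowledge base
    build_store(["docs/manual.pdf", "docs/notes.txt"], "tech_docs")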