Spaces:

random2222
/

tryagain

Build error

App Files Community

random2222 committed on Apr 13, 2025

Commit

1051bf9

verified ·

1 Parent(s): 2c2ba82

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -53

app.py CHANGED Viewed

@@ -1,48 +1,56 @@
-import gradio as gr
 import os
-from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain.vectorstores import FAISS
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 # Configuration
-DOCS_DIR = "business_docs"
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
-MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"
-# Initialize components once at startup
 def initialize_system():
-    # Load and process PDFs from business_docs folder
     if not os.path.exists(DOCS_DIR):
         raise FileNotFoundError(f"Business documents folder '{DOCS_DIR}' not found")
     pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR) if f.endswith(".pdf")]
     if not pdf_files:
         raise ValueError(f"No PDF files found in {DOCS_DIR} folder")
-    # Process documents
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=1000,
         chunk_overlap=200
     )
-    texts = []
-    for pdf in pdf_files:
-        loader = PyPDFLoader(pdf)
-        pages = loader.load_and_split(text_splitter)
-        texts.extend(pages)
-    # Create vector store
     embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
-    vector_store = FAISS.from_documents(texts, embeddings)
-    # Load model with quantization for faster inference
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         device_map="auto",
-        load_in_8bit=True
     )
     return vector_store, model, tokenizer
@@ -50,52 +58,66 @@ def initialize_system():
 # Initialize system components
 try:
     vector_store, model, tokenizer = initialize_system()
-    print("System initialized successfully with business documents")
 except Exception as e:
-    print(f"Initialization error: {str(e)}")
     raise
-# Response generation with context
 def generate_response(query):
-    # Retrieve relevant context
-    docs = vector_store.similarity_search(query, k=3)
-    context = "\n".join([doc.page_content for doc in docs])
-    # Create instruction prompt
-    prompt = f"""<s>[INST] You are a customer support agent.
-    Answer ONLY using information from the provided business documents.
-    If unsure, say "I don't have information about that."
-    Context: {context}
-    Question: {query} [/INST]"""
-    # Generate response
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(
-        **inputs,
-        max_new_tokens=500,
-        temperature=0.3,
-        do_sample=True
-    )
-    return tokenizer.decode(outputs[0], skip_special_tokens=True).split("[/INST]")[-1].strip()
-# Chat interface
-with gr.Blocks() as demo:
-    gr.Markdown("## Business Support Chatbot\nAsk questions about our services!")
-    chatbot = gr.Chatbot(label="Conversation")
-    msg = gr.Textbox(label="Type your question")
     clear = gr.Button("Clear History")
     def respond(message, chat_history):
-        try:
-            response = generate_response(message)
-        except Exception as e:
-            response = "Sorry, I'm having trouble answering right now. Please try again later."
         chat_history.append((message, response))
         return "", chat_history
     msg.submit(respond, [msg, chatbot], [msg, chatbot])
     clear.click(lambda: None, None, chatbot, queue=False)
-demo.launch()

 import os
+import gradio as gr
+import torch
+from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 # Configuration
+DOCS_DIR = ".business_docs"
 EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
+# System Initialization
 def initialize_system():
+    # Validate documents folder
     if not os.path.exists(DOCS_DIR):
         raise FileNotFoundError(f"Business documents folder '{DOCS_DIR}' not found")
+    # Load and process PDFs
     pdf_files = [os.path.join(DOCS_DIR, f) for f in os.listdir(DOCS_DIR) if f.endswith(".pdf")]
     if not pdf_files:
         raise ValueError(f"No PDF files found in {DOCS_DIR} folder")
     text_splitter = RecursiveCharacterTextSplitter(
         chunk_size=1000,
         chunk_overlap=200
     )
+    documents = []
+    for pdf_path in pdf_files:
+        loader = PyPDFLoader(pdf_path)
+        documents.extend(loader.load_and_split(text_splitter))
+    # Create embeddings
     embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
+    vector_store = FAISS.from_documents(documents, embeddings)
+    # Quantization config
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.float16,
+    )
+    # Load model and tokenizer
     tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
+        quantization_config=bnb_config,
         device_map="auto",
+        trust_remote_code=True
     )
     return vector_store, model, tokenizer
 # Initialize system components
 try:
     vector_store, model, tokenizer = initialize_system()
+    print("✅ System initialized with business documents")
 except Exception as e:
+    print(f"❌ Initialization failed: {str(e)}")
     raise
+# Response Generation
 def generate_response(query):
+    try:
+        # Retrieve relevant context
+        docs = vector_store.similarity_search(query, k=3)
+        context = "\n".join([doc.page_content for doc in docs])
+        # Create formatted prompt
+        prompt = f"""<|system|>
+        You are a customer support assistant. Answer ONLY using the provided business documents.
+        If the answer isn't in the documents, respond: "I don't have that information."
+        Context: {context}</s>
+        <|user|>
+        {query}</s>
+        <|assistant|>
+        """
+        # Generate response
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+        outputs = model.generate(
+            inputs.input_ids,
+            max_new_tokens=512,
+            temperature=0.3,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Extract only the assistant's response
+        return response.split("<|assistant|>")[-1].strip()
+    except Exception as e:
+        return f"⚠️ Error: {str(e)}"
+# Chat Interface
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 📚 Business Document Assistant")
+    with gr.Row():
+        gr.Image("https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png",
+                width=100)
+        gr.Markdown("Ask questions about our policies, products, and services!")
+    chatbot = gr.Chatbot(height=400)
+    msg = gr.Textbox(label="Your Question", placeholder="Type your question here...")
     clear = gr.Button("Clear History")
     def respond(message, chat_history):
+        response = generate_response(message)
         chat_history.append((message, response))
         return "", chat_history
     msg.submit(respond, [msg, chatbot], [msg, chatbot])
     clear.click(lambda: None, None, chatbot, queue=False)
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)