Spaces:

random2222
/

tryagain

Build error

App Files Community

random2222 committed on Apr 13, 2025

Commit

4d6816c

verified ·

1 Parent(s): 2d88065

Update app.py

Browse files

Files changed (1) hide show

app.py +27 -28

app.py CHANGED Viewed

@@ -22,7 +22,7 @@ def initialize_system():
                 if f.endswith(".pdf")]
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size=1000,  # Increased chunk size for better context
         chunk_overlap=200
     )
@@ -41,17 +41,14 @@ def initialize_system():
     # Vector store
     vector_store = FAISS.from_documents(texts, embeddings)
-    # Load model with memory optimization
-    tokenizer = AutoTokenizer.from_pretrained(
-        MODEL_NAME,
-        trust_remote_code=True,
-        padding_side="left"
-    )
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         trust_remote_code=True,
-        torch_dtype=torch.float16,
         device_map="auto",
         low_cpu_mem_usage=True
     )
@@ -61,7 +58,11 @@ def initialize_system():
 try:
     vector_store, model, tokenizer = initialize_system()
     print("✅ System initialized successfully")
-    print(f"Memory usage: {torch.cuda.memory_allocated()/1024**3:.1f}GB") if torch.cuda.is_available() else None
 except Exception as e:
     print(f"❌ Initialization failed: {str(e)}")
     raise
@@ -71,18 +72,15 @@ def generate_response(query):
         # Context retrieval
         docs = vector_store.similarity_search(query, k=3)
         context = "\n".join([d.page_content for d in docs])
-        # Optimized prompt
-        prompt = f"""<|system|>
-        You are a customer service expert. Answer using:
-        {context}
-        - Be concise (2-3 sentences)
-        - If information is missing: "Let me check with the team"
-        </s>
-        <|user|>{query}</s>
-        <|assistant|>"""
-        inputs = tokenizer(prompt, return_tensors="pt")
         outputs = model.generate(
             inputs.input_ids,
             max_new_tokens=300,
@@ -92,20 +90,21 @@ def generate_response(query):
         )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        return response.split("<|assistant|>")[-1].strip()
     except Exception as e:
-        return "Please try again later."
-# Enhanced interface
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# Enterprise Customer Support")
-    with gr.Row():
-        chatbot = gr.Chatbot(height=500, label="Conversation")
     with gr.Row():
         msg = gr.Textbox(placeholder="Ask about our services...", scale=7)
         submit_btn = gr.Button("Send", variant="primary", scale=1)
     clear = gr.ClearButton([msg, chatbot])
     def respond(message, history):
@@ -116,4 +115,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
     msg.submit(respond, [msg, chatbot], [msg, chatbot])
-demo.launch(server_port=7860)

                 if f.endswith(".pdf")]
     text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,
         chunk_overlap=200
     )
     # Vector store
     vector_store = FAISS.from_documents(texts, embeddings)
+    # Load model and tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
+    tokenizer.pad_token = tokenizer.eos_token  # Fix padding issue
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_NAME,
         trust_remote_code=True,
+        torch_dtype=torch.float32 if not torch.cuda.is_available() else torch.float16,
         device_map="auto",
         low_cpu_mem_usage=True
     )
 try:
     vector_store, model, tokenizer = initialize_system()
     print("✅ System initialized successfully")
+    if torch.cuda.is_available():
+        print("🚀 Using CUDA")
+        print(f"Memory usage: {torch.cuda.memory_allocated()/1024**3:.2f} GB")
+    else:
+        print("🧠 Using CPU")
 except Exception as e:
     print(f"❌ Initialization failed: {str(e)}")
     raise
         # Context retrieval
         docs = vector_store.similarity_search(query, k=3)
         context = "\n".join([d.page_content for d in docs])
+        # Prompt template optimized for Phi-2
+        prompt = f"""Context:
+{context}
+Question: {query}
+Answer:"""
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
         outputs = model.generate(
             inputs.input_ids,
             max_new_tokens=300,
         )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response.split("Answer:")[-1].strip()
     except Exception as e:
+        return "Sorry, an error occurred while generating a response."
+# Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🧠 Enterprise Customer Support Chatbot")
+    chatbot = gr.Chatbot(height=500, label="Conversation")
     with gr.Row():
         msg = gr.Textbox(placeholder="Ask about our services...", scale=7)
         submit_btn = gr.Button("Send", variant="primary", scale=1)
     clear = gr.ClearButton([msg, chatbot])
     def respond(message, history):
     submit_btn.click(respond, [msg, chatbot], [msg, chatbot])
     msg.submit(respond, [msg, chatbot], [msg, chatbot])
+demo.launch(server_port=7860)