Update app.py
app.py
CHANGED
@@ -9,25 +9,10 @@ import faiss
 import numpy as np
 import PyPDF2
 from pathlib import Path
-
-#
-# This is generally handled by requirements.txt on Hugging Face Spaces,
-# but this is a fallback for local execution.
-try:
-    import faiss
-except ImportError:
-    print("Installing faiss-cpu...")
-    os.system("pip install --quiet faiss-cpu")
-    import faiss
-
-try:
-    import PyPDF2
-except ImportError:
-    print("Installing PyPDF2...")
-    os.system("pip install --quiet PyPDF2")
-    import PyPDF2
-
-# --- Model Architecture (Copied from your provided code) ---
+import traceback  # Import traceback for detailed error logging
+import sys
+
+# --- Model Architecture (Same as before) ---
 class EfficientMultiHeadAttention(nn.Module):
     def __init__(self, d_model, n_heads, dropout=0.1):
         super().__init__()
@@ -109,35 +94,43 @@ TOKENIZER_NAME = "bert-base-uncased"
 EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
 DEVICE = torch.device('cpu')

-#
-
-print(
-
-
-
-
-
-
-torch.save({
-    'config': dummy_config, 'model_state_dict': dummy_model.state_dict(), 'quantization': 'dynamic_int8'
-}, MODEL_PATH)
-
-def load_custom_model(model_path):
-    checkpoint = torch.load(model_path, map_location=DEVICE)
+# --- Robust Model Loading ---
+try:
+    print("--- Starting Application ---")
+
+    # 1. Load Custom Inference Model
+    print(f"Attempting to load custom model from: {MODEL_PATH}")
+    if not os.path.exists(MODEL_PATH):
+        raise FileNotFoundError(f"CRITICAL: Model file not found at '{MODEL_PATH}'. Please ensure the file exists in your repository.")
+
+    checkpoint = torch.load(MODEL_PATH, map_location=DEVICE)
     config = checkpoint['config']
-
-
-
-
-
-
-
-
-tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
-
-
-
-
+    inference_model = EdgeOptimizedSLM(**config)
+    inference_model.load_state_dict(checkpoint['model_state_dict'])
+    inference_model.to(DEVICE)
+    inference_model.eval()
+    print("✅ Custom inference model loaded successfully.")
+
+    # 2. Load Tokenizer
+    print(f"Attempting to load tokenizer: {TOKENIZER_NAME}")
+    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
+    print("✅ Tokenizer loaded successfully.")
+
+    # 3. Load Embedding Model
+    print(f"Attempting to load embedding model: {EMBEDDING_MODEL_NAME}")
+    embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME, device=DEVICE)
+    print("✅ Embedding model loaded successfully.")
+
+except Exception as e:
+    print("--- 🔴 AN ERROR OCCURRED DURING STARTUP ---")
+    print(f"Error Type: {type(e).__name__}")
+    print(f"Error Details: {e}")
+    print("------------------------------------------")
+    traceback.print_exc()  # Print the full traceback for detailed debugging
+    # We exit here because the app cannot run without the models.
+    sys.exit("Exiting application due to critical startup error.")
+
+# --- RAG and PDF Processing Logic (Same as before) ---
 class RAGPipeline:
     def __init__(self, embedding_model):
         self.text_chunks = []
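The robust loader introduced above expects the checkpoint at MODEL_PATH to carry 'config' and 'model_state_dict' entries, matching the torch.save call the removed startup code used. A minimal sketch of writing a compatible checkpoint; the config keys and values here are illustrative placeholders, not the real EdgeOptimizedSLM settings:

    # Hypothetical example of saving a checkpoint in the layout the loader reads.
    # The keys of `config` must match EdgeOptimizedSLM's constructor arguments;
    # the values below are placeholders for illustration only.
    config = {"d_model": 256, "n_heads": 4, "max_length": 512}
    model = EdgeOptimizedSLM(**config)
    torch.save({
        "config": config,
        "model_state_dict": model.state_dict(),
        "quantization": "dynamic_int8",
    }, MODEL_PATH)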
@@ -146,36 +139,23 @@ class RAGPipeline:
         self.raw_embeddings_path = "document_embeddings.raw"

     def process_pdf(self, pdf_file_obj):
-        if pdf_file_obj is None:
-            return "Please upload a PDF file first.", None
-
+        if pdf_file_obj is None: return "Please upload a PDF file first.", None
         print(f"Processing PDF: {pdf_file_obj.name}")
         self.text_chunks = []
-
         try:
             pdf_reader = PyPDF2.PdfReader(pdf_file_obj.name)
             text = "".join(page.extract_text() for page in pdf_reader.pages if page.extract_text())
-
-            if not text:
-                return "Could not extract text from the PDF.", None
-
+            if not text: return "Could not extract text from the PDF.", None
             words = text.split()
             chunk_size, overlap = 200, 30
             for i in range(0, len(words), chunk_size - overlap):
                 self.text_chunks.append(" ".join(words[i:i + chunk_size]))
-
-            if not self.text_chunks:
-                return "Text extracted but could not be split into chunks.", None
-
-            print(f"Generating embeddings for {len(self.text_chunks)} chunks...")
+            if not self.text_chunks: return "Text extracted but could not be split into chunks.", None
             embeddings = self.embedding_model.encode(self.text_chunks, convert_to_tensor=False, show_progress_bar=True)
-
             with open(self.raw_embeddings_path, 'wb') as f:
                 f.write(embeddings.tobytes())
-
             self.vector_store = faiss.IndexFlatL2(embeddings.shape[1])
             self.vector_store.add(embeddings)
-
             status_message = f"Successfully processed '{Path(pdf_file_obj.name).name}'. Ready for questions."
             print("PDF processing complete.")
             return status_message, self.raw_embeddings_path
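For reference, the chunking loop in process_pdf steps through the word list in strides of chunk_size - overlap = 170 words, so consecutive chunks share a 30-word overlap. A standalone sketch of the same arithmetic:

    # Sliding-window chunking as used in process_pdf, on a toy word list.
    words = ["w%d" % i for i in range(1000)]               # pretend: 1,000 extracted words
    chunk_size, overlap = 200, 30
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):   # stride = 170 words
        chunks.append(" ".join(words[i:i + chunk_size]))
    # Chunks start at word offsets 0, 170, 340, 510, 680, 850 -> 6 chunks,
    # each up to 200 words long and overlapping its neighbour by 30 words.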
@@ -183,7 +163,6 @@ class RAGPipeline:
             print(f"Error processing PDF: {e}")
             return f"Error processing PDF: {e}", None

-
     def retrieve_context(self, query, top_k=3):
         if self.vector_store is None: return ""
         query_embedding = self.embedding_model.encode([query])
@@ -192,19 +171,16 @@ class RAGPipeline:

 rag_pipeline = RAGPipeline(embedding_model)

-# --- Chatbot Inference Logic ---
+# --- Chatbot Inference Logic (Same as before) ---
 def get_answer(question, context):
     if not context:
         return "I could not find relevant information in the document to answer that question."
-
     inputs = tokenizer(question, context, return_tensors='pt', max_length=model_config.get('max_length', 512), truncation=True, padding='max_length')
     input_ids, attention_mask = inputs['input_ids'].to(DEVICE), inputs['attention_mask'].to(DEVICE)
-
     with torch.no_grad():
         outputs = inference_model(input_ids, attention_mask)
     start_index = torch.argmax(outputs['start_logits'], dim=1).item()
     end_index = torch.argmax(outputs['end_logits'], dim=1).item()
-
     if start_index <= end_index:
         answer_ids = input_ids[0][start_index:end_index+1]
         answer = tokenizer.decode(answer_ids, skip_special_tokens=True)
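get_answer above does extractive question answering: it takes the argmax of the start and end logits and decodes the token span between them. A toy sketch of just that span-selection step, on made-up logits:

    # Toy illustration of the argmax span selection in get_answer.
    import torch
    start_logits = torch.tensor([[0.1, 2.0, 0.3, 0.2]])  # argmax -> 1
    end_logits = torch.tensor([[0.0, 0.1, 0.2, 3.0]])    # argmax -> 3
    start_index = torch.argmax(start_logits, dim=1).item()
    end_index = torch.argmax(end_logits, dim=1).item()
    if start_index <= end_index:
        # In the real code: answer_ids = input_ids[0][start_index:end_index + 1],
        # then tokenizer.decode(answer_ids, skip_special_tokens=True).
        print(start_index, end_index)  # -> 1 3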
@@ -214,41 +190,37 @@ def get_answer(question, context):

 # --- Gradio Interface ---
 def add_text(history, text):
-    history
+    history.append({"role": "user", "content": text})
     return history, ""

 def bot(history):
-    question = history[-1][
+    question = history[-1]["content"]
     context = rag_pipeline.retrieve_context(question)
     answer = get_answer(question, context)
-    history
+    history.append({"role": "assistant", "content": answer})
     return history

+print("--- Models loaded, building Gradio interface ---")
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# Chat with your PDF using a Custom Edge SLM")
     gr.Markdown("1. Upload a PDF. 2. Wait for it to be processed. 3. Ask questions about its content.")
-
     with gr.Row():
         with gr.Column(scale=1):
             pdf_upload = gr.File(label="Upload PDF")
             upload_status = gr.Textbox(label="PDF Status", interactive=False)
             download_embeddings = gr.File(label="Download Raw Embeddings", interactive=False)
-
         with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="Chat History", height=500)
+            chatbot = gr.Chatbot(label="Chat History", height=500, type='messages')
             question_box = gr.Textbox(label="Your Question", placeholder="Ask something about the document...")
-
-    # Event Handlers
     question_box.submit(add_text, [chatbot, question_box], [chatbot, question_box]).then(
         bot, chatbot, chatbot
     )
-
     pdf_upload.upload(
         fn=rag_pipeline.process_pdf,
         inputs=[pdf_upload],
         outputs=[upload_status, download_embeddings]
     )
+    print("✅ Gradio interface built successfully.")

-# To this:
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=
+    demo.launch(server_name="0.0.0.0", server_port=7860)
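The last hunk also switches the chat history from Gradio's legacy tuple format to the messages format, which is why the Chatbot is created with type='messages' and add_text/bot now append role/content dicts. A rough walk-through of that round trip, assuming the model, tokenizer, and a processed PDF are already loaded:

    # Hypothetical round trip through the new messages-format handlers.
    history = []
    history, _ = add_text(history, "What is this document about?")
    # history == [{"role": "user", "content": "What is this document about?"}]
    history = bot(history)
    # bot() retrieves context for the last user message, runs get_answer, and
    # appends {"role": "assistant", "content": answer} before returning history.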