Update app.py
app.py
CHANGED
@@ -9,25 +9,10 @@ import faiss
 import numpy as np
 import PyPDF2
 from pathlib import Path
-
-#
-# This is generally handled by requirements.txt on Hugging Face Spaces,
-# but this is a fallback for local execution.
-try:
-    import faiss
-except ImportError:
-    print("Installing faiss-cpu...")
-    os.system("pip install --quiet faiss-cpu")
-    import faiss
-
-try:
-    import PyPDF2
-except ImportError:
-    print("Installing PyPDF2...")
-    os.system("pip install --quiet PyPDF2")
-    import PyPDF2
-
-# --- Model Architecture (Copied from your provided code) ---
+import traceback  # Import traceback for detailed error logging
+import sys
+
+# --- Model Architecture (Same as before) ---
 class EfficientMultiHeadAttention(nn.Module):
     def __init__(self, d_model, n_heads, dropout=0.1):
         super().__init__()
@@ -109,35 +94,43 @@ TOKENIZER_NAME = "bert-base-uncased"
 EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
 DEVICE = torch.device('cpu')

-#
-
-print(
-
-
-
-
-
-
-torch.save({
-    'config': dummy_config, 'model_state_dict': dummy_model.state_dict(), 'quantization': 'dynamic_int8'
-}, MODEL_PATH)
-
-def load_custom_model(model_path):
-    checkpoint = torch.load(model_path, map_location=DEVICE)
+# --- Robust Model Loading ---
+try:
+    print("--- Starting Application ---")
+
+    # 1. Load Custom Inference Model
+    print(f"Attempting to load custom model from: {MODEL_PATH}")
+    if not os.path.exists(MODEL_PATH):
+        raise FileNotFoundError(f"CRITICAL: Model file not found at '{MODEL_PATH}'. Please ensure the file exists in your repository.")
+
+    checkpoint = torch.load(MODEL_PATH, map_location=DEVICE)
     config = checkpoint['config']
-
-
-
-
-
-
-
-
-tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
-
-
-
-
+    inference_model = EdgeOptimizedSLM(**config)
+    inference_model.load_state_dict(checkpoint['model_state_dict'])
+    inference_model.to(DEVICE)
+    inference_model.eval()
+    print("✅ Custom inference model loaded successfully.")
+
+    # 2. Load Tokenizer
+    print(f"Attempting to load tokenizer: {TOKENIZER_NAME}")
+    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_NAME)
+    print("✅ Tokenizer loaded successfully.")
+
+    # 3. Load Embedding Model
+    print(f"Attempting to load embedding model: {EMBEDDING_MODEL_NAME}")
+    embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME, device=DEVICE)
+    print("✅ Embedding model loaded successfully.")
+
+except Exception as e:
+    print("--- 🔴 AN ERROR OCCURRED DURING STARTUP ---")
+    print(f"Error Type: {type(e).__name__}")
+    print(f"Error Details: {e}")
+    print("------------------------------------------")
+    traceback.print_exc()  # Print the full traceback for detailed debugging
+    # We exit here because the app cannot run without the models.
+    sys.exit("Exiting application due to critical startup error.")
+
+# --- RAG and PDF Processing Logic (Same as before) ---
 class RAGPipeline:
     def __init__(self, embedding_model):
         self.text_chunks = []
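The robust loader introduced above expects the checkpoint at MODEL_PATH to carry 'config' and 'model_state_dict' entries, matching the torch.save call the removed startup code used. A minimal sketch of writing a compatible checkpoint; the config keys and values here are illustrative placeholders, not the real EdgeOptimizedSLM settings:

    # Hypothetical example of saving a checkpoint in the layout the loader reads.
    # The keys of `config` must match EdgeOptimizedSLM's constructor arguments;
    # the values below are placeholders for illustration only.
    config = {"d_model": 256, "n_heads": 4, "max_length": 512}
    model = EdgeOptimizedSLM(**config)
    torch.save({
        "config": config,
        "model_state_dict": model.state_dict(),
        "quantization": "dynamic_int8",
    }, MODEL_PATH)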
@@ -146,36 +139,23 @@ class RAGPipeline:
         self.raw_embeddings_path = "document_embeddings.raw"

     def process_pdf(self, pdf_file_obj):
-        if pdf_file_obj is None:
-            return "Please upload a PDF file first.", None
-
+        if pdf_file_obj is None: return "Please upload a PDF file first.", None
         print(f"Processing PDF: {pdf_file_obj.name}")
         self.text_chunks = []
-
         try:
             pdf_reader = PyPDF2.PdfReader(pdf_file_obj.name)
             text = "".join(page.extract_text() for page in pdf_reader.pages if page.extract_text())
-
-            if not text:
-                return "Could not extract text from the PDF.", None
-
+            if not text: return "Could not extract text from the PDF.", None
             words = text.split()
             chunk_size, overlap = 200, 30
             for i in range(0, len(words), chunk_size - overlap):
                 self.text_chunks.append(" ".join(words[i:i + chunk_size]))
-
-            if not self.text_chunks:
-                return "Text extracted but could not be split into chunks.", None
-
-            print(f"Generating embeddings for {len(self.text_chunks)} chunks...")
+            if not self.text_chunks: return "Text extracted but could not be split into chunks.", None
             embeddings = self.embedding_model.encode(self.text_chunks, convert_to_tensor=False, show_progress_bar=True)
-
             with open(self.raw_embeddings_path, 'wb') as f:
                 f.write(embeddings.tobytes())
-
             self.vector_store = faiss.IndexFlatL2(embeddings.shape[1])
             self.vector_store.add(embeddings)
-
             status_message = f"Successfully processed '{Path(pdf_file_obj.name).name}'. Ready for questions."
             print("PDF processing complete.")
             return status_message, self.raw_embeddings_path
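For reference, the chunking loop in process_pdf steps through the word list in strides of chunk_size - overlap = 170 words, so consecutive chunks share a 30-word overlap. A standalone sketch of the same arithmetic:

    # Sliding-window chunking as used in process_pdf, on a toy word list.
    words = ["w%d" % i for i in range(1000)]               # pretend: 1,000 extracted words
    chunk_size, overlap = 200, 30
    chunks = []
    for i in range(0, len(words), chunk_size - overlap):   # stride = 170 words
        chunks.append(" ".join(words[i:i + chunk_size]))
    # Chunks start at word offsets 0, 170, 340, 510, 680, 850 -> 6 chunks,
    # each up to 200 words long and overlapping its neighbour by 30 words.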
@@ -183,7 +163,6 @@ class RAGPipeline:
             print(f"Error processing PDF: {e}")
             return f"Error processing PDF: {e}", None

-
     def retrieve_context(self, query, top_k=3):
         if self.vector_store is None: return ""
         query_embedding = self.embedding_model.encode([query])
@@ -192,19 +171,16 @@ class RAGPipeline:

 rag_pipeline = RAGPipeline(embedding_model)

-# --- Chatbot Inference Logic ---
+# --- Chatbot Inference Logic (Same as before) ---
 def get_answer(question, context):
     if not context:
         return "I could not find relevant information in the document to answer that question."
-
     inputs = tokenizer(question, context, return_tensors='pt', max_length=model_config.get('max_length', 512), truncation=True, padding='max_length')
     input_ids, attention_mask = inputs['input_ids'].to(DEVICE), inputs['attention_mask'].to(DEVICE)
-
     with torch.no_grad():
         outputs = inference_model(input_ids, attention_mask)
     start_index = torch.argmax(outputs['start_logits'], dim=1).item()
     end_index = torch.argmax(outputs['end_logits'], dim=1).item()
-
     if start_index <= end_index:
         answer_ids = input_ids[0][start_index:end_index+1]
         answer = tokenizer.decode(answer_ids, skip_special_tokens=True)
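get_answer above does extractive question answering: it takes the argmax of the start and end logits and decodes the token span between them. A toy sketch of just that span-selection step, on made-up logits:

    # Toy illustration of the argmax span selection in get_answer.
    import torch
    start_logits = torch.tensor([[0.1, 2.0, 0.3, 0.2]])  # argmax -> 1
    end_logits = torch.tensor([[0.0, 0.1, 0.2, 3.0]])    # argmax -> 3
    start_index = torch.argmax(start_logits, dim=1).item()
    end_index = torch.argmax(end_logits, dim=1).item()
    if start_index <= end_index:
        # In the real code: answer_ids = input_ids[0][start_index:end_index + 1],
        # then tokenizer.decode(answer_ids, skip_special_tokens=True).
        print(start_index, end_index)  # -> 1 3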
@@ -214,41 +190,37 @@ def get_answer(question, context):

 # --- Gradio Interface ---
 def add_text(history, text):
-    history
+    history.append({"role": "user", "content": text})
     return history, ""

 def bot(history):
-    question = history[-1][
+    question = history[-1]["content"]
     context = rag_pipeline.retrieve_context(question)
     answer = get_answer(question, context)
-    history
+    history.append({"role": "assistant", "content": answer})
     return history

+print("--- Models loaded, building Gradio interface ---")
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# Chat with your PDF using a Custom Edge SLM")
     gr.Markdown("1. Upload a PDF. 2. Wait for it to be processed. 3. Ask questions about its content.")
-
     with gr.Row():
         with gr.Column(scale=1):
             pdf_upload = gr.File(label="Upload PDF")
             upload_status = gr.Textbox(label="PDF Status", interactive=False)
             download_embeddings = gr.File(label="Download Raw Embeddings", interactive=False)
-
         with gr.Column(scale=2):
-            chatbot = gr.Chatbot(label="Chat History", height=500)
+            chatbot = gr.Chatbot(label="Chat History", height=500, type='messages')
             question_box = gr.Textbox(label="Your Question", placeholder="Ask something about the document...")
-
-    # Event Handlers
     question_box.submit(add_text, [chatbot, question_box], [chatbot, question_box]).then(
         bot, chatbot, chatbot
     )
-
     pdf_upload.upload(
         fn=rag_pipeline.process_pdf,
         inputs=[pdf_upload],
         outputs=[upload_status, download_embeddings]
     )
+    print("✅ Gradio interface built successfully.")

-# To this:
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=
+    demo.launch(server_name="0.0.0.0", server_port=7860)
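The last hunk also switches the chat history from Gradio's legacy tuple format to the messages format, which is why the Chatbot is created with type='messages' and add_text/bot now append role/content dicts. A rough walk-through of that round trip, assuming the model, tokenizer, and a processed PDF are already loaded:

    # Hypothetical round trip through the new messages-format handlers.
    history = []
    history, _ = add_text(history, "What is this document about?")
    # history == [{"role": "user", "content": "What is this document about?"}]
    history = bot(history)
    # bot() retrieves context for the last user message, runs get_answer, and
    # appends {"role": "assistant", "content": answer} before returning history.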