Punit1 committed on
Commit
e6c2792
·
verified ·
1 Parent(s): e2d2e34

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -26
app.py CHANGED
@@ -1,33 +1,70 @@
1
  import gradio as gr
2
  import torch
 
 
 
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM
4
  from sentence_transformers import SentenceTransformer
5
- import faiss
6
- import numpy as np
7
  from pypdf import PdfReader
8
 
9
- # Load embedding model
10
- embed_model = SentenceTransformer("all-MiniLM-L6-v2")
11
-
12
- # Load Phi-3-mini
 
 
 
 
 
 
 
 
 
 
 
13
  model_name = "microsoft/Phi-3-mini-4k-instruct"
 
 
14
  tokenizer = AutoTokenizer.from_pretrained(model_name)
15
- model = AutoModelForCausalLM.from_pretrained(model_name)
16
 
17
- # Global storage
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  chunks = []
19
  index = None
20
 
 
 
 
21
  def process_pdf(pdf_file):
22
  global chunks, index
23
-
 
24
  reader = PdfReader(pdf_file)
25
  text = ""
 
26
  for page in reader.pages:
27
- text += page.extract_text()
 
 
 
 
 
 
28
 
29
- # Chunking
30
- chunks = [text[i:i+500] for i in range(0, len(text), 500)]
31
 
32
  embeddings = embed_model.encode(chunks)
33
  dimension = embeddings.shape[1]
@@ -35,40 +72,88 @@ def process_pdf(pdf_file):
35
  index = faiss.IndexFlatL2(dimension)
36
  index.add(np.array(embeddings))
37
 
38
- return "PDF processed successfully!"
39
 
 
 
 
 
 
40
  def ask_question(query):
41
  global chunks, index
42
 
 
 
 
 
 
 
 
43
  query_embedding = embed_model.encode([query])
44
- D, I = index.search(np.array(query_embedding), k=3)
 
 
45
 
46
  context = "\n".join([chunks[i] for i in I[0]])
47
 
48
- prompt = f"""
49
- Use the context below to answer the question.
 
 
 
 
 
 
50
 
51
- Context:
52
- {context}
 
53
 
54
- Question:
55
- {query}
 
56
 
57
- Answer:
58
- """
59
 
60
  inputs = tokenizer(prompt, return_tensors="pt")
61
- outputs = model.generate(**inputs, max_new_tokens=200)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
64
- return response
65
 
 
 
 
 
 
 
 
 
 
 
 
66
  with gr.Blocks() as demo:
67
- gr.Markdown("# 📚 Minimal RAG with Phi-3-mini")
68
 
69
  pdf_input = gr.File(label="Upload PDF")
70
  upload_btn = gr.Button("Process PDF")
71
- status = gr.Textbox()
72
 
73
  question = gr.Textbox(label="Ask a question")
74
  answer = gr.Textbox(label="Answer")
 
import gradio as gr
import torch
import time
import logging
import numpy as np
import faiss
from transformers import AutoTokenizer, AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
from pypdf import PdfReader

# ==============================
# Logging Setup
# ==============================
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ==============================
# Load Embedding Model
# ==============================
# Small sentence-embedding model used both to index PDF chunks and to
# embed user queries; loaded once at import time.
logger.info("Loading embedding model...")
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# ==============================
# Load Phi-3 Mini (CPU Optimized)
# ==============================
model_name = "microsoft/Phi-3-mini-4k-instruct"

logger.info("Loading Phi-3-mini model...")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# float32 + low_cpu_mem_usage for CPU-only inference (no GPU assumed here).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True
)

model.to("cpu")
model.eval()  # inference mode: disables dropout etc.

logger.info("Model loaded successfully.")

# ==============================
# Global Storage
# ==============================
# Populated by process_pdf() and read by ask_question():
#   chunks - list[str] of fixed-size text slices of the uploaded PDF
#   index  - FAISS L2 index over the chunk embeddings (None until a PDF
#            has been processed)
chunks = []
index = None
47
 
48
# ==============================
# PDF Processing
# ==============================
def process_pdf(pdf_file):
    """Extract text from *pdf_file*, chunk it, and build a FAISS index.

    Updates the module-level ``chunks`` and ``index`` globals that
    ``ask_question`` reads.

    Parameters:
        pdf_file: file path / file-like object from the Gradio File input,
            or None when no file was selected.

    Returns:
        A human-readable status string shown in the Gradio status box.
    """
    global chunks, index

    # Gradio passes None when the button is clicked with no file chosen.
    if pdf_file is None:
        return "Please select a PDF file first."

    logger.info("Processing PDF...")
    reader = PdfReader(pdf_file)

    # Collect page texts in a list and join once (avoids quadratic +=).
    parts = []
    for page in reader.pages:
        # extract_text() can return None/"" for image-only pages.
        content = page.extract_text()
        if content:
            parts.append(content)
    text = "".join(parts)

    # Smaller chunks = faster generation (less context fed to the LLM).
    chunk_size = 350
    chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]

    if not chunks:
        # Scanned / image-only PDF: encoding an empty list would crash on
        # embeddings.shape[1] below, so bail out with a clear message.
        index = None
        return "No extractable text found in this PDF."

    logger.info(f"Total chunks created: {len(chunks)}")

    embeddings = embed_model.encode(chunks)
    dimension = embeddings.shape[1]

    index = faiss.IndexFlatL2(dimension)
    index.add(np.array(embeddings))

    logger.info("FAISS index built successfully.")

    return "✅ PDF processed successfully!"
78
+
79
# ==============================
# Smart RAG Question Answering
# ==============================
def ask_question(query):
    """Answer *query* using context retrieved from the indexed PDF.

    Embeds the query, retrieves the nearest chunks from the FAISS index,
    builds a Phi-3 instruct prompt, and generates an answer on CPU.

    Parameters:
        query: the user's question (str).

    Returns:
        The model's answer text, or a hint string when no PDF has been
        processed yet.
    """
    global chunks, index

    if index is None:
        return "Please upload and process a PDF first."

    start_total = time.time()
    logger.info("Received question.")

    # Embed query
    query_embedding = embed_model.encode([query])

    # Retrieve top 2 relevant chunks — but never ask for more neighbors
    # than the index holds (FAISS pads missing hits with index -1, which
    # would silently wrap to chunks[-1]).
    k = min(2, len(chunks))
    D, I = index.search(np.array(query_embedding), k=k)

    context = "\n".join([chunks[i] for i in I[0] if i >= 0])

    # Phi-3 Instruct Template (CRITICAL)
    prompt = f"""<|system|>
You are an expert AI assistant.
Answer clearly, accurately, and concisely.
Use structured explanation when helpful.
Avoid repeating the question.
If answer not in context, say so.
<|end|>

<|user|>
Context:
{context}

Question:
{query}
<|end|>

<|assistant|>
"""

    inputs = tokenizer(prompt, return_tensors="pt")

    logger.info(f"Prompt token length: {len(inputs['input_ids'][0])}")

    with torch.no_grad():
        start_gen = time.time()

        outputs = model.generate(
            **inputs,
            max_new_tokens=120,
            temperature=0.6,
            top_p=0.9,
            do_sample=True,
            repetition_penalty=1.15,
            use_cache=True
        )

        logger.info(f"Generation time: {time.time() - start_gen:.2f}s")

    # Decode ONLY the newly generated tokens. The previous approach of
    # splitting the full decoded text on "<|assistant|>" fails because
    # skip_special_tokens=True strips that marker from the output, so
    # the split returned the entire prompt + answer. Slicing off the
    # prompt tokens is robust regardless of special-token handling.
    prompt_len = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    logger.info(f"Total response time: {time.time() - start_total:.2f}s")

    return answer
146
+
147
+
148
+ # ==============================
149
+ # Gradio UI
150
+ # ==============================
151
  with gr.Blocks() as demo:
152
+ gr.Markdown("# 📚 Optimized RAG with Phi-3-mini")
153
 
154
  pdf_input = gr.File(label="Upload PDF")
155
  upload_btn = gr.Button("Process PDF")
156
+ status = gr.Textbox(label="Status")
157
 
158
  question = gr.Textbox(label="Ask a question")
159
  answer = gr.Textbox(label="Answer")