Spaces:

simran40
/

RAG-CHATBOT

Sleeping

App Files Files Community

simran40 committed on Dec 16, 2025

Commit

0fd613a

verified ·

1 Parent(s): 720e7c7

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -57

app.py CHANGED Viewed

@@ -2,31 +2,26 @@ import gradio as gr
 import fitz  # PyMuPDF
 import re
 import faiss
-import torch
 import numpy as np
 from sentence_transformers import SentenceTransformer
-from transformers import AutoTokenizer, AutoModelForCausalLM
 # =================================================
 # MODEL LOADING (ONCE AT STARTUP)
 # =================================================
-# Better embedding model for Q&A
 embedding_model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")
-# Lightweight open-source LLM (CPU friendly)
-LLM_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
-tokenizer = AutoTokenizer.from_pretrained(LLM_NAME)
-llm = AutoModelForCausalLM.from_pretrained(
-    LLM_NAME,
-    torch_dtype=torch.float32
 )
-llm.eval()
 # =================================================
 # PDF PROCESSING
@@ -41,13 +36,13 @@ def extract_text_from_pdf(pdf_path):
 def clean_text(text):
-    # remove extra spaces
     text = re.sub(r"\s+", " ", text)
-    # remove table of contents noise
     text = re.sub(r"Table of contents.*?Introduction", "", text, flags=re.I)
-    # remove page numbers
     text = re.sub(r"\bPage \d+\b", "", text)
     return text.strip()
@@ -55,7 +50,7 @@ def clean_text(text):
 def chunk_text(text, chunk_size=350, overlap=80):
     """
-    Smaller overlapping chunks improve semantic retrieval accuracy
     """
     chunks = []
     start = 0
@@ -85,7 +80,7 @@ def build_faiss_index(chunks):
 def retrieve_relevant_chunks(query, index, chunks, top_k=5):
     """
-    Retrieve top-K chunks + re-rank by distance
     """
     query_embedding = embedding_model.encode([query]).astype("float32")
     distances, indices = index.search(query_embedding, top_k)
@@ -94,56 +89,41 @@ def retrieve_relevant_chunks(query, index, chunks, top_k=5):
     for rank, idx in enumerate(indices[0]):
         results.append((chunks[idx], distances[0][rank]))
-    # re-rank: smaller distance = more relevant
     results.sort(key=lambda x: x[1])
     return [r[0] for r in results]
 # =================================================
-# ANSWER GENERATION (LLM)
 # =================================================
 def generate_answer(question, context_chunks):
-    context = "\n\n".join(context_chunks)
-    prompt = f"""
-You are a precise academic assistant.
-RULES:
-- Answer ONLY from the given context.
-- Do NOT add external knowledge.
-- Be concise and factual.
-- If the answer is missing, reply exactly:
-  "Information not found in the document."
-CONTEXT:
-{context}
-QUESTION:
-{question}
-FINAL ANSWER:
-"""
-    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
-    with torch.no_grad():
-        output = llm.generate(
-            **inputs,
-            max_new_tokens=180,
-            temperature=0.1
-        )
-    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
-    return decoded.split("FINAL ANSWER:")[-1].strip()
 # =================================================
-# MAIN RAG PIPELINE
 # =================================================
-def pdf_rag_chat(pdf_file, question):
     if pdf_file is None or question.strip() == "":
         return "Please upload a PDF and enter a valid question."
@@ -154,13 +134,13 @@ def pdf_rag_chat(pdf_file, question):
     # 2. Chunking
     chunks = chunk_text(cleaned_text)
-    # 3. Vector DB
     index, chunks = build_faiss_index(chunks)
-    # 4. Retrieval
     relevant_chunks = retrieve_relevant_chunks(question, index, chunks)
-    # 5. Answer generation
     return generate_answer(question, relevant_chunks)
@@ -171,11 +151,13 @@ def pdf_rag_chat(pdf_file, question):
 with gr.Blocks() as demo:
     gr.Markdown("""
-    # 📄 PDF RAG Chatbot (Open-Source AI)
-    Upload a **PDF document** and ask questions based **only on its content**.
-    This system implements an **accuracy-optimized Retrieval Augmented Generation (RAG)** pipeline
-    using **open-source Hugging Face models**, running on **free CPU**.
     """)
     with gr.Row():
@@ -200,7 +182,7 @@ with gr.Blocks() as demo:
             )
     submit_btn.click(
-        fn=pdf_rag_chat,
         inputs=[pdf_input, question_input],
         outputs=answer_output
     )

 import fitz  # PyMuPDF
 import re
 import faiss
 import numpy as np
 from sentence_transformers import SentenceTransformer
+from transformers import pipeline
 # =================================================
 # MODEL LOADING (ONCE AT STARTUP)
 # =================================================
+# Embedding model (good for question-answer retrieval)
 embedding_model = SentenceTransformer("multi-qa-MiniLM-L6-cos-v1")
+# Extractive Question Answering model (HIGH ACCURACY)
+qa_pipeline = pipeline(
+    "question-answering",
+    model="deepset/roberta-base-squad2",
+    tokenizer="deepset/roberta-base-squad2"
 )
 # =================================================
 # PDF PROCESSING
 def clean_text(text):  # normalises extracted text and returns it with the ends stripped
+    # Collapse every whitespace run (newlines and tabs included) into a single space
     text = re.sub(r"\s+", " ", text)
+    # Delete each span from "Table of contents" up to the nearest following "Introduction" (case-insensitive, non-greedy)
     text = re.sub(r"Table of contents.*?Introduction", "", text, flags=re.I)
+    # Remove "Page <digits>" markers (case-sensitive); NOTE(review): runs after the whitespace collapse, so a removed marker can leave two adjacent spaces
     text = re.sub(r"\bPage \d+\b", "", text)
     return text.strip()
 def chunk_text(text, chunk_size=350, overlap=80):
     """
+    Smaller overlapping chunks improve accuracy
     """
     chunks = []
     start = 0
 def retrieve_relevant_chunks(query, index, chunks, top_k=5):
     """
+    Retrieve top-K chunks and re-rank by distance
     """
     query_embedding = embedding_model.encode([query]).astype("float32")  # one-element batch, cast to float32 before searching
     distances, indices = index.search(query_embedding, top_k)
     for rank, idx in enumerate(indices[0]):  # row 0 holds the hits for the single query; NOTE(review): `results` is initialised on a line this diff view elides
         results.append((chunks[idx], distances[0][rank]))  # NOTE(review): if the index reports a missing neighbour as -1, chunks[-1] silently picks the last chunk - confirm
+    # Sort ascending by distance so the closest chunks come first
     results.sort(key=lambda x: x[1])
     return [r[0] for r in results]
 # =================================================
+# ANSWER GENERATION (EXTRACTIVE QA – ACCURATE)
 # =================================================
 def generate_answer(question, context_chunks):  # returns the best-scoring extracted answer, or a fixed fallback sentence
+    best_answer = ""
+    best_score = 0.0  # highest result["score"] seen so far; stays 0.0 when context_chunks is empty
+    for chunk in context_chunks:  # one qa_pipeline call per retrieved chunk
+        result = qa_pipeline(
+            question=question,
+            context=chunk
+        )
+        if result["score"] > best_score:  # strict comparison: on a tie the earlier chunk's answer is kept
+            best_score = result["score"]
+            best_answer = result["answer"]
+    if best_score < 0.25 or best_answer.strip() == "":  # reject answers scoring below 0.25 and blank answers
+        return "Information not found in the document."
+    return best_answer
 # =================================================
+# MAIN PIPELINE
 # =================================================
+def pdf_qa_chat(pdf_file, question):
     if pdf_file is None or question.strip() == "":
         return "Please upload a PDF and enter a valid question."
     # 2. Chunking
     chunks = chunk_text(cleaned_text)
+    # 3. Vector database
     index, chunks = build_faiss_index(chunks)
+    # 4. Retrieve relevant chunks
     relevant_chunks = retrieve_relevant_chunks(question, index, chunks)
+    # 5. Extractive QA
     return generate_answer(question, relevant_chunks)
 with gr.Blocks() as demo:
     gr.Markdown("""
+    # 📄 PDF Question Answering System (Accurate AI)
+    Upload a **PDF document** and ask questions.
+    The system uses **semantic retrieval + extractive AI**, ensuring
+    **accurate answers strictly from the document text**.
+    ---
     """)
     with gr.Row():
             )
     submit_btn.click(
+        fn=pdf_qa_chat,
         inputs=[pdf_input, question_input],
         outputs=answer_output
     )