Update app.py
app.py (CHANGED)
import gradio as gr
import fitz  # PyMuPDF
import re
import faiss
import torch
import numpy as np

from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM


# ===============================
# MODEL LOADING (ONCE)
# ===============================

# Embedding model (lightweight & fast)
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Open-source LLM (CPU friendly)
LLM_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(LLM_NAME)
llm = AutoModelForCausalLM.from_pretrained(
    LLM_NAME,
    torch_dtype=torch.float32
)
llm.eval()
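
# NOTE (sketch, not part of this commit): TinyLlama-1.1B-Chat is an
# instruction-tuned model whose tokenizer ships a chat template, so the raw
# f-string prompt built in generate_answer() below could instead be produced
# with tokenizer.apply_chat_template. A hypothetical variant:
#
#   messages = [{"role": "user", "content": "Answer using only this context: ..."}]
#   chat_prompt = tokenizer.apply_chat_template(
#       messages,
#       tokenize=False,              # return a formatted string, not token IDs
#       add_generation_prompt=True,  # append the assistant-turn marker
#   )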

# ===============================
# PDF PROCESSING FUNCTIONS
# ===============================

def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    return text


def clean_text(text):
    return re.sub(r"\s+", " ", text).strip()
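
# NOTE (sketch, not part of this commit): fitz.Document supports the
# context-manager protocol, so the same extraction could close the file
# deterministically, even if get_text() raises:
#
#   def extract_text_from_pdf(pdf_path):
#       text = ""
#       with fitz.open(pdf_path) as doc:
#           for page in doc:
#               text += page.get_text()
#       return text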

def chunk_text(text, chunk_size=500, overlap=50):
    chunks = []
    start = 0
    text_length = len(text)

    while start < text_length:
        end = start + chunk_size
        chunks.append(text[start:end])
        start = end - overlap

    return chunks
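
# Overlap arithmetic, made concrete (illustrative comment only): each window
# advances by chunk_size - overlap = 450 characters, and the last slice is
# clipped to the end of the text. For a 1,200-character input:
#
#   chunks = chunk_text("x" * 1200)
#   [len(c) for c in chunks]  ->  [500, 500, 300]   # windows start at 0, 450, 900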

# ===============================
# VECTOR DATABASE (FAISS)
# ===============================

def build_faiss_index(chunks):
    embeddings = embedding_model.encode(chunks)
    embeddings = np.array(embeddings).astype("float32")

    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)

    return index, chunks


def retrieve_relevant_chunks(query, index, chunks, top_k=3):
    query_embedding = embedding_model.encode([query]).astype("float32")
    _, indices = index.search(query_embedding, top_k)

    return [chunks[i] for i in indices[0]]
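
# NOTE (sketch, not part of this commit): IndexFlatL2 does exact Euclidean
# search, which is fine at this scale. MiniLM embeddings are often ranked by
# cosine similarity instead; an equivalent variant L2-normalizes the rows and
# uses an inner-product index (queries must be normalized the same way):
#
#   def build_cosine_index(chunks):
#       embeddings = embedding_model.encode(chunks)
#       embeddings = np.ascontiguousarray(embeddings, dtype="float32")
#       faiss.normalize_L2(embeddings)                  # in-place row normalization
#       index = faiss.IndexFlatIP(embeddings.shape[1])  # inner product == cosine
#       index.add(embeddings)
#       return index, chunks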

# ===============================
# ANSWER GENERATION (LLM)
# ===============================

def generate_answer(question, context_chunks):
    context = "\n\n".join(context_chunks)

    prompt = f"""
You are an AI assistant.
Answer the question strictly using the given context.
If the answer is not found, reply:
"Information not found in the document."

Context:
{context}

Question:
{question}

Answer:
"""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)

    with torch.no_grad():
        output = llm.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,   # temperature only takes effect when sampling is enabled
            temperature=0.2
        )

    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
    return decoded_output.split("Answer:")[-1].strip()
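
# NOTE (sketch, not part of this commit): causal LMs echo the prompt in
# generate() output, which is why the code above splits on "Answer:". Slicing
# by token count avoids relying on that marker appearing exactly once:
#
#   prompt_len = inputs["input_ids"].shape[1]
#   answer = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()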

# ===============================
# MAIN RAG PIPELINE
# ===============================

def pdf_rag_chat(pdf_file, question):
    if pdf_file is None or question.strip() == "":
        return "Please upload a PDF and enter a question."

    # 1. Extract & preprocess text
    raw_text = extract_text_from_pdf(pdf_file.name)
    cleaned_text = clean_text(raw_text)

    # 2. Chunking
    chunks = chunk_text(cleaned_text)

    # 3. Vector DB
    index, chunks = build_faiss_index(chunks)

    # 4. Retrieval
    relevant_chunks = retrieve_relevant_chunks(question, index, chunks)

    # 5. LLM Answer
    return generate_answer(question, relevant_chunks)
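
# NOTE (sketch, not part of this commit): pdf_rag_chat re-extracts, re-chunks,
# and re-embeds the PDF on every question. A small cache keyed by file path
# would let follow-up questions on the same document reuse the index:
#
#   _index_cache = {}
#
#   def get_index_cached(pdf_path):
#       if pdf_path not in _index_cache:
#           text = clean_text(extract_text_from_pdf(pdf_path))
#           _index_cache[pdf_path] = build_faiss_index(chunk_text(text))
#       return _index_cache[pdf_path]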

# ===============================
# GRADIO UI (PRODUCTION READY)
# ===============================

with gr.Blocks(theme=gr.themes.Soft()) as demo:

    gr.Markdown("""
# 📄 PDF RAG Chatbot (Open-Source AI)

Upload a **PDF document** and ask questions based **only on its content**.
This system uses a **Retrieval Augmented Generation (RAG)** architecture with
**open-source Hugging Face models**, running entirely on **free CPU**.

---
""")

    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(
                label="📤 Upload PDF",
                file_types=[".pdf"],
                file_count="single"
            )

            question_input = gr.Textbox(
                label="❓ Ask your question",
                placeholder="e.g. What is the objective of the project?",
                lines=2
            )

            submit_btn = gr.Button("🔍 Get Answer", variant="primary")

        with gr.Column(scale=2):
            answer_output = gr.Textbox(
                label="📌 Answer",
                lines=10,
                show_copy_button=True
            )

    submit_btn.click(
        fn=pdf_rag_chat,
        inputs=[pdf_input, question_input],
        outputs=answer_output
    )

    gr.Markdown("""
---
### ⚙️ System Information
- **LLM:** TinyLlama (Open-Source, Hugging Face)
- **Embeddings:** Sentence Transformers
- **Vector Store:** FAISS
- **Deployment:** Hugging Face Spaces (Free CPU)

---
© **Simranpreet Kaur**
**NIELIT Ropar | AIML Six Months Training | 2026**
""")

demo.launch()
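
# NOTE (sketch, not part of this commit): for the Space to build, the imports
# above imply a requirements.txt roughly like the following (package names
# inferred from the imports; version pins omitted):
#
#   gradio
#   PyMuPDF
#   faiss-cpu
#   torch
#   numpy
#   sentence-transformers
#   transformers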