import gradio as gr
import fitz  # PyMuPDF
import re
import faiss
import torch
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

# ===============================
# MODEL LOADING
# ===============================
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

LLM_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
tokenizer = AutoTokenizer.from_pretrained(LLM_NAME)
llm = AutoModelForCausalLM.from_pretrained(
    LLM_NAME,
    torch_dtype=torch.float32  # full precision keeps CPU inference simple
)
llm.eval()

# ===============================
# PDF PROCESSING
# ===============================
def extract_text_from_pdf(pdf_path):
    """Concatenate the plain text of every page in the PDF."""
    text = ""
    with fitz.open(pdf_path) as doc:  # context manager closes the file handle
        for page in doc:
            text += page.get_text()
    return text

def clean_text(text):
    """Collapse all runs of whitespace into single spaces."""
    return re.sub(r"\s+", " ", text).strip()

def chunk_text(text, chunk_size=500, overlap=50):
    """Split text into fixed-size character chunks; the overlap keeps
    sentences that straddle a boundary retrievable from either chunk."""
    chunks = []
    start = 0
    while start < len(text):
        end = start + chunk_size
        chunks.append(text[start:end])
        start = end - overlap
    return chunks

# ===============================
# VECTOR DB (FAISS)
# ===============================
def build_faiss_index(chunks):
    """Embed every chunk and store the vectors in an exact L2 index."""
    embeddings = embedding_model.encode(chunks)
    embeddings = np.asarray(embeddings, dtype="float32")  # FAISS expects float32
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, chunks

def retrieve_relevant_chunks(query, index, chunks, top_k=3):
    """Return the top_k chunks nearest to the query embedding."""
    top_k = min(top_k, len(chunks))  # FAISS returns -1 indices when k > ntotal
    query_embedding = embedding_model.encode([query]).astype("float32")
    _, indices = index.search(query_embedding, top_k)
    return [chunks[i] for i in indices[0]]

# ===============================
# LLM ANSWER
# ===============================
def generate_answer(question, context_chunks):
    context = "\n\n".join(context_chunks)
    prompt = f"""
Answer the question strictly using the given context.
If the answer is not found, say: "Information not found in the document."

Context:
{context}

Question: {question}

Answer:
"""
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True)
    with torch.no_grad():
        output = llm.generate(
            **inputs,
            max_new_tokens=200,
            do_sample=True,   # sampling must be enabled for temperature to apply
            temperature=0.2
        )
    decoded = tokenizer.decode(output[0], skip_special_tokens=True)
    # The decoded string echoes the prompt; keep only what follows "Answer:".
    return decoded.split("Answer:")[-1].strip()

# ===============================
# MAIN PIPELINE
# ===============================
def pdf_rag_chat(pdf_file, question):
    if pdf_file is None or question.strip() == "":
        return "Please upload a PDF and enter a question."
    # Recent Gradio versions pass gr.File values as a filepath string;
    # older ones pass a tempfile object exposing .name.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
    text = clean_text(extract_text_from_pdf(pdf_path))
    chunks = chunk_text(text)
    # Note: the index is rebuilt on every question, which is fine for a
    # demo but worth caching if the same PDF is queried repeatedly.
    index, chunks = build_faiss_index(chunks)
    context = retrieve_relevant_chunks(question, index, chunks)
    return generate_answer(question, context)

# ===============================
# GRADIO UI (GRADIO 6 SAFE)
# ===============================
with gr.Blocks() as demo:
    gr.Markdown("""
# 📄 PDF RAG Chatbot (Open-Source AI)
Upload a **PDF** and ask questions based **only on its content**.
Built with **Retrieval-Augmented Generation (RAG)** and **open-source
Hugging Face models**, running on **free CPU**.
""")
    with gr.Row():
        with gr.Column(scale=1):
            pdf_input = gr.File(
                label="📤 Upload PDF",
                file_types=[".pdf"]
            )
            question_input = gr.Textbox(
                label="❓ Ask a question",
                placeholder="e.g. What is the objective of the project?",
                lines=2
            )
            submit_btn = gr.Button("🔍 Get Answer")
        with gr.Column(scale=2):
            answer_output = gr.Textbox(
                label="📌 Answer",
                lines=10
            )
    submit_btn.click(
        fn=pdf_rag_chat,
        inputs=[pdf_input, question_input],
        outputs=answer_output
    )
    gr.Markdown("""
---
**© Simranpreet Kaur**
**NIELIT Ropar | AIML Six Months Training | 2026**
""")

demo.launch()
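
# ---------------------------------------------------------------
# QUICK START (a sketch — package and file names are assumptions
# inferred from the imports above; "fitz" ships in the pymupdf
# package, and CPU-only FAISS in faiss-cpu)
# ---------------------------------------------------------------
#   pip install gradio pymupdf faiss-cpu sentence-transformers transformers torch
#   python app.py        # assuming this file is saved as app.py
#
# Gradio serves the UI at http://127.0.0.1:7860 by default; upload a
# PDF, type a question, and click "Get Answer".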