import gradio as gr
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline

# Load embedding model once at startup (MiniLM: 384-dim sentence embeddings).
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

# Load generation model for answering (GPT-2: small, CPU-friendly).
qa_model = pipeline("text-generation", model="gpt2")

# Temporary in-memory storage (per-process; reset on every upload).
documents = []   # full extracted text, one entry per uploaded PDF
chunks = []      # text chunks, aligned 1:1 with the rows of the FAISS index
index = None     # faiss.IndexFlatL2 over the chunk embeddings


def read_pdfs(pdf_files):
    """Extract text from the uploaded PDFs, chunk it, embed the chunks,
    and build an in-memory FAISS index.

    Args:
        pdf_files: list of Gradio file objects (each has a ``.name`` path).

    Returns:
        A status string for the UI.
    """
    global documents, chunks, index

    if not pdf_files:
        return "No files received. Please upload at least one PDF."

    documents = []
    all_text = ""
    for pdf in pdf_files:
        reader = PdfReader(pdf.name)
        text = ""
        for page in reader.pages:
            # extract_text() may return None for image-only pages;
            # the original crashed with TypeError on "None + str".
            text += (page.extract_text() or "") + "\n"
        documents.append(text)
        all_text += text + "\n"

    # Split into line-based chunks; drop empty/whitespace-only lines so
    # we don't embed blanks that pollute nearest-neighbor search.
    chunks = [c.strip() for c in all_text.split("\n") if c.strip()]
    if not chunks:
        index = None
        return "No extractable text found in the uploaded PDFs."

    embeddings = embed_model.encode(chunks)
    vectors = np.asarray(embeddings, dtype="float32")

    # Exact L2 index; fine for the small corpora this demo handles.
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)

    return "Documents uploaded and processed. You may now ask questions."


def ask_question(query):
    """Answer a question using the top-k most similar chunks as context.

    Args:
        query: the user's question string.

    Returns:
        The generated answer (or a status string if no docs are loaded).
    """
    global chunks, index

    if index is None:
        return "Please upload PDF documents first."
    if not query or not query.strip():
        return "Please enter a question."

    q_embed = np.asarray(embed_model.encode([query]), dtype="float32")

    # Don't request more neighbors than there are chunks — FAISS pads the
    # result with -1 indices when k exceeds the corpus size.
    k = min(3, len(chunks))
    D, I = index.search(q_embed, k=k)

    # BUG FIX: index.search returns *chunk indices*, not character offsets.
    # The original sliced documents[0][idx: idx + 500], which pulled
    # unrelated text and only ever looked at the first PDF. Use the
    # matched chunks themselves, guarding against -1 padding.
    context = "\n".join(chunks[idx] for idx in I[0] if idx >= 0)

    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
    # max_new_tokens budgets *generated* tokens only; the original
    # max_length=120 counted prompt tokens too, so generation shrank (and
    # eventually errored) as the retrieved context grew.
    answer = qa_model(prompt, max_new_tokens=120)[0]["generated_text"]
    return answer


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## PDF Chatbot")
    pdf_input = gr.File(label="Upload multiple PDFs", file_count="multiple")
    upload_btn = gr.Button("Process Documents")
    status = gr.Textbox(label="Status")
    question = gr.Textbox(label="Ask a question")
    answer = gr.Textbox(label="Answer")

    upload_btn.click(read_pdfs, inputs=pdf_input, outputs=status)
    question.submit(ask_question, inputs=question, outputs=answer)

demo.launch()