Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from pypdf import PdfReader | |
| from sentence_transformers import SentenceTransformer | |
| import faiss | |
| import numpy as np | |
| from transformers import pipeline | |
# Sentence-embedding model used to vectorise both document chunks and queries.
EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embed_model = SentenceTransformer(EMBED_MODEL_NAME)

# Text-generation model that completes the "Context / Question / Answer" prompt.
QA_MODEL_NAME = "gpt2"
qa_model = pipeline(task="text-generation", model=QA_MODEL_NAME)
# Temporary in-memory storage. Everything here is rebuilt by read_pdfs().
documents = []  # full extracted text of each uploaded PDF, in upload order
chunks = []     # text chunks; chunks[i] is the text behind row i of `index`
vectors = None  # float32 embedding matrix with one row per entry in `chunks`
index = None    # FAISS index over `vectors`; None until PDFs are processed

# Retrieval / generation limits.
TOP_K = 3               # number of nearest chunks pulled into the prompt
MAX_CHUNK_CHARS = 500   # cap per chunk so the prompt stays small
MAX_NEW_TOKENS = 120    # tokens generated *after* the prompt


def read_pdfs(pdf_files):
    """Extract text from the uploaded PDFs and build the search index.

    Args:
        pdf_files: Iterable of uploaded files as delivered by the Gradio
            ``File`` component. Each item may be a tempfile-like object
            exposing ``.name`` or a plain filesystem path.

    Returns:
        A human-readable status message for the UI.

    Side effects:
        Replaces the module-level ``documents``, ``chunks``, ``vectors`` and
        ``index``. The globals are only swapped once processing has succeeded,
        so an exception part-way through leaves the previous index usable.
    """
    global documents, chunks, vectors, index

    # Clicking "Process Documents" with nothing selected yields None / [].
    if not pdf_files:
        return "Please upload at least one PDF document."

    new_documents = []
    for pdf in pdf_files:
        # Accept both file-like objects (``.name``) and plain path strings.
        path = getattr(pdf, "name", pdf)
        reader = PdfReader(path)
        # Guard against pages that yield no text at all.
        text = "".join(
            (page.extract_text() or "") + "\n" for page in reader.pages
        )
        new_documents.append(text)

    # One chunk per non-blank line. Blank lines carry no information and
    # would otherwise be embedded and returned as search hits.
    new_chunks = [
        line.strip()
        for text in new_documents
        for line in text.split("\n")
        if line.strip()
    ]
    if not new_chunks:
        # Nothing to index (e.g. scanned PDFs without a text layer).
        documents, chunks, vectors, index = new_documents, [], None, None
        return "No extractable text was found in the uploaded PDFs."

    # Embed chunks and build a flat L2 index over them.
    embeddings = embed_model.encode(new_chunks)
    new_vectors = np.array(embeddings).astype("float32")
    new_index = faiss.IndexFlatL2(new_vectors.shape[1])
    new_index.add(new_vectors)

    documents, chunks = new_documents, new_chunks
    vectors, index = new_vectors, new_index
    return "Documents uploaded and processed. You may now ask questions."


def ask_question(query):
    """Answer ``query`` using the chunks most similar to it.

    Args:
        query: The user's question as a string.

    Returns:
        The generated answer text, or a short instruction message when no
        documents have been processed yet or the question is empty.
    """
    if index is None:
        return "Please upload PDF documents first."
    if not query or not query.strip():
        return "Please enter a question."

    # Embed query
    q_embed = np.asarray(embed_model.encode([query]), dtype="float32")

    # Never ask for more neighbours than there are chunks.
    k = min(TOP_K, len(chunks))
    _, neighbours = index.search(q_embed, k=k)

    # FAISS results are row numbers of the indexed vectors, i.e. positions in
    # `chunks` (not character offsets). Negative ids mean "no result".
    context = "".join(
        chunks[i][:MAX_CHUNK_CHARS] + "\n"
        for i in neighbours[0]
        if 0 <= i < len(chunks)
    )

    # Generate answer
    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
    # max_new_tokens budgets only the continuation, so a long prompt does not
    # use up the whole length limit. return_full_text=False keeps the prompt
    # (context + question) out of the text shown in the Answer box.
    result = qa_model(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        return_full_text=False,
    )
    return result[0]["generated_text"].strip()
# Gradio UI: upload PDFs, build the index, then ask questions against it.
with gr.Blocks() as demo:
    gr.Markdown("## PDF Chatbot")

    # Document ingestion widgets.
    pdf_input = gr.File(file_count="multiple", label="Upload multiple PDFs")
    upload_btn = gr.Button("Process Documents")
    status = gr.Textbox(label="Status")

    # Question / answer widgets.
    question = gr.Textbox(label="Ask a question")
    answer = gr.Textbox(label="Answer")

    # Wire events: the button indexes the PDFs, Enter in the question box asks.
    upload_btn.click(fn=read_pdfs, inputs=pdf_input, outputs=status)
    question.submit(fn=ask_question, inputs=question, outputs=answer)

demo.launch()