# CircuitsTutor / app.py
# Author: SyedWaqad
# (Hugging Face Spaces metadata: "Create app.py", commit 03b2846, verified)
import gradio as gr
from pypdf import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from transformers import pipeline
# Load embedding model — used to vectorize both the document chunks and
# the incoming queries, so both live in the same embedding space.
embed_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# Load QA model — text-generation pipeline that produces the final answer
# from the retrieved context.
qa_model = pipeline("text-generation", model="gpt2")
# Temporary in-memory storage (rebuilt on every upload; not persisted):
documents = []  # full extracted text of each uploaded PDF
vectors = None  # float32 matrix of chunk embeddings, one row per chunk
index = None  # FAISS index over `vectors`; None until PDFs are processed
def read_pdfs(pdf_files):
    """Extract text from the uploaded PDFs, chunk it, and build a FAISS index.

    Args:
        pdf_files: list of uploaded file objects; each exposes a ``.name``
            attribute holding a filesystem path (Gradio ``gr.File`` objects).

    Returns:
        A status string shown in the Gradio UI.

    Side effects:
        Rebuilds the module-level ``documents`` (full text per PDF),
        ``vectors`` (chunk embedding matrix) and ``index`` (FAISS L2 index).
    """
    global documents, vectors, index
    all_text = ""
    documents = []
    for pdf in pdf_files:
        reader = PdfReader(pdf.name)
        text = ""
        for page in reader.pages:
            # extract_text() returns None for image-only/empty pages; the
            # original code crashed here on `None + "\n"`.
            text += (page.extract_text() or "") + "\n"
        documents.append(text)
        all_text += text + "\n"
    # Chunking scheme: one chunk per line of the concatenated text.
    # str.split always yields at least [""], so `chunks` is never empty.
    chunks = all_text.split("\n")
    # Embed every chunk and build a flat L2 (exact nearest-neighbor) index.
    embeddings = embed_model.encode(chunks)
    vectors = np.array(embeddings).astype("float32")
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return "Documents uploaded and processed. You may now ask questions."
def ask_question(query):
    """Answer a question via retrieval-augmented generation over the PDFs.

    Args:
        query: the user's question as a plain string.

    Returns:
        The generated answer string, or a prompt to upload documents first.
    """
    global vectors, index, documents
    if index is None:
        return "Please upload PDF documents first."
    # Embed the query with the same model used for the document chunks.
    q_embed = embed_model.encode([query]).astype("float32")
    # Retrieve the 3 nearest chunks; I holds row indices into the chunk list.
    D, I = index.search(q_embed, k=3)
    # Reconstruct the exact chunk list that read_pdfs indexed (one chunk per
    # line of the concatenated document text). BUG FIX: the original code
    # used the FAISS chunk index as a *character offset* into the first
    # document (`documents[0][idx: idx + 500]`), which returned unrelated
    # text and could raise on short documents.
    chunks = "".join(doc + "\n" for doc in documents).split("\n")
    context = ""
    for idx in I[0]:
        # FAISS pads with -1 when fewer than k vectors exist; guard bounds.
        if 0 <= idx < len(chunks):
            context += chunks[idx] + "\n"
    prompt = f"Context: {context}\nQuestion: {query}\nAnswer:"
    # BUG FIX: max_length counts the prompt itself, and the retrieved
    # context alone usually exceeds 120 tokens, so generation was truncated
    # or errored. max_new_tokens bounds only the continuation.
    generated = qa_model(prompt, max_new_tokens=120)[0]["generated_text"]
    # The pipeline echoes the prompt; return only the continuation
    # (fall back to the raw output if the model generated nothing new).
    return generated[len(prompt):].strip() or generated
# Gradio interface: upload PDFs, build the index, then ask questions.
with gr.Blocks() as demo:
    gr.Markdown("## PDF Chatbot")

    # Components (creation order determines on-screen layout).
    file_box = gr.File(label="Upload multiple PDFs", file_count="multiple")
    process_btn = gr.Button("Process Documents")
    status_box = gr.Textbox(label="Status")
    question_box = gr.Textbox(label="Ask a question")
    answer_box = gr.Textbox(label="Answer")

    # Event wiring: button click -> index the PDFs; Enter in the question
    # box -> run retrieval + generation and show the answer.
    process_btn.click(read_pdfs, inputs=file_box, outputs=status_box)
    question_box.submit(ask_question, inputs=question_box, outputs=answer_box)

demo.launch()