|
|
import PyPDF2
|
|
|
from sentence_transformers import SentenceTransformer
|
|
|
import faiss
|
|
|
import numpy as np
|
|
|
|
|
|
|
|
|
# Sentence-embedding model shared by indexing (process_pdf) and retrieval
# (get_relevant_context); created once at import time so that every request
# reuses the same instance instead of reloading it.
_EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
embedding_model = SentenceTransformer(_EMBEDDING_MODEL_NAME)
|
|
|
|
|
|
|
|
|
def process_pdf(pdf_file):
    """Extract text from a PDF, split it into sentences and index them.

    Args:
        pdf_file: Anything ``PyPDF2.PdfReader`` accepts (a path or a
            file-like object).

    Returns:
        A ``(sentences, embeddings, faiss_index)`` tuple where ``sentences``
        is the list of non-blank text fragments obtained by splitting on
        ``'. '``, ``embeddings`` is the array produced by
        ``embedding_model.encode`` for those fragments, and ``faiss_index``
        is a ``faiss.IndexFlatL2`` holding the embeddings in the same order
        as ``sentences``.  When the PDF yields no text, ``sentences`` is
        empty, ``embeddings`` has zero rows and the index is empty.
    """
    pdf_reader = PyPDF2.PdfReader(pdf_file)

    # Join once instead of "+=" in a loop; ``or ""`` tolerates pages for which
    # text extraction produces nothing (e.g. image-only pages).
    document_text = "".join(
        page.extract_text() or "" for page in pdf_reader.pages
    )

    # Drop blank fragments so empty strings are never embedded, indexed or
    # later returned as "relevant" context.
    sentences = [
        fragment for fragment in document_text.split('. ') if fragment.strip()
    ]

    if not sentences:
        # Nothing to encode: still hand back a valid (empty) index so the
        # return shape is the same as in the normal case.
        dimension = embedding_model.get_sentence_embedding_dimension()
        empty_embeddings = np.empty((0, dimension), dtype=np.float32)
        return sentences, empty_embeddings, faiss.IndexFlatL2(dimension)

    embeddings = embedding_model.encode(sentences)

    # One vector per sentence; the index dimensionality is the embedding width.
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(embeddings)

    return sentences, embeddings, faiss_index
|
|
|
|
|
|
|
|
|
def get_relevant_context(query, faiss_index, sentences, k=3):
    """Return the indexed sentences closest to ``query`` joined with ``'. '``.

    Args:
        query: The question text to embed and search for.
        faiss_index: Index whose vectors are in the same order as
            ``sentences``.
        sentences: The sentences that were added to ``faiss_index``.
        k: Maximum number of sentences to retrieve.

    Returns:
        The retrieved sentences joined with ``'. '``, or ``""`` when there is
        nothing to search (no sentences, or ``k`` below 1).
    """
    if not sentences or k < 1:
        return ""

    query_vector = embedding_model.encode([query])

    # Never ask for more neighbours than exist: FAISS pads missing results
    # with -1, which would otherwise silently select sentences[-1].
    neighbour_count = min(k, len(sentences))
    _, neighbour_ids = faiss_index.search(query_vector, neighbour_count)

    # Keep only ids that actually address a sentence (guards against -1
    # padding and against an index that is out of step with ``sentences``).
    relevant_sentences = [
        sentences[i] for i in neighbour_ids[0] if 0 <= i < len(sentences)
    ]

    return ". ".join(relevant_sentences)
|
|
|
|
|
|
from transformers import pipeline
|
|
|
|
|
|
# Question-answering pipeline used by answer_question; built once at import
# time so that every request reuses the same loaded model.
qa_model = pipeline(
    "question-answering",
    model="distilbert-base-cased-distilled-squad",
)
|
|
|
|
|
|
def answer_question(query, faiss_index, sentences):
    """Answer ``query`` from the most relevant indexed sentences.

    Args:
        query: The user's question.
        faiss_index: Index built over ``sentences``.
        sentences: The document sentences; empty means no usable document.

    Returns:
        An ``(answer, context)`` tuple of strings.  ``context`` is the text
        passed to the QA model, or ``""`` when a message is returned instead
        of an answer (no document, blank question, or no context retrieved).
    """
    if not sentences:
        return "Please upload a document first.", ""

    # The QA pipeline rejects empty inputs, so report a blank question to the
    # user instead of letting the call raise.
    if not query or not query.strip():
        return "Please enter a question.", ""

    relevant_context = get_relevant_context(query, faiss_index, sentences)

    # Same reasoning for the context side of the pipeline call.
    if not relevant_context.strip():
        return "No relevant context found in the document.", ""

    answer = qa_model(question=query, context=relevant_context)

    return answer['answer'], relevant_context
|
|
|
|
|
|
import gradio as gr
|
|
|
|
|
|
def process_and_answer(pdf_file, query):
    """Index the uploaded PDF and answer ``query`` against it.

    This is the callback wired to the Submit button.

    Args:
        pdf_file: The uploaded file as delivered by the ``gr.File`` input,
            or ``None`` when nothing has been uploaded.
        query: The question typed by the user.

    Returns:
        An ``(answer, context)`` tuple of strings for the two output boxes.
    """
    # Submit can be clicked before any file is chosen; avoid handing None to
    # the PDF reader and show the same hint answer_question uses.
    if pdf_file is None:
        return "Please upload a document first.", ""

    # The raw embeddings are not needed here, only the sentences and index.
    sentences, _, faiss_index = process_pdf(pdf_file)

    answer, context = answer_question(query, faiss_index, sentences)

    return answer, context
|
|
|
|
|
|
# Gradio front end: a PDF upload and a question box feed process_and_answer,
# whose two return values fill the answer and context boxes.
with gr.Blocks() as demo:
    gr.Markdown("# Interactive QA Bot")

    # Inputs
    pdf_input = gr.File(label="Upload PDF")
    query_input = gr.Textbox(label="Ask a question about the document")

    # Outputs
    answer_output = gr.Textbox(label="Answer")
    context_output = gr.Textbox(label="Relevant Context")

    submit_button = gr.Button("Submit")

    # Event wiring has to happen while the Blocks context is still open.
    submit_button.click(
        process_and_answer,
        inputs=[pdf_input, query_input],
        outputs=[answer_output, context_output],
    )

demo.launch()
|
|
|
|