# Source: pdf_reader/ans_generator1.py
# Hosting-page header (not code): deepkansara-123's picture · "Upload 6 files" · 9ddeec6 (verified)
from transformers import pipeline
import PyPDF2
import os
# Directory in which uploaded PDFs are looked up, one file per token
# (named "<token>.pdf").
UPLOAD_DIR = "uploaded_pdfs"
class AnswerGenerator:
    """Answer questions about an uploaded PDF with a Hugging Face QA pipeline.

    A PDF is located as ``<UPLOAD_DIR>/<token>.pdf``. The text of all its
    pages is joined and used as the context for every question.
    """

    def __init__(self, model_name="google/flan-t5-base"):
        """Create the question-answering pipeline.

        Args:
            model_name: Checkpoint name passed to ``transformers.pipeline``.
                The default is the checkpoint this class has always loaded.
        """
        # NOTE(review): FLAN-T5 is distributed as a text-to-text checkpoint;
        # confirm it provides trained weights for the extractive
        # "question-answering" head this pipeline expects. If it does not,
        # pass a checkpoint fine-tuned for extractive QA via ``model_name``.
        self.qa_pipeline = pipeline("question-answering", model=model_name)

    def extract_pdf_text(self, token):
        """Return the extracted text of each page of the PDF stored for *token*.

        Args:
            token: Identifier of the upload; the file read is
                ``<UPLOAD_DIR>/<token>.pdf``.

        Returns:
            A list with one string per page. Pages for which the extractor
            yields nothing are represented by an empty string.

        Raises:
            FileNotFoundError: If no such file exists, or if *token* would
                resolve to a path outside ``UPLOAD_DIR``.
        """
        upload_root = os.path.abspath(UPLOAD_DIR)
        pdf_path = os.path.abspath(os.path.join(upload_root, f"{token}.pdf"))

        # A token such as "../other" or an absolute path must never reach a
        # file outside the upload directory. Such tokens are reported exactly
        # like a missing upload so callers need no new error handling.
        try:
            inside_root = os.path.commonpath([upload_root, pdf_path]) == upload_root
        except ValueError:
            # commonpath refuses paths it cannot compare (e.g. different
            # drives on Windows); those are outside the upload dir as well.
            inside_root = False

        # isfile (rather than exists) also rejects a directory named "*.pdf".
        if not inside_root or not os.path.isfile(pdf_path):
            raise FileNotFoundError("PDF not found for given token")

        with open(pdf_path, "rb") as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            # Text must be extracted while the file is still open.
            return [page.extract_text() or "" for page in reader.pages]

    def generate_answers(self, token, questions):
        """Answer each question using the PDF stored for *token* as context.

        Args:
            token: Identifier of the uploaded PDF (see ``extract_pdf_text``).
            questions: Iterable of question strings. A single string is
                treated as one question.

        Returns:
            A list of dicts in the order of *questions*. Each dict has the
            keys ``"question"`` and ``"answer"``. When answering a question
            fails, ``"answer"`` is ``"Error"`` and an ``"error"`` key holds
            the exception message.

        Raises:
            FileNotFoundError: Propagated from ``extract_pdf_text``.
        """
        # Iterating a bare string would ask one "question" per character.
        if isinstance(questions, str):
            questions = [questions]

        # All pages are merged into a single context shared by every question.
        context = "\n".join(self.extract_pdf_text(token))

        results = []
        for question in questions:
            try:
                result = self.qa_pipeline(question=question, context=context)
                results.append({"question": question, "answer": result["answer"]})
            except Exception as e:
                # Deliberate best-effort: one failing question must not
                # discard the answers to the others.
                results.append({"question": question, "answer": "Error", "error": str(e)})
        return results