Spaces:
Sleeping
Sleeping
| from transformers import pipeline | |
| import PyPDF2 | |
| import os | |
# Directory (relative to the process working directory) in which
# AnswerGenerator.extract_pdf_text looks for files named "<token>.pdf".
UPLOAD_DIR = "uploaded_pdfs"
class AnswerGenerator:
    """Answer questions about an uploaded PDF with a Hugging Face QA pipeline.

    A PDF is looked up as ``<upload_dir>/<token>.pdf``. The text of all its
    pages is joined into a single context string, which is handed to the
    pipeline together with each question.
    """

    def __init__(self, model_name="google/flan-t5-base", upload_dir=None):
        """Load the question-answering pipeline.

        Args:
            model_name: Hugging Face model id passed to ``pipeline``. The
                default is the checkpoint this class has always used; pass
                another id to try a different model without editing the class.
            upload_dir: Directory that holds the uploaded PDFs. ``None`` (the
                default) falls back to the module-level ``UPLOAD_DIR``.
        """
        self.upload_dir = UPLOAD_DIR if upload_dir is None else upload_dir
        # NOTE(review): FLAN-T5 is published as a text-to-text (seq2seq)
        # checkpoint. Confirm that the "question-answering" task loads it with
        # a trained span-prediction head; if it does not, pass a
        # SQuAD-fine-tuned model via ``model_name``.
        self.qa_pipeline = pipeline("question-answering", model=model_name)

    def _resolve_pdf_path(self, token):
        """Return the absolute path of the PDF for *token* inside the upload dir.

        Raises:
            FileNotFoundError: If the resolved path leaves the upload
                directory or does not name an existing regular file.
        """
        base_dir = os.path.abspath(self.upload_dir)
        candidate = os.path.abspath(os.path.join(base_dir, f"{token}.pdf"))
        try:
            # A token such as "../x" or an absolute path would otherwise let
            # the caller read PDFs outside the upload directory.
            inside = os.path.commonpath([base_dir, candidate]) == base_dir
        except ValueError:
            # commonpath raises when the paths cannot be compared
            # (for example, different drives on Windows).
            inside = False
        if not inside or not os.path.isfile(candidate):
            raise FileNotFoundError("PDF not found for given token")
        return candidate

    def extract_pdf_text(self, token):
        """Return the extracted text of every page of the PDF for *token*.

        Args:
            token: Identifier of the upload; the file read is
                ``<upload_dir>/<token>.pdf``.

        Returns:
            A list with one string per page, in page order.

        Raises:
            FileNotFoundError: If no such PDF exists inside the upload
                directory.
        """
        pdf_path = self._resolve_pdf_path(token)
        with open(pdf_path, "rb") as pdf_file:
            reader = PyPDF2.PdfReader(pdf_file)
            # Normalise a falsy extract_text() result (e.g. None) to "" so the
            # caller can always join the pages.
            return [page.extract_text() or "" for page in reader.pages]

    def generate_answers(self, token, questions):
        """Answer each question using the full text of the PDF as context.

        Args:
            token: Identifier of the uploaded PDF (see ``extract_pdf_text``).
            questions: Iterable of question strings; ``None`` is treated as
                "no questions".

        Returns:
            A list of dicts in question order. Each has ``"question"`` and
            ``"answer"``; when the pipeline fails for a question, ``"answer"``
            is ``"Error"`` and an extra ``"error"`` key holds the message.

        Raises:
            FileNotFoundError: Propagated from ``extract_pdf_text``; the PDF
                is read before any question is processed.
        """
        pages = self.extract_pdf_text(token)
        full_text = "\n".join(pages)  # One context string for all questions.

        results = []
        for question in questions or []:
            try:
                result = self.qa_pipeline(question=question, context=full_text)
                results.append({"question": question, "answer": result["answer"]})
            except Exception as e:
                # Deliberate best effort: one failing question must not
                # discard the answers already collected for the others.
                results.append({"question": question, "answer": "Error", "error": str(e)})
        return results