"""Gradio app that answers questions about an uploaded .txt, .pdf or .docx file.

The uploaded document is converted to plain text and handed, together with the
user's question, to an extractive question-answering pipeline.
"""

import os

import gradio as gr
import torch  # noqa: F401  (not referenced directly in this file; kept as-is)
from docx import Document  # For handling .docx files
from PyPDF2 import PdfReader  # Alternative for PDF handling
from transformers import pipeline

# Path of a local snapshot of the model. NOTE(review): this value is not used
# anywhere below -- the pipeline is created from the hub id instead. Confirm
# whether the local snapshot was meant to be loaded.
model_path = (
    "../Models/models--deepset--roberta-base-squad2/snapshots"
    "/cbf50ba81465d4d8676b8bab348e31835147541b"
)

question_answer = pipeline("question-answering", model="deepset/roberta-base-squad2")

_UNSUPPORTED_MESSAGE = (
    "Unsupported file format. Please upload a .txt, .pdf, or .docx file."
)


def _resolve_path(file_obj):
    """Return the filesystem path of an upload given as a path or file-like object."""
    if isinstance(file_obj, (str, os.PathLike)):
        return os.fspath(file_obj)
    # Otherwise expect a tempfile-like wrapper exposing the path as ``.name``.
    return file_obj.name


def _extract_text(path):
    """Return the text of the file at *path*, or ``None`` if the type is unsupported."""
    extension = os.path.splitext(path)[1].lower()

    if extension == ".txt":
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()

    if extension == ".pdf":
        reader = PdfReader(path)
        # ``or ""`` guards against pages for which no text could be extracted.
        return "".join(page.extract_text() or "" for page in reader.pages)

    if extension == ".docx":
        doc = Document(path)
        return "\n".join(para.text for para in doc.paragraphs)

    return None


def _load_context(file_obj):
    """Load the text of an upload.

    Returns:
        A ``(context, error)`` tuple. Exactly one of the two is ``None``:
        ``context`` holds the extracted text on success, ``error`` holds a
        user-facing message on failure. Keeping the error separate from the
        text means a document's own wording can never be mistaken for an error.
    """
    try:
        context = _extract_text(_resolve_path(file_obj))
    except Exception as e:  # UI boundary: report the failure instead of crashing.
        return None, f"An error occurred: {e}"
    if context is None:
        return None, _UNSUPPORTED_MESSAGE
    return context, None


def read_file_content(file_obj):
    """Return the text content of an uploaded .txt, .pdf or .docx file.

    Args:
        file_obj: Either a path (``str`` / ``os.PathLike``) or an object whose
            ``name`` attribute is the path of the uploaded file.

    Returns:
        The extracted text. On failure a human-readable message is returned
        instead: the "Unsupported file format..." message for other file
        types, or "An error occurred: ..." if reading raised an exception.
    """
    context, error = _load_context(file_obj)
    return error if error is not None else context


def get_answer(file, question):
    """Answer *question* using the text of the uploaded *file*.

    Args:
        file: The upload received from the ``gr.File`` input (path or
            file-like object), or ``None`` when nothing was uploaded.
        question: The question typed by the user.

    Returns:
        The answer span produced by the question-answering pipeline, or a
        user-facing message when the input is missing or cannot be read.
    """
    if file is None:
        return "Please upload a file."
    if not question or not question.strip():
        return "Please enter a question."

    context, error = _load_context(file)
    if error is not None:
        return error  # Return error message directly if present
    if not context.strip():
        # Nothing to search in (e.g. an empty file or a scanned, image-only PDF).
        return "No readable text was found in the uploaded file."

    answer = question_answer(question=question, context=context)
    return answer["answer"]


demo = gr.Interface(
    fn=get_answer,
    inputs=[
        gr.File(label="Upload your file"),
        gr.Textbox(label="Input your question", lines=1),
    ],
    outputs=[gr.Textbox(label="Answer text", lines=1)],
    title="Explore Documents",
    description="THIS APPLICATION WILL BE USED TO ANSWER QUESTIONS BASED ON CONTEXT PROVIDED.",
)

if __name__ == "__main__":
    # Only start the web server when run as a script, not when imported.
    demo.launch()