Spaces:
Sleeping
Sleeping
| import torch | |
| import gradio as gr | |
| import os | |
| from PyPDF2 import PdfReader | |
| from docx import Document | |
| import io | |
| # Use a pipeline as a high-level helper | |
| from transformers import pipeline | |
# Build the extractive question-answering pipeline once at import time so
# every request reuses the same loaded model instead of reloading it.
question_answer = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)
def _read_upload_bytes(file_obj, filename):
    """Return the raw bytes of an upload given as a path or a file-like object."""
    # Prefer the on-disk copy when there is one: a stream handed over by the
    # UI layer may be opened in text mode or may already have been consumed.
    if isinstance(file_obj, (str, os.PathLike)) or os.path.isfile(filename):
        with open(filename, "rb") as handle:
            return handle.read()
    payload = file_obj.read()
    return payload.encode("utf-8") if isinstance(payload, str) else payload


def extract_file_content(file_obj):
    """Extract the plain text of an uploaded PDF, TXT, or DOCX file.

    Args:
        file_obj: The upload. Either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing a ``name`` attribute and,
            when ``name`` is not an existing file, a ``read()`` method.
            ``None`` means nothing was uploaded.

    Returns:
        The extracted text on success. On failure a human-readable message is
        returned instead of raising: a "No file uploaded" notice, an
        "Unsupported file format" notice, or ``"Error reading file: ..."``.
    """
    if file_obj is None:
        return "No file uploaded. Please upload PDF, TXT, or DOCX."

    if isinstance(file_obj, (str, os.PathLike)):
        filename = os.fspath(file_obj)
    else:
        filename = str(getattr(file_obj, "name", ""))

    # splitext only reports a real suffix, so a bare name such as "pdf" is
    # not mistaken for a PDF document.
    file_ext = os.path.splitext(filename)[1].lower().lstrip(".")
    if file_ext not in ("pdf", "txt", "docx"):
        return "Unsupported file format. Please upload PDF, TXT, or DOCX."

    # Broad catch is deliberate: this is the UI boundary and callers expect a
    # message string rather than an exception from the parsing libraries.
    try:
        data = _read_upload_bytes(file_obj, filename)
        if file_ext == "txt":
            return data.decode("utf-8")
        if file_ext == "pdf":
            reader = PdfReader(io.BytesIO(data))
            # Guard against a page yielding no text (None) so the
            # concatenation below cannot fail.
            pages = [(page.extract_text() or "") + "\n" for page in reader.pages]
            return "".join(pages).strip()
        doc = Document(io.BytesIO(data))
        return "".join(para.text + "\n" for para in doc.paragraphs).strip()
    except Exception as e:
        return f"Error reading file: {str(e)}"
def get_ans(file, question):
    """Answer a question about the uploaded document.

    Args:
        file: The uploaded file as delivered by the UI, or ``None`` when
            nothing was uploaded.
        question: The user's question text.

    Returns:
        The answer span found by the question-answering pipeline, or a
        human-readable message when the input cannot be processed.
    """
    if file is None:
        return "Please upload a PDF, TXT, or DOCX file."
    if not question or not question.strip():
        return "Please enter a question."

    context = extract_file_content(file)
    # The pipeline rejects an empty context, so report it instead of crashing.
    if not context or not context.strip():
        return "No readable text was found in the uploaded file."
    # extract_file_content reports failures as message strings; show them to
    # the user rather than letting the model "answer" from an error message.
    if context.startswith(("Error reading file:", "Unsupported file format.")):
        return context

    answer = question_answer(question=question, context=context)
    return answer["answer"]
# Web UI: one file upload plus a question box in, one answer box out.
demo = gr.Interface(
    fn=get_ans,
    inputs=[
        gr.File(label="Upload Your File"),
        gr.Textbox(label="Enter question", lines=4),
    ],
    outputs=[
        gr.Textbox(label="Answer", lines=5),
    ],
    title="@RosangenAi Project 3: Document question answer",
)

# Start the Gradio server for this interface.
demo.launch()