Spaces:

Rosan144
/

Document_Question_Answer

Sleeping

Create app.py

f23cc4f verified 8 months ago

1.51 kB

	import torch
	import gradio as gr
	import os
	from PyPDF2 import PdfReader
	from docx import Document
	import io


	# Use a pipeline as a high-level helper
	from transformers import pipeline

	# Extractive question-answering pipeline, constructed once when the module
	# is loaded so that each call to get_ans reuses the same instance.
	question_answer = pipeline(
	    task="question-answering",
	    model="deepset/roberta-base-squad2",
	)



	def _read_upload_bytes(file_obj):
	    """Return the raw bytes of an upload given as a path or a file-like object."""
	    if isinstance(file_obj, (str, os.PathLike)):
	        with open(file_obj, "rb") as handle:
	            return handle.read()
	    data = file_obj.read()
	    # A handle opened in text mode yields str; normalise so callers always
	    # receive bytes.
	    return data.encode("utf-8") if isinstance(data, str) else data


	def extract_file_content(file_obj):
	    """Return the plain text of an uploaded PDF, TXT, or DOCX file.

	    Args:
	        file_obj: The upload, either a filesystem path (``str`` or
	            ``os.PathLike``) or a file-like object exposing ``name`` and
	            ``read()``. NOTE(review): which of the two Gradio passes depends
	            on the ``gr.File`` configuration/version -- confirm against the
	            deployed Gradio release.

	    Returns:
	        The extracted text. Failures are reported as a message string rather
	        than raised: ``"Unsupported file format. ..."`` for an unknown
	        extension, or ``"Error reading file: <reason>"`` when reading or
	        parsing fails.
	    """
	    if file_obj is None:
	        return "Error reading file: no file was provided"

	    # Everything, including the name lookup, stays inside the try so that any
	    # failure becomes a message for the UI instead of an unhandled exception.
	    try:
	        filename = os.fspath(getattr(file_obj, "name", file_obj))
	        # splitext yields "" for a name without a dot, so a file literally
	        # called "pdf" is not mistaken for a PDF.
	        file_ext = os.path.splitext(filename)[1].lstrip(".").lower()

	        if file_ext == "pdf":
	            reader = PdfReader(io.BytesIO(_read_upload_bytes(file_obj)))
	            # Treat a page with no extractable text as empty instead of
	            # failing on a None result.
	            page_texts = [page.extract_text() or "" for page in reader.pages]
	            return "\n".join(page_texts).strip()

	        if file_ext == "txt":
	            return _read_upload_bytes(file_obj).decode("utf-8")

	        if file_ext == "docx":
	            doc = Document(io.BytesIO(_read_upload_bytes(file_obj)))
	            return "\n".join(para.text for para in doc.paragraphs).strip()

	        return "Unsupported file format. Please upload PDF, TXT, or DOCX."

	    except Exception as e:
	        return f"Error reading file: {str(e)}"

	def get_ans(file,question):
	    """Answer ``question`` using the text of the uploaded ``file``.

	    Args:
	        file: The uploaded document, forwarded to ``extract_file_content``.
	        question: The question to ask about the document.

	    Returns:
	        The answer span produced by the ``question_answer`` pipeline, or a
	        short message when the file or question is missing, the file could
	        not be read, or it contains no text.
	    """
	    if file is None:
	        return "Please upload a file."
	    if not question or not question.strip():
	        return "Please enter a question."

	    context = extract_file_content(file)

	    # extract_file_content reports failures as message strings. Show them to
	    # the user as-is; feeding them to the model would produce an "answer"
	    # that is just a fragment of the error text.
	    if context.startswith(("Error reading file:", "Unsupported file format.")):
	        return context
	    if not context.strip():
	        return "No readable text was found in the uploaded file."

	    answer = question_answer(question=question, context=context)
	    return answer["answer"]


	# Build the UI components first, then wire them to get_ans: a file upload
	# and a question box as inputs, a single text box for the answer.
	upload_input = gr.File(label="Upload Your File")
	question_input = gr.Textbox(label="Enter question", lines=4)
	answer_output = gr.Textbox(label="Answer", lines=5)

	demo = gr.Interface(
	    fn=get_ans,
	    inputs=[upload_input, question_input],
	    outputs=[answer_output],
	    title="@RosangenAi Project 3: Document question answer",
	)

	# Start the Gradio app.
	demo.launch()