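# LexPilot: voice + document Q&A assistant.
# Combines BERT extractive QA, BART summarization, and Whisper speech-to-text behind a Gradio UI.
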
import os
import torch
import whisper
import PyPDF2
import gradio as gr
from transformers import BertTokenizerFast, BertForQuestionAnswering, pipeline
from torch.nn.functional import softmax
from docx import Document
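
# Pick the GPU when one is available and load the QA, summarization, and speech-to-text models once at startup.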
device = "cuda" if torch.cuda.is_available() else "cpu"
qa_model = BertForQuestionAnswering.from_pretrained("deepset/bert-base-cased-squad2").to(device)
tokenizer = BertTokenizerFast.from_pretrained("deepset/bert-base-cased-squad2")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
whisper_model = whisper.load_model("base")

def extract_text(file_obj):
    """Pull plain text out of an uploaded .pdf, .docx, or .txt file."""
    ext = os.path.splitext(file_obj.name)[1].lower()
    if ext == ".pdf":
        reader = PyPDF2.PdfReader(file_obj)
        # Skip pages where extraction returns None or an empty string
        pages = [page.extract_text() for page in reader.pages]
        return "\n".join([text for text in pages if text])
    elif ext == ".docx":
        doc = Document(file_obj)
        return "\n".join([p.text for p in doc.paragraphs])
    elif ext == ".txt":
        return file_obj.read().decode("utf-8")
    return ""

def summarize_text(text):
    """Summarize the given text with BART, using at most its first 1000 characters."""
    if len(text) < 50:
        return "Text too short to summarize."
    if len(text) > 1000:
        # Truncate long inputs so they stay within the summarizer's limits
        text = text[:1000]
    summary = summarizer(text, max_length=120, min_length=30, do_sample=False)
    return summary[0]["summary_text"]

def ask_question(question, context):
    """Run extractive QA over the context and return the answer with a confidence score."""
    inputs = tokenizer.encode_plus(question, context, return_tensors="pt", truncation=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = qa_model(**inputs)
    # Most likely start/end token positions of the answer span
    start_idx = torch.argmax(outputs.start_logits)
    end_idx = torch.argmax(outputs.end_logits) + 1
    # Confidence = product of the start and end token probabilities
    score = softmax(outputs.start_logits, dim=1)[0][start_idx] * softmax(outputs.end_logits, dim=1)[0][end_idx - 1]
    answer = tokenizer.decode(inputs["input_ids"][0][start_idx:end_idx])
    return f"Answer: {answer.strip()}\nConfidence: {round(score.item() * 100, 2)}%"

def transcribe(audio_path):
    """Transcribe a recorded audio file to text with Whisper."""
    result = whisper_model.transcribe(audio_path)
    return result["text"]
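
# Two-tab Gradio UI: one tab for question answering, one for summarization.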
with gr.Blocks() as demo:
    gr.Markdown("# 🎙️📄 LexPilot: Voice + Document Q&A Assistant")
    gr.Markdown("Upload a document or paste content. Ask questions by typing or using your voice.")
with gr.Tab("Question Answering"):
with gr.Row():
uploaded_file = gr.File(label="Upload .pdf / .docx / .txt", file_types=[".pdf", ".docx", ".txt"])
pasted_text = gr.Textbox(label="Paste text manually", lines=10)
with gr.Row():
typed_question = gr.Textbox(label="Type your question")
audio_input = gr.Audio(source="microphone",type="filepath", label="Or speak your question")
qa_btn = gr.Button("Get Answer")
qa_output = gr.Textbox(label="Answer and Confidence", lines=3)

        def handle_qa(file, text, typed, audio):
            # Prefer an uploaded file over pasted text for the context
            if file:
                context = extract_text(file)
            elif text:
                context = text
            else:
                return "❗ Please upload or paste content."
            # Prefer a typed question over a spoken one
            if typed:
                question = typed
            elif audio:
                question = transcribe(audio)
            else:
                return "❗ Please speak or type a question."
            return ask_question(question, context)

        qa_btn.click(handle_qa, inputs=[uploaded_file, pasted_text, typed_question, audio_input], outputs=qa_output)
with gr.Tab("Summarization"):
with gr.Row():
sum_file = gr.File(label="Upload .pdf / .docx / .txt", file_types=[".pdf", ".docx", ".txt"])
sum_text = gr.Textbox(label="Or paste content", lines=10)
sum_btn = gr.Button("Summarize")
sum_output = gr.Textbox(label="Summary", lines=4)

        def handle_summary(file, text):
            if file:
                context = extract_text(file)
            elif text:
                context = text
            else:
                return "❗ Please upload or paste content to summarize."
            return summarize_text(context)

        sum_btn.click(handle_summary, inputs=[sum_file, sum_text], outputs=sum_output)

demo.launch()