Spaces:

SparshSG
/

AI-text-summarizer

Build error

App Files Files Community

AI-text-summarizer / app.py

SparshSG

Upload app.py

2a0c297 verified 11 months ago

raw

history blame contribute delete

3.32 kB

	import gradio as gr
	from transformers import pipeline
	from newspaper import Article
	import fitz # PyMuPDF
	from summarizer import Summarizer

	# --------- UTILITY FUNCTIONS ---------

	def extract_text_from_pdf(pdf_file):
	    """Return the concatenated plain text of every page in a PDF.

	    Args:
	        pdf_file: Raw PDF content handed to PyMuPDF as an in-memory
	            stream (the UI's file input is configured with
	            ``type="binary"``, so this is expected to be ``bytes``).

	    Returns:
	        The text of all pages joined in page order. The result is an
	        empty string when no page yields any text.
	    """
	    # Open via a context manager so the document is closed even when a
	    # page fails to parse; the original never released it.
	    with fitz.open(stream=pdf_file, filetype="pdf") as doc:
	        # join() avoids rebuilding the accumulated string once per page.
	        return "".join(page.get_text() for page in doc)

	def extract_text_from_url(url):
	    """Fetch the page at ``url`` and return its parsed article text.

	    Args:
	        url: Address of the article to download.

	    Returns:
	        The ``text`` attribute of the parsed ``newspaper.Article``.
	    """
	    page = Article(url)
	    # Order matters: the article must be downloaded before it is parsed.
	    page.download()
	    page.parse()
	    return page.text

	# Both summarizers are built once at import time and shared by every
	# request, so model loading is not repeated for each button click.
	abstractive_summarizer = pipeline(
	    task="summarization",
	    model="facebook/bart-large-cnn",
	)
	# Extractive counterpart; constructed with the library's default settings.
	extractive_summarizer = Summarizer()

	def generate_abstractive_summary(text, max_length=130, min_length=30):
	    """Summarize ``text`` with the shared abstractive (BART) pipeline.

	    Args:
	        text: Input text to summarize.
	        max_length: Upper bound on the generated summary length, in tokens.
	        min_length: Lower bound on the generated summary length, in tokens.
	            Clamped so that it never exceeds ``max_length``.

	    Returns:
	        The ``summary_text`` of the first (and only) pipeline result.
	    """
	    # Slider widgets may deliver numeric values as floats; generation
	    # lengths must be integers.
	    max_length = int(max_length)
	    # The UI allows min_length up to 100 while max_length can be as low as
	    # 50; an inverted range is not a feasible generation constraint.
	    min_length = min(int(min_length), max_length)

	    summary = abstractive_summarizer(
	        text,
	        max_length=max_length,
	        min_length=min_length,
	        do_sample=False,
	        # Let the tokenizer cut inputs that exceed the model's context
	        # window instead of failing inside the model.
	        truncation=True,
	    )
	    return summary[0]['summary_text']

	def generate_extractive_summary(text, ratio=0.3):
	    """Summarize ``text`` with the shared extractive summarizer.

	    Args:
	        text: Input text to summarize.
	        ratio: Value forwarded as the summarizer's ``ratio`` argument
	            (labelled "Summary Ratio" in the UI).

	    Returns:
	        Whatever the extractive summarizer returns for the given input.
	    """
	    extractive = extractive_summarizer(text, ratio=ratio)
	    return extractive

	def summarize_text(source_type, text, pdf, url, max_length, min_length, ratio):
	    """Build abstractive and extractive summaries for the selected input.

	    Args:
	        source_type: One of ``"Text"``, ``"PDF"`` or ``"URL"``; selects which
	            of the following three arguments is used.
	        text: Raw text, used when ``source_type`` is ``"Text"``.
	        pdf: PDF content, used when ``source_type`` is ``"PDF"``.
	        url: Article address, used when ``source_type`` is ``"URL"``.
	            Surrounding whitespace is ignored.
	        max_length: Maximum length passed to the abstractive summarizer.
	        min_length: Minimum length passed to the abstractive summarizer.
	        ratio: Ratio passed to the extractive summarizer.

	    Returns:
	        A ``(abstractive, extractive)`` tuple of strings. On invalid input,
	        empty extracted text, or any processing error, the first element
	        is a user-facing message and the second is an empty string.
	    """
	    # Bart/T5 models handle ~1024 tokens (~2000 characters)
	    char_limit = 2000

	    # A URL consisting only of whitespace is "no input", not a download
	    # error; padding around a real URL is never meaningful either.
	    if isinstance(url, str):
	        url = url.strip()

	    # This is the UI boundary: any failure below is reported to the user as
	    # a message rather than propagated as an unhandled exception.
	    try:
	        if source_type == "Text" and text:
	            input_text = text
	        elif source_type == "PDF" and pdf is not None:
	            input_text = extract_text_from_pdf(pdf)
	        elif source_type == "URL" and url:
	            input_text = extract_text_from_url(url)
	        else:
	            return "❗Please provide a valid input.", ""

	        if not input_text.strip():
	            return "❗Input is empty after extraction.", ""

	        # Both summarizers receive the same truncated text.
	        input_text = input_text[:char_limit]

	        abstractive = generate_abstractive_summary(input_text, max_length, min_length)
	        extractive = generate_extractive_summary(input_text, ratio)

	        return abstractive, extractive

	    except Exception as e:
	        return f"⚠️ Error: {str(e)}", ""

	# --------- GRADIO UI ---------

	with gr.Blocks() as demo:
	    # Components are created in the same order as they should appear.
	    gr.Markdown(value="## 🧠 AI Text Summarizer\nChoose input type and get both abstractive and extractive summaries.")

	    source_type = gr.Radio(choices=["Text", "PDF", "URL"], label="Select Input Source")

	    # All three source inputs start hidden; the radio selection reveals
	    # exactly one of them (see toggle_inputs below).
	    text_input = gr.Textbox(label="Enter Text", lines=8, visible=False)
	    pdf_input = gr.File(label="Upload PDF", type="binary", visible=False)
	    url_input = gr.Textbox(label="Enter URL", visible=False)

	    # Tuning controls for the two summarizers.
	    max_length = gr.Slider(minimum=50, maximum=300, value=130, step=10, label="Max Length (Abstractive)")
	    min_length = gr.Slider(minimum=20, maximum=100, value=30, step=10, label="Min Length (Abstractive)")
	    ratio = gr.Slider(minimum=0.1, maximum=1.0, value=0.3, step=0.1, label="Summary Ratio (Extractive)")

	    btn = gr.Button(value="Generate Summaries")

	    output_ab = gr.Textbox(label="Abstractive Summary")
	    output_ex = gr.Textbox(label="Extractive Summary")

	    def toggle_inputs(src):
	        """Map each source input to an update showing only the selected one."""
	        input_for_source = {
	            "Text": text_input,
	            "PDF": pdf_input,
	            "URL": url_input,
	        }
	        return {
	            component: gr.update(visible=(src == source_name))
	            for source_name, component in input_for_source.items()
	        }

	    source_type.change(
	        fn=toggle_inputs,
	        inputs=source_type,
	        outputs=[text_input, pdf_input, url_input],
	    )

	    # Argument order here must match the parameter order of summarize_text.
	    btn.click(
	        fn=summarize_text,
	        inputs=[source_type, text_input, pdf_input, url_input, max_length, min_length, ratio],
	        outputs=[output_ab, output_ex],
	    )

	# Start the app only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()