Spaces:

Nikhil2411
/

textsumerizar1234

Sleeping

App Files Files Community

textsumerizar1234 / interface.py

Nikhil2411

Upload 3 files

16d1baf verified over 1 year ago

raw

history blame contribute delete

4.61 kB

	import gradio as gr
	import nltk
	import numpy as np
	import networkx as nx
	from sklearn.feature_extraction.text import TfidfVectorizer
	from sklearn.metrics.pairwise import cosine_similarity
	from nltk.tokenize import sent_tokenize, word_tokenize
	from nltk.corpus import stopwords
	import string
	from transformers import PegasusForConditionalGeneration, PegasusTokenizer

	# Download the NLTK data files used by sent_tokenize / word_tokenize / stopwords.
	# Newer NLTK releases look up the 'punkt_tab' resource instead of the pickled
	# 'punkt' models, so both are requested; a resource that the installed NLTK
	# version does not know about is expected to be reported by the downloader
	# rather than raised (NOTE(review): confirm on the pinned NLTK version).
	nltk.download('punkt')
	nltk.download('punkt_tab')
	nltk.download('stopwords')

	# Identifier handed to from_pretrained for both the tokenizer and the model.
	model_name = "pegasus-fine_tuned_model"
	# Loaded once at import time; pegasus_summary reads these module-level objects.
	tokenizer, model = (
	    PegasusTokenizer.from_pretrained(model_name),
	    PegasusForConditionalGeneration.from_pretrained(model_name),
	)

	def preprocess_text(text):
	    """Split ``text`` into sentences and build a cleaned copy of each one.

	    Each cleaned sentence is the lower-cased sentence re-tokenized into words,
	    with English stop words and punctuation tokens dropped, and the surviving
	    tokens joined by single spaces.

	    Returns:
	        A pair ``(sentences, cleaned)``: the sentences as produced by
	        ``sent_tokenize`` and the list of cleaned strings, index-aligned.
	    """
	    raw_sentences = sent_tokenize(text)
	    english_stopwords = set(stopwords.words('english'))
	    cleaned = [
	        ' '.join(
	            token
	            for token in word_tokenize(raw.lower())
	            if not (token in english_stopwords or token in string.punctuation)
	        )
	        for raw in raw_sentences
	    ]
	    return raw_sentences, cleaned

	def build_similarity_matrix(sentences):
	    """Return the pairwise cosine-similarity matrix of ``sentences``.

	    The sentences are vectorized with a freshly fitted ``TfidfVectorizer``
	    and the resulting TF-IDF rows are compared with ``cosine_similarity``.
	    """
	    return cosine_similarity(TfidfVectorizer().fit_transform(sentences))

	def textrank_summary(text, num_sentences=3):
	    """Build an extractive summary by ranking sentences with PageRank.

	    Sentences are preprocessed, compared pairwise via TF-IDF cosine
	    similarity, and the similarity matrix is turned into a graph whose
	    PageRank scores rank the original sentences.

	    Args:
	        text: The document to summarize.
	        num_sentences: Maximum number of sentences to keep.

	    Returns:
	        The top-ranked original sentences joined by spaces, highest score
	        first. An empty string is returned for empty or whitespace-only
	        input. If no sentence has any usable vocabulary left after
	        preprocessing, the leading ``num_sentences`` sentences are returned.
	    """
	    # Nothing to summarize: avoid fitting a vectorizer on an empty corpus.
	    if not text or not text.strip():
	        return ""
	    original_sentences, preprocessed_sentences = preprocess_text(text)
	    if not original_sentences:
	        return ""
	    try:
	        similarity_matrix = build_similarity_matrix(preprocessed_sentences)
	    except ValueError:
	        # TfidfVectorizer raises ValueError ("empty vocabulary") when every
	        # sentence consists only of stop words / punctuation; fall back to
	        # the leading sentences instead of crashing the request.
	        return ' '.join(original_sentences[:num_sentences])
	    similarity_graph = nx.from_numpy_array(similarity_matrix)
	    scores = nx.pagerank(similarity_graph)
	    # Node i of the graph corresponds to sentence i.
	    ranked_sentences = sorted(
	        ((scores[i], s) for i, s in enumerate(original_sentences)),
	        reverse=True,
	    )
	    return ' '.join(sentence for _, sentence in ranked_sentences[:num_sentences])

	def tfidf_summary(text, num_sentences=3):
	    """Build an extractive summary by scoring sentences with summed TF-IDF weights.

	    Each preprocessed sentence is scored as the sum of its TF-IDF row; the
	    original sentences are then taken in descending score order.

	    Args:
	        text: The document to summarize.
	        num_sentences: Maximum number of sentences to keep.

	    Returns:
	        The highest-scoring original sentences joined by spaces, best first.
	        An empty string is returned for empty or whitespace-only input. If no
	        sentence has any usable vocabulary left after preprocessing, the
	        leading ``num_sentences`` sentences are returned.
	    """
	    # Nothing to summarize: avoid fitting a vectorizer on an empty corpus.
	    if not text or not text.strip():
	        return ""
	    original_sentences, preprocessed_sentences = preprocess_text(text)
	    if not original_sentences:
	        return ""
	    try:
	        tfidf_matrix = TfidfVectorizer().fit_transform(preprocessed_sentences)
	    except ValueError:
	        # Raised ("empty vocabulary") when every sentence consists only of
	        # stop words / punctuation; fall back to the leading sentences.
	        return ' '.join(original_sentences[:num_sentences])
	    sentence_scores = np.array(tfidf_matrix.sum(axis=1)).flatten()
	    # argsort is ascending, so reverse it to get best-first order.
	    ranked_sentences = [original_sentences[i] for i in np.argsort(sentence_scores, axis=0)[::-1]]
	    return ' '.join(ranked_sentences[:num_sentences])

	def pegasus_summary(text):
	    """Generate an abstractive summary of ``text`` with the module-level Pegasus model.

	    The text is tokenized (truncated to 1024 tokens), summarized with beam
	    search, and the first generated sequence is decoded without special
	    tokens.

	    Args:
	        text: The document to summarize.

	    Returns:
	        The decoded summary string, or an empty string for empty or
	        whitespace-only input.
	    """
	    # Skip the model entirely when there is nothing to summarize.
	    if not text or not text.strip():
	        return ""
	    # NOTE(review): assumes the checkpoint accepts 1024-token inputs — confirm
	    # against the fine-tuned model's configuration.
	    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True, padding=True)
	    summary_ids = model.generate(
	        inputs["input_ids"],
	        # Pass the mask explicitly so any padded positions are ignored.
	        attention_mask=inputs["attention_mask"],
	        max_length=250,  # upper bound on generated tokens; adjust as needed
	        min_length=30,
	        num_beams=5,
	        early_stopping=True,
	    )
	    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

	def summarize_text(text, method):
	    """Summarize ``text`` with the summarizer selected by ``method``.

	    Args:
	        text: The document to summarize.
	        method: One of ``"TF-IDF"``, ``"TextRank"`` or ``"Abstractive"``.

	    Returns:
	        The summary string produced by the chosen summarizer, or an empty
	        string when ``text`` is empty or whitespace-only.

	    Raises:
	        ValueError: If ``method`` is not one of the supported names
	            (previously this silently returned ``None``).
	    """
	    if method not in ("TF-IDF", "TextRank", "Abstractive"):
	        raise ValueError(f"Unknown summarization method: {method!r}")
	    # Blank input has no summary; return early so no summarizer is invoked.
	    if not text or not text.strip():
	        return ""
	    if method == "TF-IDF":
	        return tfidf_summary(text)
	    if method == "TextRank":
	        return textrank_summary(text)
	    return pegasus_summary(text)

	# Stylesheet passed to gr.Interface via its css argument.
	# The .gr-input and .gr-output classes are attached to the two text boxes
	# through elem_classes; .gr-box and .gr-button are not assigned anywhere in
	# this file (NOTE(review): presumably they target Gradio's own class names —
	# confirm they exist in the installed Gradio version).
	custom_css = """
	.gr-box {
	border-radius: 10px;
	padding: 20px;
	box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
	margin: 20px 0;
	background-color: #fff;
	}

	.gr-input, .gr-output {
	border: 1px solid #ccc;
	border-radius: 5px;
	padding: 10px;
	font-size: 16px;
	}

	.gr-button {
	background-color: #007bff;
	color: white;
	padding: 10px 20px;
	border: none;
	border-radius: 5px;
	font-size: 16px;
	cursor: pointer;
	transition: background-color 0.3s;
	}

	.gr-button:hover {
	background-color: #0056b3;
	}
	"""

	# Build the Gradio UI: a text box and a method selector feed summarize_text,
	# and the returned summary is shown in a second text box.
	interface = gr.Interface(
	    fn=summarize_text,
	    inputs=[
	        gr.Textbox(
	            label="Input Text",
	            placeholder="Paste your text here...",
	            lines=30,
	            elem_classes="gr-input",  # styled by custom_css
	        ),
	        gr.Radio(
	            label="Summarization Method",
	            choices=["TF-IDF", "TextRank", "Abstractive"],
	            value="Abstractive",  # method selected when the page loads
	        ),
	    ],
	    outputs=gr.Textbox(
	        label="Concise Summary",
	        lines=30,
	        elem_classes="gr-output",  # styled by custom_css
	    ),
	    css=custom_css,
	    theme="default",
	    title="Pegasus Text Summarizer",
	    description="Get a clear and concise summary of your text in seconds!",
	)

	# Start serving the app as soon as this module is executed.
	# share=True asks Gradio for a public share link; debug=True turns on
	# Gradio's debug mode.
	interface.launch(share=True, debug=True)