# textsumerizar1234 / interface.py
# (Hugging Face Space file uploaded by Nikhil2411, commit 16d1baf)
import gradio as gr
import nltk
import numpy as np
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import string
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
# Download the NLTK resources used below (sentence/word tokenizers, stopwords).
# NOTE(review): newer NLTK releases (>=3.8.2) also require the 'punkt_tab'
# resource for sent_tokenize/word_tokenize — confirm against the pinned version.
nltk.download('punkt')
nltk.download('stopwords')
# Load the Pegasus model and tokenizer used for abstractive summarization.
model_name = "pegasus-fine_tuned_model" # presumably a local fine-tuned checkpoint directory — verify it exists at runtime
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)
def preprocess_text(text):
    """Split *text* into sentences and return ``(originals, cleaned)``.

    ``originals`` is the list of sentences as tokenized from *text*;
    ``cleaned`` holds, for each sentence, the lowercase tokens with
    English stopwords and punctuation removed, rejoined by spaces.
    """
    stop_words = set(stopwords.words('english'))
    originals = sent_tokenize(text)
    cleaned = [
        ' '.join(
            tok for tok in word_tokenize(sentence.lower())
            if tok not in stop_words and tok not in string.punctuation
        )
        for sentence in originals
    ]
    return originals, cleaned
def build_similarity_matrix(sentences):
    """Return the pairwise cosine-similarity matrix of the sentences' TF-IDF vectors."""
    vectors = TfidfVectorizer().fit_transform(sentences)
    return cosine_similarity(vectors)
def textrank_summary(text, num_sentences=3):
    """Extractive summary via TextRank: PageRank over sentence similarities.

    Returns the *num_sentences* highest-scoring sentences joined by spaces
    (in score order, not document order).
    """
    originals, cleaned = preprocess_text(text)
    graph = nx.from_numpy_array(build_similarity_matrix(cleaned))
    scores = nx.pagerank(graph)
    # Sort (score, sentence) pairs descending; slicing tolerates short inputs.
    scored = [(scores[idx], sentence) for idx, sentence in enumerate(originals)]
    scored.sort(reverse=True)
    return ' '.join(sentence for _, sentence in scored[:num_sentences])
def tfidf_summary(text, num_sentences=3):
    """Extractive summary: pick the sentences with the largest summed TF-IDF weight.

    Returns the top *num_sentences* sentences joined by spaces, ordered by
    descending score rather than document position.
    """
    originals, cleaned = preprocess_text(text)
    matrix = TfidfVectorizer().fit_transform(cleaned)
    # Row sums of the sparse TF-IDF matrix give one score per sentence.
    totals = np.array(matrix.sum(axis=1)).flatten()
    order = np.argsort(totals, axis=0)[::-1]
    return ' '.join(originals[i] for i in order[:num_sentences])
def pegasus_summary(text):
    """Abstractive summary of *text* with the loaded Pegasus model.

    The input is truncated to 1024 tokens; generation uses 5-beam search
    and produces between 30 and 250 tokens (adjust max_length as needed).
    """
    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True, padding=True)
    summary_ids = model.generate(
        inputs["input_ids"],
        # Fix: pass the attention mask so pad tokens are ignored if the
        # input is ever padded (it was silently dropped before).
        attention_mask=inputs["attention_mask"],
        max_length=250,
        min_length=30,
        num_beams=5,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
def summarize_text(text, method):
    """Dispatch *text* to the summarizer selected by *method*.

    Args:
        text: Raw input text to summarize.
        method: One of "TF-IDF", "TextRank", or "Abstractive".

    Returns:
        The summary string from the chosen method.

    Raises:
        ValueError: If *method* is not a recognized option.
    """
    if method == "TF-IDF":
        return tfidf_summary(text)
    if method == "TextRank":
        return textrank_summary(text)
    if method == "Abstractive":
        return pegasus_summary(text)
    # Fix: an unrecognized method previously fell through and returned
    # None silently; fail loudly so UI/config mismatches are visible.
    raise ValueError(f"Unknown summarization method: {method!r}")
# Custom CSS injected into the Gradio page via Interface(css=...); the
# selectors target the elem_classes set on the components below plus
# Gradio's default box/button classes.
custom_css = """
.gr-box {
border-radius: 10px;
padding: 20px;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
margin: 20px 0;
background-color: #fff;
}
.gr-input, .gr-output {
border: 1px solid #ccc;
border-radius: 5px;
padding: 10px;
font-size: 16px;
}
.gr-button {
background-color: #007bff;
color: white;
padding: 10px 20px;
border: none;
border-radius: 5px;
font-size: 16px;
cursor: pointer;
transition: background-color 0.3s;
}
.gr-button:hover {
background-color: #0056b3;
}
"""
# Build the Gradio UI: a large text box plus a method selector in,
# a summary text box out.
input_box = gr.Textbox(
    lines=30,
    placeholder="Paste your text here...",
    label="Input Text",
    elem_classes="gr-input",  # styled by custom_css
)
method_selector = gr.Radio(
    choices=["TF-IDF", "TextRank", "Abstractive"],
    label="Summarization Method",
    value="Abstractive",
)
output_box = gr.Textbox(
    lines=30,
    label="Concise Summary",
    elem_classes="gr-output",  # styled by custom_css
)
interface = gr.Interface(
    fn=summarize_text,
    inputs=[input_box, method_selector],
    outputs=output_box,
    title="Pegasus Text Summarizer",
    description="Get a clear and concise summary of your text in seconds!",
    theme="default",  # built-in theme
    css=custom_css,
)
# share=True exposes a public link; debug=True surfaces tracebacks in the UI.
interface.launch(share=True, debug=True)