# Spaces: Sleeping
| import gradio as gr | |
| import nltk | |
| import numpy as np | |
| import networkx as nx | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| from nltk.tokenize import sent_tokenize, word_tokenize | |
| from nltk.corpus import stopwords | |
| import string | |
| from transformers import PegasusForConditionalGeneration, PegasusTokenizer | |
# Fetch the NLTK resources used below: the Punkt sentence-tokenizer data and
# the English stopword list. NLTK 3.9+ loads Punkt from the 'punkt_tab'
# package instead of the pickled 'punkt' one, so both are requested to keep
# sent_tokenize/word_tokenize working on either release line.
for _nltk_resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_nltk_resource)

# Load the Pegasus tokenizer and model once at import time so that every
# summarization request reuses the same objects.
# NOTE(review): the name is resolved by from_pretrained(); presumably a local
# checkpoint directory next to this script — confirm.
model_name = "pegasus-fine_tuned_model"
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)
def preprocess_text(text):
    """Split *text* into sentences and build a normalized copy of each one.

    Every sentence is lowercased, word-tokenized, and stripped of English
    stopwords and punctuation-only tokens; the surviving tokens are re-joined
    with single spaces.

    Args:
        text: Raw input text.

    Returns:
        A ``(sentences, preprocessed_sentences)`` tuple of two equally long
        lists: the sentences produced by ``sent_tokenize`` and their
        normalized counterparts. Both lists are empty for blank input.
    """
    # Blank input has no sentences; return early and skip the corpus load.
    if not text or not text.strip():
        return [], []

    sentences = sent_tokenize(text)
    stop_words = set(stopwords.words('english'))
    # Use a set of characters: testing a whole token against the
    # ``string.punctuation`` *string* is a substring check, which lets
    # multi-character punctuation tokens such as "''" or "..." through and
    # drops accidental runs such as "./".
    punctuation = set(string.punctuation)

    preprocessed_sentences = []
    for sentence in sentences:
        kept_words = [
            word
            for word in word_tokenize(sentence.lower())
            if word not in stop_words
            and not all(char in punctuation for char in word)
        ]
        preprocessed_sentences.append(' '.join(kept_words))
    return sentences, preprocessed_sentences
def build_similarity_matrix(sentences):
    """Return the pairwise cosine-similarity matrix of *sentences*.

    The sentences are vectorized with a freshly fitted ``TfidfVectorizer``
    and the resulting TF-IDF matrix is handed to ``cosine_similarity``, so
    entry ``[i][j]`` compares sentence ``i`` with sentence ``j``.
    """
    vectors = TfidfVectorizer().fit_transform(sentences)
    return cosine_similarity(vectors)
def textrank_summary(text, num_sentences=3):
    """Summarize *text* extractively by ranking its sentences with PageRank.

    Sentences become the nodes of a graph whose edge weights are their
    pairwise TF-IDF cosine similarities. The ``num_sentences`` sentences with
    the highest PageRank score are joined with spaces, best first.

    Args:
        text: Raw input text.
        num_sentences: Maximum number of sentences in the summary.

    Returns:
        The summary string, or ``""`` when *text* is blank.
    """
    # Nothing to summarize; also avoids vectorizing an empty corpus.
    if not text or not text.strip():
        return ""

    original_sentences, preprocessed_sentences = preprocess_text(text)
    if not original_sentences:
        return ""

    try:
        similarity_matrix = build_similarity_matrix(preprocessed_sentences)
    except ValueError:
        # TfidfVectorizer raises ValueError when preprocessing left no
        # vocabulary (e.g. the text consists only of stopwords). With nothing
        # to rank on, fall back to the leading sentences in document order.
        return ' '.join(original_sentences[:num_sentences])

    similarity_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(similarity_graph)
    # Pair each score with its sentence so sorting orders by score first.
    ranked_sentences = sorted(
        ((scores[i], s) for i, s in enumerate(original_sentences)),
        reverse=True,
    )
    return ' '.join(sentence for _, sentence in ranked_sentences[:num_sentences])
def tfidf_summary(text, num_sentences=3):
    """Summarize *text* extractively by scoring sentences with TF-IDF.

    Each sentence's score is the sum of the TF-IDF weights of its terms. The
    ``num_sentences`` highest-scoring sentences are joined with spaces,
    best first.

    Args:
        text: Raw input text.
        num_sentences: Maximum number of sentences in the summary.

    Returns:
        The summary string, or ``""`` when *text* is blank.
    """
    # Nothing to summarize; also avoids vectorizing an empty corpus.
    if not text or not text.strip():
        return ""

    original_sentences, preprocessed_sentences = preprocess_text(text)
    if not original_sentences:
        return ""

    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(preprocessed_sentences)
    except ValueError:
        # TfidfVectorizer raises ValueError when preprocessing left no
        # vocabulary (e.g. the text consists only of stopwords). With nothing
        # to score, fall back to the leading sentences in document order.
        return ' '.join(original_sentences[:num_sentences])

    # Row sums of the sparse matrix give one score per sentence.
    sentence_scores = np.asarray(tfidf_matrix.sum(axis=1)).flatten()
    # argsort is ascending, so reverse it before taking the top entries.
    top_indices = np.argsort(sentence_scores)[::-1][:num_sentences]
    return ' '.join(original_sentences[i] for i in top_indices)
def pegasus_summary(text):
    """Generate an abstractive summary of *text* with the loaded Pegasus model.

    The input is tokenized (truncated to at most 1024 tokens), summarized
    with 5-beam search, and the first returned sequence is decoded without
    special tokens.

    Args:
        text: Raw input text.

    Returns:
        The generated summary, or ``""`` when *text* is blank.
    """
    # Do not run beam search on empty input; there is nothing to summarize.
    if not text or not text.strip():
        return ""

    # NOTE(review): 1024 assumes the checkpoint accepts inputs that long —
    # confirm against the fine-tuned model's configuration.
    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True, padding=True)
    summary_ids = model.generate(
        inputs["input_ids"],
        # Forward the mask produced by the tokenizer so padded positions,
        # if any, are ignored during generation.
        attention_mask=inputs.get("attention_mask"),
        max_length=250,  # upper bound on the generated length; adjust as needed
        min_length=30,  # lower bound on the generated length
        num_beams=5,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
def summarize_text(text, method):
    """Summarize *text* with the summarizer selected by *method*.

    Args:
        text: Raw input text.
        method: One of ``"TF-IDF"``, ``"TextRank"`` or ``"Abstractive"``.

    Returns:
        The summary produced by the selected summarizer.

    Raises:
        ValueError: If *method* is not one of the supported names.
    """
    if method == "TF-IDF":
        return tfidf_summary(text)
    if method == "TextRank":
        return textrank_summary(text)
    if method == "Abstractive":
        return pegasus_summary(text)
    # Previously an unknown method fell through and silently returned None;
    # fail loudly so a mismatch with the UI choices is noticed.
    raise ValueError(
        f"Unknown summarization method: {method!r} "
        "(expected 'TF-IDF', 'TextRank' or 'Abstractive')"
    )
# Custom CSS for styling; handed to gr.Interface through its `css` argument.
# The .gr-input and .gr-output classes are attached to the two text boxes via
# `elem_classes`. The .gr-box and .gr-button classes are not assigned anywhere
# in this file; presumably they target Gradio's own class names — verify
# against the installed Gradio version.
custom_css = """
.gr-box {
border-radius: 10px;
padding: 20px;
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
margin: 20px 0;
background-color: #fff;
}
.gr-input, .gr-output {
border: 1px solid #ccc;
border-radius: 5px;
padding: 10px;
font-size: 16px;
}
.gr-button {
background-color: #007bff;
color: white;
padding: 10px 20px;
border: none;
border-radius: 5px;
font-size: 16px;
cursor: pointer;
transition: background-color 0.3s;
}
.gr-button:hover {
background-color: #0056b3;
}
"""
# Build the Gradio UI: a large text box plus a method selector as inputs and
# a single text box for the resulting summary.
source_textbox = gr.Textbox(
    lines=30,
    placeholder="Paste your text here...",
    label="Input Text",
    elem_classes="gr-input",  # styled by custom_css
)
method_selector = gr.Radio(
    choices=["TF-IDF", "TextRank", "Abstractive"],
    label="Summarization Method",
    value="Abstractive",  # pre-selected method
)
summary_textbox = gr.Textbox(
    lines=30,
    label="Concise Summary",
    elem_classes="gr-output",  # styled by custom_css
)

# The input order must match summarize_text(text, method).
interface = gr.Interface(
    fn=summarize_text,
    inputs=[source_textbox, method_selector],
    outputs=summary_textbox,
    title="Pegasus Text Summarizer",
    description="Get a clear and concise summary of your text in seconds!",
    theme="default",  # built-in theme
    css=custom_css,  # custom stylesheet defined in this file
)
# Start serving the interface. `share` asks Gradio for a public link and
# `debug` turns on debug mode for error handling (optional).
launch_options = {
    "share": True,
    "debug": True,
}
interface.launch(**launch_options)