# Page header captured together with this file (not part of the program):
#   Spaces: Nikhil2411/textsumerizar1234 (status: Sleeping)
#   File size: 4,614 Bytes
#   Revision: 16d1baf

import gradio as gr
import nltk
import numpy as np
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import string
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Download the NLTK data used by sent_tokenize, word_tokenize and stopwords.
# Newer NLTK releases load the tokenizer tables from 'punkt_tab' instead of
# 'punkt', so both are fetched to keep tokenization working on either version.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

# Load the Pegasus model and its tokenizer once at import time; both are
# reused by every abstractive-summary request.
# NOTE(review): the name has no hub namespace, so it presumably refers to a
# local directory shipped next to this file — confirm.
model_name = "pegasus-fine_tuned_model"  # Example Pegasus model
tokenizer = PegasusTokenizer.from_pretrained(model_name)
model = PegasusForConditionalGeneration.from_pretrained(model_name)

def preprocess_text(text):
    """Split *text* into sentences and build a cleaned copy of each one.

    Each sentence is lowercased and word-tokenized. Tokens that are English
    stopwords, or that occur inside ``string.punctuation``, are dropped and
    the remaining tokens are re-joined with single spaces.

    Returns:
        A ``(sentences, cleaned)`` pair: the sentences exactly as produced by
        ``sent_tokenize`` and a parallel list of cleaned strings.
    """
    sentences = sent_tokenize(text)
    english_stopwords = set(stopwords.words('english'))

    def _clean(sentence):
        # Keep only tokens that are neither stopwords nor punctuation.
        kept = [
            token
            for token in word_tokenize(sentence.lower())
            if not (token in english_stopwords or token in string.punctuation)
        ]
        return ' '.join(kept)

    return sentences, [_clean(sentence) for sentence in sentences]

def build_similarity_matrix(sentences):
    """Return the pairwise cosine similarity of the TF-IDF vectors of *sentences*."""
    # A fresh vectorizer is fitted on every call, so the vocabulary comes
    # only from the sentences passed in.
    sentence_vectors = TfidfVectorizer().fit_transform(sentences)
    return cosine_similarity(sentence_vectors)

def textrank_summary(text, num_sentences=3):
    """Summarize *text* extractively by ranking its sentences with PageRank.

    The preprocessed sentences are compared pairwise (TF-IDF cosine
    similarity), the similarity matrix is turned into a graph, and the
    sentences with the highest PageRank scores are joined with single
    spaces, highest score first.

    Args:
        text: The text to summarize.
        num_sentences: Maximum number of sentences to include.

    Returns:
        The summary string. ``''`` when *text* yields no sentences; the
        leading sentences when no sentence contains a usable term.
    """
    original_sentences, preprocessed_sentences = preprocess_text(text)

    # Blank input produces no sentences, so there is nothing to rank.
    if not original_sentences:
        return ''

    try:
        similarity_matrix = build_similarity_matrix(preprocessed_sentences)
    except ValueError:
        # TfidfVectorizer raises ValueError when the vocabulary is empty
        # (e.g. every token was a stopword or punctuation). No ranking is
        # possible then, so fall back to the leading sentences instead of
        # crashing the caller.
        return ' '.join(original_sentences[:num_sentences])

    similarity_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(similarity_graph)

    # Sort (score, sentence) pairs so the best-scoring sentences come first.
    ranked_sentences = sorted(
        ((scores[i], s) for i, s in enumerate(original_sentences)),
        reverse=True,
    )
    return ' '.join(sentence for _, sentence in ranked_sentences[:num_sentences])

def tfidf_summary(text, num_sentences=3):
    """Summarize *text* extractively by scoring sentences with summed TF-IDF.

    Each preprocessed sentence is scored by the sum of its TF-IDF weights;
    the highest-scoring original sentences are joined with single spaces,
    highest score first.

    Args:
        text: The text to summarize.
        num_sentences: Maximum number of sentences to include.

    Returns:
        The summary string. ``''`` when *text* yields no sentences; the
        leading sentences when no sentence contains a usable term.
    """
    original_sentences, preprocessed_sentences = preprocess_text(text)

    # Blank input produces no sentences, so there is nothing to score.
    if not original_sentences:
        return ''

    tfidf_vectorizer = TfidfVectorizer()
    try:
        tfidf_matrix = tfidf_vectorizer.fit_transform(preprocessed_sentences)
    except ValueError:
        # TfidfVectorizer raises ValueError when the vocabulary is empty
        # (e.g. every token was a stopword or punctuation). Fall back to the
        # leading sentences instead of crashing the caller.
        return ' '.join(original_sentences[:num_sentences])

    # Row sums of the TF-IDF matrix give one score per sentence.
    sentence_scores = np.array(tfidf_matrix.sum(axis=1)).flatten()
    ranked_sentences = [
        original_sentences[i]
        for i in np.argsort(sentence_scores, axis=0)[::-1]
    ]
    return ' '.join(ranked_sentences[:num_sentences])

def pegasus_summary(text):
    """Generate an abstractive summary of *text* with the module-level model.

    The text is tokenized into PyTorch tensors (truncated to at most 1024
    tokens), a summary is generated with 5-beam search, and the first
    generated sequence is decoded with special tokens removed.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        max_length=1024,
        truncation=True,
        padding=True,
    )
    generation_options = {
        "max_length": 250,  # Adjust max_length as needed
        "min_length": 30,
        "num_beams": 5,
        "early_stopping": True,
    }
    generated_ids = model.generate(encoded["input_ids"], **generation_options)
    first_sequence = generated_ids[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

def summarize_text(text, method):
    """Summarize *text* with the strategy selected by *method*.

    Recognized values of *method* are ``"TF-IDF"``, ``"TextRank"`` and
    ``"Abstractive"``. Any other value returns ``None``.
    """
    # Each entry pairs a method label with a thunk, so a summarizer is only
    # looked up and run once its label has matched.
    routes = (
        ("TF-IDF", lambda: tfidf_summary(text)),
        ("TextRank", lambda: textrank_summary(text)),
        ("Abstractive", lambda: pegasus_summary(text)),
    )
    for label, run in routes:
        if method == label:
            return run()
    return None

# Custom CSS for styling; handed to gr.Interface through its css= argument.
# The gr-input and gr-output classes are attached to the two text boxes via
# elem_classes. gr-box and gr-button are not assigned anywhere in this file;
# presumably they target Gradio's own class names (TODO confirm against the
# installed Gradio version).
custom_css = """
.gr-box {
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);
    margin: 20px 0;
    background-color: #fff;
}

.gr-input, .gr-output {
    border: 1px solid #ccc;
    border-radius: 5px;
    padding: 10px;
    font-size: 16px;
}

.gr-button {
    background-color: #007bff;
    color: white;
    padding: 10px 20px;
    border: none;
    border-radius: 5px;
    font-size: 16px;
    cursor: pointer;
    transition: background-color 0.3s;
}

.gr-button:hover {
    background-color: #0056b3;
}
"""

# Build the Gradio UI: a text box and a method selector feed summarize_text,
# whose result is shown in a second text box.
input_textbox = gr.Textbox(
    lines=30,
    placeholder="Paste your text here...",
    label="Input Text",
    elem_classes="gr-input",  # Apply custom CSS class
)
method_selector = gr.Radio(
    choices=["TF-IDF", "TextRank", "Abstractive"],
    label="Summarization Method",
    value="Abstractive",
)
summary_textbox = gr.Textbox(
    lines=30,
    label="Concise Summary",
    elem_classes="gr-output",  # Apply custom CSS class
)

interface = gr.Interface(
    fn=summarize_text,
    inputs=[input_textbox, method_selector],
    outputs=summary_textbox,
    title="Pegasus Text Summarizer",
    description="Get a clear and concise summary of your text in seconds!",
    theme="default",  # Use a built-in theme
    css=custom_css,  # Add custom CSS
)

# Start serving the interface with the options below.
launch_options = {
    "share": True,
    "debug": True,  # Enable debug mode for error handling (optional)
}
interface.launch(**launch_options)