| import gradio as gr |
| from transformers import pipeline |
| import networkx as nx |
| import numpy as np |
| import re |
| import nltk |
| from nltk.tokenize import sent_tokenize |
| from sklearn.feature_extraction.text import TfidfVectorizer |
| from sklearn.metrics.pairwise import cosine_similarity |
|
|
| |
# Sentence-tokenizer data used by sent_tokenize. Newer NLTK releases load the
# "punkt_tab" resource instead of the older pickled "punkt" model, so fetch
# both to keep sentence splitting working across NLTK versions.
nltk.download('punkt')
nltk.download('punkt_tab')
|
|
| |
# Abstractive summarization pipeline, built once at module import and shared
# by every call to abstractive_summarization.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
|
|
| |
def extractive_summarization(text, num_sentences=3):
    """Build an extractive summary by ranking sentences TextRank-style.

    Sentences are turned into TF-IDF vectors, connected in a graph whose edge
    weights are their pairwise cosine similarities, and scored with PageRank.
    The ``num_sentences`` best-scoring sentences are kept and returned in the
    order they appear in ``text`` so the summary reads naturally.

    Args:
        text: Passage to summarize. ``None`` is treated as empty text.
        num_sentences: How many sentences the summary should contain.

    Returns:
        The selected sentences joined by single spaces. A human-readable
        message is returned instead when ``text`` has no more than
        ``num_sentences`` sentences, or when vectorizing/ranking fails.
    """
    sentences = sent_tokenize(text or "")

    # Nothing to condense unless there are more sentences than requested.
    if len(sentences) <= num_sentences:
        return "Text is too short for extractive summarization."

    try:
        # One TF-IDF row per sentence, so the row count equals len(sentences)
        # and needs no separate size check after the guard above.
        sentence_vectors = TfidfVectorizer(stop_words="english").fit_transform(sentences)
        similarity_matrix = cosine_similarity(sentence_vectors)
        scores = nx.pagerank(nx.from_numpy_array(similarity_matrix))
    except Exception as e:
        # This function feeds a UI text box directly, so any failure while
        # vectorizing or ranking is reported as text rather than raised.
        return f"Error in extractive summarization: {str(e)}"

    # Rank by score (ties broken by sentence text), keep the best ones, then
    # restore document order for the final summary.
    ranked_indices = sorted(
        range(len(sentences)),
        key=lambda i: (scores[i], sentences[i]),
        reverse=True,
    )
    chosen = sorted(ranked_indices[:num_sentences])
    return " ".join(sentences[i] for i in chosen)
|
|
|
|
| |
def abstractive_summarization(text, length):
    """Generate an abstractive summary with the module-level ``summarizer``.

    Args:
        text: Passage to summarize. It must contain at least 30
            whitespace-separated words.
        length: One of ``"short"``, ``"medium"`` or ``"long"``; selects the
            ``max_length`` passed to the pipeline (50, 100 or 150).

    Returns:
        The generated summary text, or a fixed message when ``text`` has
        fewer than 30 words.

    Raises:
        KeyError: If ``length`` is not one of the three supported values
            (only checked once the text is long enough).
    """
    if len(text.split()) < 30:
        return "Text is too short for summarization."

    max_length = {"short": 50, "medium": 100, "long": 150}[length]

    # truncation=True clips inputs that exceed the model's maximum input
    # length; without it, long articles are fed to the model unclipped and
    # the call fails instead of producing a summary.
    result = summarizer(
        text,
        max_length=max_length,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return result[0]['summary_text']
|
|
| |
def summarize_text(text, method, length):
    """Route a summarization request to the selected strategy.

    Args:
        text: Passage to summarize.
        method: ``"Abstractive (BART)"`` selects the abstractive model; any
            other value selects the extractive ranker.
        length: ``"short"``, ``"medium"`` or ``"long"``. For the extractive
            path this maps to 2, 4 or 6 sentences.

    Returns:
        Whatever string the chosen summarization function returns.
    """
    if method != "Abstractive (BART)":
        sentence_budget = {"short": 2, "medium": 4, "long": 6}
        return extractive_summarization(text, sentence_budget[length])

    return abstractive_summarization(text, length)
|
|
| |
def process_file(file):
    """Return the text content of an uploaded file.

    Accepts the different shapes an upload may arrive in: ``None`` (the
    upload was cleared), a readable file-like object, a filesystem path, or
    an object exposing the path as ``.name``.

    Args:
        file: The uploaded file as described above.

    Returns:
        The file's text. An empty string is returned when ``file`` is
        ``None``.

    Raises:
        UnicodeDecodeError: If the file content is not valid UTF-8.
        OSError: If a path is given and the file cannot be opened.
    """
    # Clearing the upload fires the change event with no file at all.
    if file is None:
        return ""

    if hasattr(file, "read"):
        data = file.read()
    else:
        # A plain path, or a wrapper object that only carries the path.
        path = getattr(file, "name", file)
        with open(path, "rb") as handle:
            data = handle.read()

    if isinstance(data, bytes):
        # utf-8-sig decodes plain UTF-8 and also drops a leading BOM, which
        # would otherwise end up as a stray character in the text box.
        return data.decode("utf-8-sig")
    return data
|
|
| |
# Gradio UI: option radios, a text box (optionally filled from an uploaded
# file), a trigger button, and a read-only box for the resulting summary.
with gr.Blocks(theme=gr.themes.Soft()) as iface:
    gr.Markdown("# 📄 AI-Powered Text Summarizer")
    gr.Markdown(
        "Summarize long articles, news, and research papers using advanced NLP models."
    )

    # Summarization options.
    with gr.Row():
        method_choice = gr.Radio(
            choices=["Abstractive (BART)", "Extractive (TextRank)"],
            label="Summarization Type",
            value="Abstractive (BART)",
        )
        length_choice = gr.Radio(
            choices=["short", "medium", "long"],
            label="Summary Length",
            value="medium",
        )

    # Input area: paste text directly or load it from a file.
    text_input = gr.Textbox(
        lines=8,
        placeholder="Paste long text here...",
        label="Input Text",
    )
    file_input = gr.File(label="Or Upload a .txt file")
    summarize_button = gr.Button("Summarize ✨")

    # Output area; users cannot edit the generated summary.
    summary_output = gr.Textbox(
        lines=6,
        label="Summarized Text",
        interactive=False,
    )

    # Event wiring: a changed upload fills the text box, and the button runs
    # the summarizer on the current text and options.
    file_input.change(
        process_file,
        inputs=file_input,
        outputs=text_input,
    )
    summarize_button.click(
        summarize_text,
        inputs=[text_input, method_choice, length_choice],
        outputs=summary_output,
    )
|
|
| |
# Start the Gradio app only when this file is run as a script, so importing
# the module does not launch a server.
if __name__ == "__main__":
    iface.launch()
|
|