"""Streamlit app that compares abstractive (fine-tuned T5) and extractive
(spaCy, longest-sentence heuristic) summaries of a user-supplied paragraph."""

import subprocess
import sys

import streamlit as st
import torch
import spacy
import sentencepiece  # noqa: F401 -- needed at runtime by the T5 tokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


@st.cache_resource
def load_spacy():
    """Load the small English spaCy pipeline, downloading it on first run.

    Returns:
        The loaded ``en_core_web_sm`` Language object.
    """
    try:
        return spacy.load("en_core_web_sm")
    except OSError:
        # spacy.load raises OSError when the model package is missing.
        # Use sys.executable (not a bare "python") so the download lands in
        # the interpreter/venv actually running this app; check=True makes a
        # failed download raise instead of crashing on the retry below.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
            check=True,
        )
        return spacy.load("en_core_web_sm")


nlp = load_spacy()

repo_id = "kodamkarthik281/t5-cnn-summary-karthi"


@st.cache_resource
def load_model_and_tokenizer():
    """Fetch the fine-tuned T5 model and tokenizer from the Hugging Face Hub.

    Returns:
        (model, tokenizer) tuple; the model is switched to eval mode since
        this app only ever runs inference.
    """
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
    model.eval()  # inference-only: disable dropout etc.
    return model, tokenizer


model, tokenizer = load_model_and_tokenizer()


def generate_summary(text, model, tokenizer, max_input_len=512,
                     max_output_len=150, num_beams=4):
    """Generate an abstractive summary of *text* with beam search.

    Args:
        text: Paragraph to summarize.
        model: Seq2seq model (T5-style, expects a "summarize: " prefix).
        tokenizer: Matching tokenizer.
        max_input_len: Truncation length for the encoded input.
        max_output_len: Maximum length of the generated summary.
        num_beams: Beam width for generation.

    Returns:
        The decoded summary string (special tokens stripped).
    """
    input_ids = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=max_input_len,
        truncation=True,
    )
    input_ids = input_ids.to(model.device)
    # no_grad: generation needs no autograd graph -- saves memory and time,
    # which matters on the CPU-only Space this app runs on.
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_length=max_output_len,
            num_beams=num_beams,
            early_stopping=True,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


def extractive_summary(text):
    """Return up to the 3 longest sentences of *text*, in document order.

    Length is measured in tokens (len of a spaCy Span). Texts of three or
    fewer sentences are returned whole.
    """
    doc = nlp(text)
    sentences = list(doc.sents)
    if len(sentences) <= 3:
        return " ".join(s.text for s in sentences)
    # Pick the three longest sentences, then re-sort by position so the
    # summary reads coherently (previously they were emitted longest-first,
    # scrambling the original narrative order).
    top = sorted(sentences, key=len, reverse=True)[:3]
    top.sort(key=lambda s: s.start)
    return " ".join(s.text for s in top)


# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Text Summarizer", layout="wide")
st.title("📚 Text Summarizer : Abstractive vs Extractive")
st.markdown("""
Paste your paragraph below and compare:
- **Abstractive Summary** : Using the fine-tuned model
- **Extractive Summary** : Using spaCy's
""")

user_input = st.text_area("Paste your paragraph:", height=150)

# Per-session history of (input, summaries) triples, newest appended last.
if "history" not in st.session_state:
    st.session_state.history = []

if st.button("Summarize"):
    if user_input.strip():
        with st.spinner("Generating summaries..."):
            st.markdown("**Note :** This app may take 2–3 minutes to generate a summary after clicking the button.", unsafe_allow_html=True)
            st.markdown("""**Why is it slow? :** The model is a fine-tuned Transformer (T5) loaded from Hugging Face. Due to limited compute resources on Hugging Face Spaces (CPU-only and shared infrastructure), initial inference can take some time. Please be patient.""", unsafe_allow_html=True)
            abs_summary = generate_summary(user_input, model, tokenizer)
            ext_summary = extractive_summary(user_input)
            st.session_state.history.append({
                "input": user_input,
                "abstractive": abs_summary,
                "extractive": ext_summary,
            })

        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Abstractive Summary")
            st.success(abs_summary)
        with col2:
            st.subheader("Extractive Summary")
            st.info(ext_summary)
    else:
        st.warning("Please enter a paragraph to summarize.")

# Render history newest-first so the latest comparison is on top.
if st.session_state.history:
    st.markdown("---")
    st.subheader("Summary History")
    for i, item in enumerate(reversed(st.session_state.history), 1):
        with st.expander(f"Example #{i}"):
            st.markdown("**Original Text:**")
            st.write(item["input"])
            col1, col2 = st.columns(2)
            with col1:
                st.markdown("**Abstractive Summary:**")
                st.success(item["abstractive"])
            with col2:
                st.markdown("**Extractive Summary:**")
                st.info(item["extractive"])