# Text_Summarizer / streamlit_app.py
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import spacy
import subprocess
import sentencepiece  # not used directly; the T5 tokenizer requires it at runtime
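
# The imports above imply a requirements file roughly like the following
# (a sketch; exact version pins are not recorded in this file):
#   streamlit
#   torch
#   transformers
#   spacy
#   sentencepiece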

@st.cache_resource
def load_spacy():
    """Load the small English spaCy pipeline, downloading it if missing."""
    try:
        return spacy.load("en_core_web_sm")
    except OSError:
        # The model is not installed yet; fetch it once, then load it.
        subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
        return spacy.load("en_core_web_sm")

nlp = load_spacy()
repo_id = "kodamkarthik281/t5-cnn-summary-karthi"

@st.cache_resource
def load_model_and_tokenizer():
    """Fetch the fine-tuned T5 summarizer and its tokenizer from the Hub."""
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer()
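# st.cache_resource keeps a single model/tokenizer instance per server process,
# so the Hub download and weight loading only happen on the first request.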

def generate_summary(text, model, tokenizer, max_input_len=512, max_output_len=150, num_beams=4):
    """Abstractive summary: T5 with the "summarize:" task prefix and beam search."""
    input_ids = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=max_input_len,
        truncation=True,
    )
    input_ids = input_ids.to(model.device)
    with torch.no_grad():  # inference only, no gradients needed
        output_ids = model.generate(
            input_ids,
            max_length=max_output_len,
            num_beams=num_beams,
            early_stopping=True,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
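# A minimal usage sketch for testing outside Streamlit (hypothetical input
# text; assumes model and tokenizer are loaded as above):
#   article = "Long news article text ..."
#   print(generate_summary(article, model, tokenizer, max_output_len=80, num_beams=2))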

def extractive_summary(text):
    """Extractive summary: keep the three longest sentences, in document order."""
    doc = nlp(text)
    sentences = list(doc.sents)
    if len(sentences) <= 3:
        return " ".join(str(s) for s in sentences)
    # Pick the three longest sentences (by token count), then restore their
    # original order so the summary reads naturally.
    top = sorted(sentences, key=len, reverse=True)[:3]
    top.sort(key=lambda s: s.start)
    return " ".join(str(s) for s in top)
st.set_page_config(page_title="Text Summarizer", layout="wide")
st.title("πŸ“š Text Summarizer : Abstractive vs Extractive")
st.markdown("""
Paste your paragraph below and compare:
- **Abstractive Summary** : Using the fine-tuned model
- **Extractive Summary** : Using spaCy's
""")
user_input = st.text_area("Paste your paragraph:", height=150)
if "history" not in st.session_state:
st.session_state.history = []
if st.button("Summarize"):
    if user_input.strip():
        with st.spinner("Generating summaries..."):
            st.markdown("**Note:** This app may take 2–3 minutes to generate a summary after clicking the button.")
            st.markdown(
                "**Why is it slow?** The model is a fine-tuned Transformer (T5) loaded from "
                "Hugging Face. Due to limited compute on Hugging Face Spaces (CPU-only, shared "
                "infrastructure), initial inference can take some time. Please be patient."
            )
            abs_summary = generate_summary(user_input, model, tokenizer)
            ext_summary = extractive_summary(user_input)
            st.session_state.history.append({
                "input": user_input,
                "abstractive": abs_summary,
                "extractive": ext_summary,
            })
            col1, col2 = st.columns(2)
            with col1:
                st.subheader("Abstractive Summary")
                st.success(abs_summary)
            with col2:
                st.subheader("Extractive Summary")
                st.info(ext_summary)
    else:
        st.warning("Please enter a paragraph to summarize.")
if st.session_state.history:
    st.markdown("---")
    st.subheader("Summary History")
    # Show the most recent summaries first.
    for i, item in enumerate(reversed(st.session_state.history), 1):
        with st.expander(f"Example #{i}"):
            st.markdown("**Original Text:**")
            st.write(item["input"])
            col1, col2 = st.columns(2)
            with col1:
                st.markdown("**Abstractive Summary:**")
                st.success(item["abstractive"])
            with col2:
                st.markdown("**Extractive Summary:**")
                st.info(item["extractive"])
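
# Run locally with:
#   streamlit run streamlit_app.py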