import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import spacy
import subprocess
import sentencepiece
def load_spacy():
    # Load spaCy's small English pipeline, downloading it on first run if it is missing.
    try:
        return spacy.load("en_core_web_sm")
    except OSError:
        subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
        return spacy.load("en_core_web_sm")

nlp = load_spacy()
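# Note (not part of the original app): Streamlit re-runs this script top to
# bottom on every user interaction, so load_spacy() and, below,
# load_model_and_tokenizer() execute on each rerun. Decorating those loaders
# with @st.cache_resource would keep the spaCy pipeline and the T5 checkpoint
# in memory between reruns; treat that as an optional optimization sketch,
# not something this code currently does.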
repo_id = "kodamkarthik281/t5-cnn-summary-karthi"

def load_model_and_tokenizer():
    # Pull the fine-tuned T5 summarization checkpoint and its tokenizer from the Hugging Face Hub.
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer()
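# generate_summary (below) performs abstractive summarization: T5 is a
# text-to-text model, so the input is prefixed with "summarize: ", truncated
# to max_input_len tokens, and decoded with beam search (num_beams candidates,
# early stopping) into a summary of at most max_output_len tokens.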
def generate_summary(text, model, tokenizer, max_input_len=512, max_output_len=150, num_beams=4):
    input_ids = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=max_input_len,
        truncation=True,
    )
    input_ids = input_ids.to(model.device)
    output_ids = model.generate(input_ids, max_length=max_output_len, num_beams=num_beams, early_stopping=True)
    summary = tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return summary
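# extractive_summary (below) is a simple length-based heuristic rather than a
# learned model: inputs of three sentences or fewer are returned unchanged;
# otherwise the three longest sentences (by spaCy token count) are joined,
# which does not preserve their original order.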
def extractive_summary(text):
    doc = nlp(text)
    sentences = list(doc.sents)
    if len(sentences) <= 3:
        return " ".join(str(s) for s in sentences)
    sorted_sents = sorted(sentences, key=lambda s: len(s), reverse=True)
    return " ".join(str(s) for s in sorted_sents[:3])
st.set_page_config(page_title="Text Summarizer", layout="wide")
st.title("Text Summarizer: Abstractive vs Extractive")
st.markdown("""
Paste your paragraph below and compare:
- **Abstractive Summary**: generated by the fine-tuned T5 model
- **Extractive Summary**: built from spaCy's sentence segmentation (the three longest sentences)
""")
user_input = st.text_area("Paste your paragraph:", height=150)

if "history" not in st.session_state:
    st.session_state.history = []
if st.button("Summarize"):
    if user_input.strip():
        with st.spinner("Generating summaries..."):
            st.markdown("**Note:** This app may take 2–3 minutes to generate a summary after clicking the button.", unsafe_allow_html=True)
            st.markdown("""**Why is it slow?** The model is a fine-tuned Transformer (T5) loaded from Hugging Face. Due to limited compute resources on Hugging Face Spaces (CPU-only and shared infrastructure), initial inference can take some time. Please be patient.""", unsafe_allow_html=True)
            abs_summary = generate_summary(user_input, model, tokenizer)
            ext_summary = extractive_summary(user_input)

            st.session_state.history.append({
                "input": user_input,
                "abstractive": abs_summary,
                "extractive": ext_summary
            })

            col1, col2 = st.columns(2)
            with col1:
                st.subheader("Abstractive Summary")
                st.success(abs_summary)
            with col2:
                st.subheader("Extractive Summary")
                st.info(ext_summary)
    else:
        st.warning("Please enter a paragraph to summarize.")
if st.session_state.history:
    st.markdown("---")
    st.subheader("Summary History")
    for i, item in enumerate(reversed(st.session_state.history), 1):
        with st.expander(f"Example #{i}"):
            st.markdown("**Original Text:**")
            st.write(item["input"])
            col1, col2 = st.columns(2)
            with col1:
                st.markdown("**Abstractive Summary:**")
                st.success(item["abstractive"])
            with col2:
                st.markdown("**Extractive Summary:**")
                st.info(item["extractive"])