"""Streamlit app that compares abstractive (fine-tuned T5) and extractive
(spaCy, longest-sentence heuristic) summaries of a user-supplied paragraph."""

import subprocess
import sys

import streamlit as st
import torch
import spacy
import sentencepiece  # noqa: F401 -- needed at runtime by the T5 tokenizer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM


@st.cache_resource
def load_spacy():
    """Load the small English spaCy pipeline, downloading it on first run.

    Returns:
        The loaded ``en_core_web_sm`` Language object.
    """
    try:
        return spacy.load("en_core_web_sm")
    except OSError:
        # spacy.load raises OSError when the model package is missing.
        # Use sys.executable (not a bare "python") so the download lands in
        # the interpreter/venv actually running this app; check=True makes a
        # failed download raise instead of crashing on the retry below.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
            check=True,
        )
        return spacy.load("en_core_web_sm")


nlp = load_spacy()

repo_id = "kodamkarthik281/t5-cnn-summary-karthi"


@st.cache_resource
def load_model_and_tokenizer():
    """Fetch the fine-tuned T5 model and tokenizer from the Hugging Face Hub.

    Returns:
        (model, tokenizer) tuple; the model is switched to eval mode since
        this app only ever runs inference.
    """
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)
    model.eval()  # inference-only: disable dropout etc.
    return model, tokenizer


model, tokenizer = load_model_and_tokenizer()


def generate_summary(text, model, tokenizer, max_input_len=512,
                     max_output_len=150, num_beams=4):
    """Generate an abstractive summary of *text* with beam search.

    Args:
        text: Paragraph to summarize.
        model: Seq2seq model (T5-style, expects a "summarize: " prefix).
        tokenizer: Matching tokenizer.
        max_input_len: Truncation length for the encoded input.
        max_output_len: Maximum length of the generated summary.
        num_beams: Beam width for generation.

    Returns:
        The decoded summary string (special tokens stripped).
    """
    input_ids = tokenizer.encode(
        "summarize: " + text,
        return_tensors="pt",
        max_length=max_input_len,
        truncation=True,
    )
    input_ids = input_ids.to(model.device)
    # no_grad: generation needs no autograd graph -- saves memory and time,
    # which matters on the CPU-only Space this app runs on.
    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            max_length=max_output_len,
            num_beams=num_beams,
            early_stopping=True,
        )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


def extractive_summary(text):
    """Return up to the 3 longest sentences of *text*, in document order.

    Length is measured in tokens (len of a spaCy Span). Texts of three or
    fewer sentences are returned whole.
    """
    doc = nlp(text)
    sentences = list(doc.sents)
    if len(sentences) <= 3:
        return " ".join(s.text for s in sentences)
    # Pick the three longest sentences, then re-sort by position so the
    # summary reads coherently (previously they were emitted longest-first,
    # scrambling the original narrative order).
    top = sorted(sentences, key=len, reverse=True)[:3]
    top.sort(key=lambda s: s.start)
    return " ".join(s.text for s in top)


# ---------------------------------------------------------------------------
# UI
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Text Summarizer", layout="wide")
st.title("📚 Text Summarizer : Abstractive vs Extractive")
st.markdown("""
Paste your paragraph below and compare:
- **Abstractive Summary** : Using the fine-tuned model
- **Extractive Summary** : Using spaCy's
""")

user_input = st.text_area("Paste your paragraph:", height=150)

# Per-session history of (input, summaries) triples, newest appended last.
if "history" not in st.session_state:
    st.session_state.history = []

if st.button("Summarize"):
    if user_input.strip():
        with st.spinner("Generating summaries..."):
            st.markdown("**Note :** This app may take 2–3 minutes to generate a summary after clicking the button.", unsafe_allow_html=True)
            st.markdown("""**Why is it slow? :** The model is a fine-tuned Transformer (T5) loaded from Hugging Face. Due to limited compute resources on Hugging Face Spaces (CPU-only and shared infrastructure), initial inference can take some time. Please be patient.""", unsafe_allow_html=True)
            abs_summary = generate_summary(user_input, model, tokenizer)
            ext_summary = extractive_summary(user_input)
            st.session_state.history.append({
                "input": user_input,
                "abstractive": abs_summary,
                "extractive": ext_summary,
            })

        col1, col2 = st.columns(2)
        with col1:
            st.subheader("Abstractive Summary")
            st.success(abs_summary)
        with col2:
            st.subheader("Extractive Summary")
            st.info(ext_summary)
    else:
        st.warning("Please enter a paragraph to summarize.")

# Render history newest-first so the latest comparison is on top.
if st.session_state.history:
    st.markdown("---")
    st.subheader("Summary History")
    for i, item in enumerate(reversed(st.session_state.history), 1):
        with st.expander(f"Example #{i}"):
            st.markdown("**Original Text:**")
            st.write(item["input"])
            col1, col2 = st.columns(2)
            with col1:
                st.markdown("**Abstractive Summary:**")
                st.success(item["abstractive"])
            with col2:
                st.markdown("**Extractive Summary:**")
                st.info(item["extractive"])