import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Token budgets: the tokenizer truncates inputs to MAX_INPUT_LEN and
# generation is capped at MAX_TARGET_LEN.
MAX_INPUT_LEN = 768
MAX_TARGET_LEN = 150

# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fetch the fine-tuned checkpoint from the Hugging Face Hub and place the
# model on the selected device.
tokenizer = AutoTokenizer.from_pretrained("Adarsh921/flan-t5-english-summarizer")
model = AutoModelForSeq2SeqLM.from_pretrained(
    "Adarsh921/flan-t5-english-summarizer"
).to(device)


def summarize(text):
    """Generate an abstractive summary of ``text`` with the loaded model.

    The input is tokenized (truncated to ``MAX_INPUT_LEN`` tokens) and
    decoded with beam search: 6 beams, no repeated trigrams, and an output
    length between 40 and ``MAX_TARGET_LEN`` tokens.

    Args:
        text: Article text to summarize. ``None``, empty, and
            whitespace-only values are accepted.

    Returns:
        The decoded summary with special tokens removed and surrounding
        whitespace stripped, or ``""`` when there is nothing to summarize.
    """
    # Blank input would otherwise still go through beam search, and
    # min_length=40 would make the model emit a fabricated summary.
    if text is None or not text.strip():
        return ""

    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_INPUT_LEN,
    ).to(device)

    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_length=MAX_TARGET_LEN,
            min_length=40,
            num_beams=6,
            no_repeat_ngram_size=3,
            length_penalty=1.0,
            early_stopping=True,
        )

    # A single input sequence was encoded, so the first row is the summary.
    return tokenizer.decode(generated[0], skip_special_tokens=True).strip()

# Gradio UI: one text box in, one text box out, wired to summarize().
article_box = gr.Textbox(lines=10, label="Paste english Article")
summary_box = gr.Textbox(label="Generated Summary")

demo = gr.Interface(
    fn=summarize,
    inputs=article_box,
    outputs=summary_box,
    title="English Article Summarizer",
    description="Summarizer fine-tuned on ILSUM 2024 using Flan-T5",
)

# share=True additionally requests a public share link for the app.
demo.launch(share=True)