# app.py
# A feature-rich web app that summarizes YouTube videos using multiple models,
# chunking for long transcripts, and user-controlled output length.

import re
import time

import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound

# Model and Splitter Setup

# Maps the human-readable label shown in the UI dropdown to the Hugging Face
# model identifier that get_model() hands to the summarization pipeline.
MODELS = {
    "DistilBART (Default, Fast)": "sshleifer/distilbart-cnn-12-6",
    "BART (Large, More Accurate)": "facebook/bart-large-cnn",
    "Pegasus (Best for News/Articles)": "google/pegasus-xsum",
}

# Model cache to store loaded models
# Keyed by the same labels as MODELS; get_model() adds an entry the first
# time a label is requested and reuses it on later calls.
loaded_models = {}

def get_model(model_name):
    """
    Return the summarization pipeline registered under ``model_name``.

    ``model_name`` must be one of the keys of ``MODELS``. The first request
    for a given name builds the pipeline and stores it in ``loaded_models``;
    every later request returns that stored pipeline.

    Raises:
        ValueError: if ``model_name`` is not a key of ``MODELS``.
    """
    if model_name not in MODELS:
        raise ValueError(f"Unknown Model: {model_name}")

    # Cache miss: build the pipeline once, time it, and remember it.
    if model_name not in loaded_models:
        print(f"Loading new model: {model_name} (this may take a minute)...")
        load_started = time.time()

        summarizer = pipeline("summarization", model=MODELS[model_name])
        loaded_models[model_name] = summarizer

        elapsed = time.time() - load_started
        print(f"Model '{model_name}' loaded in {elapsed:.2f} seconds.")
        return summarizer

    # Cache hit: hand back the pipeline built by an earlier call.
    print(f"Returning cached model: {model_name}")
    return loaded_models[model_name]

# Shared transcript splitter. The size and overlap are named here so the
# chunking settings can be read at a glance.
_CHUNK_SIZE = 1000
_CHUNK_OVERLAP = 100
text_splitter = RecursiveCharacterTextSplitter(chunk_size=_CHUNK_SIZE, chunk_overlap=_CHUNK_OVERLAP)

# Core Summarization Function
# Matches the video ID in the common YouTube URL shapes: watch pages (with
# the "v" parameter anywhere in the query string), youtu.be short links, and
# the /shorts/, /embed/ and /live/ paths. The ID stops at the first character
# that cannot be part of one, so trailing "&t=..." or "#..." is excluded.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:youtube\.com/(?:watch\?(?:[^#\s]*?&)?v=|shorts/|embed/|live/)"
    r"|youtu\.be/)"
    r"([A-Za-z0-9_-]+)"
)


def _extract_video_id(youtube_url):
    """Return the video ID found in ``youtube_url``, or None if there is none."""
    if not youtube_url or not isinstance(youtube_url, str):
        return None
    match = _VIDEO_ID_PATTERN.search(youtube_url)
    return match.group(1) if match else None


def youtube_summarizer(model_name, youtube_url, min_len, max_len):
    """
    Summarize a YouTube video's transcript with the selected model.

    The transcript is fetched, split into chunks with ``text_splitter``, each
    chunk is summarized on its own, and the joined chunk summaries are then
    summarized once more to produce the final text.

    Args:
        model_name: Key of ``MODELS`` selecting which pipeline to use.
        youtube_url: Video URL (watch, youtu.be, shorts, embed or live form).
        min_len: Passed to the final summarization call as ``min_length``.
        max_len: Passed to the final summarization call as ``max_length``.

    Returns:
        The final summary text on success, otherwise a human-readable error
        message. Failures are reported through the returned string rather
        than by raising.
    """
    # Validate the cheap inputs first so that a bad request never triggers a
    # slow model load.
    video_id = _extract_video_id(youtube_url)
    if video_id is None:
        return "Please enter a valid YouTube video URL."

    try:
        # UI sliders may deliver floats; generation lengths must be integers.
        min_len = int(min_len)
        max_len = int(max_len)
    except (TypeError, ValueError, OverflowError):
        return "Error: Summary lengths must be whole numbers."

    # The two slider ranges overlap, so this combination is reachable from
    # the UI and would otherwise give the model contradictory constraints.
    if min_len > max_len:
        return "Error: Minimum summary length cannot be greater than the maximum summary length."

    try:
        summarizer = get_model(model_name)
    except Exception as e:
        return f"Error loading model: {e}"

    try:
        print(f"Fetching transcript for video ID: {video_id}")
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        transcript = " ".join(d['text'] for d in transcript_list)
        print("Transcript fetched successfully.")

        print("Splitting transcript into chunks...")
        chunks = text_splitter.split_text(transcript)
        if not chunks:
            # Nothing to summarize; do not feed an empty string to the model.
            return "Error: The transcript for this video is empty."

        print("Summarizing individual chunks...")
        # truncation=True keeps an unusually dense chunk from exceeding the
        # model's maximum input length.
        initial_summaries = [
            summarizer(
                chunk,
                max_length=150,
                min_length=30,
                do_sample=False,
                truncation=True,
            )[0]['summary_text']
            for chunk in chunks
        ]

        print("Creating final summary...")
        combined_summary_text = " ".join(initial_summaries)
        # For long videos the joined chunk summaries can be longer than the
        # model accepts; truncate instead of failing. NOTE: this means the
        # tail of a very long video may not influence the final summary.
        final_summary = summarizer(
            combined_summary_text,
            max_length=max_len,
            min_length=min_len,
            do_sample=False,
            truncation=True,
        )

        return final_summary[0]['summary_text']

    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video."
    except NoTranscriptFound:
        return "Error: No English transcript found for this video. The API may not support auto-generated captions."
    except Exception as e:
        return f"An unexpected error occurred: {e}"

# Gradio Interface
print("Creating Gradio interface...")

# Input widgets, in the same order as youtube_summarizer's parameters.
model_dropdown = gr.Dropdown(
    choices=list(MODELS),
    value="DistilBART (Default, Fast)",
    label="Select Summarization Model",
)
url_box = gr.Textbox(
    label="YouTube URL",
    placeholder="e.g., https://www.youtube.com/watch?v=...",
)
min_len_slider = gr.Slider(
    minimum=30, maximum=200, value=70, step=10,
    label="Minimum Final Summary Length",
)
max_len_slider = gr.Slider(
    minimum=100, maximum=500, value=350, step=10,
    label="Maximum Final Summary Length",
)

# Single output widget that shows either the summary or an error message.
summary_box = gr.Textbox(label="Video Summary", lines=10)

# Markdown shown under the title.
app_description = """
**Summarize any YouTube video with your choice of AI model!**

1.  Select a summarization model from the dropdown.
2.  Paste a YouTube video URL.
3.  Adjust the sliders to control the length of the final summary.

*Note: Loading a new model for the first time may take a minute or two. Subsequent uses will be much faster.*
"""

# Each example row is [model label, URL, minimum length, maximum length].
example_rows = [
    ["DistilBART (Default, Fast)", "https://www.youtube.com/watch?v=jaYN-iwgw2g", 50, 150],
    ["BART (Large, More Accurate)", "https://www.youtube.com/watch?v=Yo22h_i_5kY", 100, 300],
]

demo = gr.Interface(
    fn=youtube_summarizer,
    inputs=[model_dropdown, url_box, min_len_slider, max_len_slider],
    outputs=summary_box,
    title="Advanced YouTube Video Summarizer",
    description=app_description,
    allow_flagging="never",
    examples=example_rows,
)

print("Launching Gradio app...")
demo.launch()