import os
import tempfile
import textwrap

import streamlit as st
import whisper
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# =========================
# 1. PAGE CONFIG
# =========================
# Browser-tab metadata and page layout.
# NOTE(review): Streamlit expects set_page_config to run before other st.* calls — keep it first.
st.set_page_config(
    layout="wide",
    page_icon="🧠",
    page_title="AI Meeting & Lecture Summarizer",
)

# Introductory blurb rendered under the page title.
_INTRO_MARKDOWN = (
    """
    Upload a **recorded class / meeting audio file** or paste a **transcript**, and this app will:
    - ✅ Transcribe (if audio)
    - 📝 Summarize the content
    - ✅ Extract **Action Items**
    - 📌 Extract **Key Points / Decisions**
    """
)

st.title("🧠 AI Meeting & Lecture Summarizer")
st.write(_INTRO_MARKDOWN)

# =========================
# 2. LOAD MODELS (CACHED)
# =========================

@st.cache_resource
def load_whisper_model(model_name: str = "small"):
    """
    Return the Whisper ASR model identified by ``model_name``.

    The function is wrapped in ``st.cache_resource``, so the loaded model is
    kept in Streamlit's resource cache instead of being rebuilt on each call.
    """
    return whisper.load_model(model_name)

@st.cache_resource
def load_summarization_model():
    """
    Build the summarization tokenizer and seq2seq model.

    Both are loaded from the ``facebook/bart-large-cnn`` checkpoint and the
    pair is kept in Streamlit's resource cache via ``st.cache_resource``.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = "facebook/bart-large-cnn"
    return (
        AutoTokenizer.from_pretrained(checkpoint),
        AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
    )

# Module-level handles read by summarize_text(); the loader is wrapped in
# st.cache_resource, so this call goes through Streamlit's resource cache.
tokenizer, summarizer_model = load_summarization_model()

# =========================
# 3. CORE SUMMARIZATION FUNCTIONS
# =========================

def summarize_text(text, max_len=120, min_len=40):
    """
    Summarize a given text using the BART model.

    Args:
        text: Text to summarize. The tokenizer truncates anything beyond
            1024 tokens, so callers with longer input should chunk it first.
        max_len: Upper bound on the generated summary length, in tokens.
        min_len: Lower bound on the generated summary length, in tokens.
            Clamped so it never exceeds ``max_len``.

    Returns:
        The decoded summary string, or ``""`` when ``text`` is empty or
        contains only whitespace (the model is not invoked in that case).
    """
    # Nothing to summarize: skip the model rather than let it generate from
    # an empty sequence.
    if not text or not text.strip():
        return ""

    # 1. Tokenize text (1024 is the maximum input length used for BART here)
    inputs = tokenizer(
        text,
        max_length=1024,
        truncation=True,
        return_tensors="pt",
    )

    # 2. Generate summary token IDs with beam search.
    #    The sampling-only options (temperature / top_k / top_p) were removed:
    #    they only take effect when do_sample=True, which this call never set.
    summary_ids = summarizer_model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        num_beams=5,
        length_penalty=1.2,
        no_repeat_ngram_size=3,
        max_length=max_len,
        min_length=min(min_len, max_len),
        early_stopping=True,
    )

    # 3. Decode token IDs to text
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


def _split_into_chunks(text, max_chunk_chars):
    """Pack the lines of ``text`` into chunks of at most ``max_chunk_chars`` characters."""
    chunks = []
    current = ""

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        # A line longer than the limit can never fit in a chunk (for example
        # a transcript with no line breaks), so split it at word boundaries.
        if len(line) > max_chunk_chars:
            pieces = textwrap.wrap(line, width=max_chunk_chars)
        else:
            pieces = [line]

        for piece in pieces:
            if not current:
                current = piece
            elif len(current) + 1 + len(piece) <= max_chunk_chars:
                current = f"{current} {piece}"
            else:
                chunks.append(current)
                current = piece

    if current:
        chunks.append(current)
    return chunks


def summarize_long_text(text, max_chunk_chars=3000, max_len=120, min_len=40):
    """
    Handle long transcripts by:
    1. Splitting into chunks
    2. Summarizing each chunk
    3. Summarizing the combined summaries

    Args:
        text: Full text to summarize.
        max_chunk_chars: Maximum number of characters per chunk. Lines are
            kept whole where possible; a line longer than this limit is
            split at word boundaries.
        max_len: Passed to ``summarize_text`` as the maximum summary length.
        min_len: Passed to ``summarize_text`` as the minimum summary length.

    Returns:
        The final summary string, or ``""`` for empty / whitespace-only text.

    Raises:
        ValueError: If ``max_chunk_chars`` is smaller than 1.
    """
    if not text or not text.strip():
        return ""
    if max_chunk_chars < 1:
        raise ValueError("max_chunk_chars must be at least 1")

    if len(text) <= max_chunk_chars:
        return summarize_text(text, max_len=max_len, min_len=min_len)

    chunks = _split_into_chunks(text, max_chunk_chars)

    # The text may only have exceeded the limit because of blank lines or
    # padding; a single chunk needs a single summarization pass.
    if len(chunks) == 1:
        return summarize_text(chunks[0], max_len=max_len, min_len=min_len)

    total = len(chunks)
    partial_summaries = []
    for i, chunk in enumerate(chunks, start=1):
        with st.spinner(f"Summarizing chunk {i}/{total} (length={len(chunk)} chars)..."):
            partial_summaries.append(
                summarize_text(chunk, max_len=max_len, min_len=min_len)
            )

    # Combine partial summaries and summarize again
    combined = " ".join(partial_summaries)

    # If the combined summaries are still too long for one pass, reduce them
    # again. Recursing only while the text actually shrinks guarantees
    # termination even with very small chunk sizes.
    if max_chunk_chars < len(combined) < len(text):
        return summarize_long_text(
            combined,
            max_chunk_chars=max_chunk_chars,
            max_len=max_len,
            min_len=min_len,
        )

    return summarize_text(combined, max_len=max_len, min_len=min_len)


def extract_action_items(transcript_text: str) -> str:
    """
    Produce an action-item oriented summary of a transcript.

    The transcript is wrapped in an instruction-style prompt (a preamble
    describing action items, then a request for bullet points) and the whole
    prompt is passed through ``summarize_long_text``.

    NOTE(review): the underlying model is loaded as a summarizer; whether it
    follows the instructions in the prompt should be confirmed.

    Args:
        transcript_text: The meeting or class transcript.

    Returns:
        The text returned by ``summarize_long_text`` for the wrapped prompt.
    """
    preamble = (
        "You are an assistant that reads a meeting or class transcript and extracts ACTION ITEMS.\n"
        "Action items are specific tasks that someone needs to do in the future.\n\n"
        "Transcript:\n"
    )
    request = "\n\nNow list the ACTION ITEMS as clear bullet points:\n- "
    instruction_prompt = f"{preamble}{transcript_text}{request}"

    return summarize_long_text(
        instruction_prompt,
        max_chunk_chars=3000,
        max_len=200,
        min_len=60,
    )


def extract_key_points(transcript_text: str) -> str:
    """
    Produce a key-points / decisions oriented summary of a transcript.

    The transcript is wrapped in an instruction-style prompt (a preamble
    describing key points, then a request for bullet points) and the whole
    prompt is passed through ``summarize_long_text``.

    NOTE(review): the underlying model is loaded as a summarizer; whether it
    follows the instructions in the prompt should be confirmed.

    Args:
        transcript_text: The meeting or lecture transcript.

    Returns:
        The text returned by ``summarize_long_text`` for the wrapped prompt.
    """
    preamble = (
        "You are an assistant that reads a meeting or lecture transcript and extracts the KEY POINTS.\n"
        "Key points are the most important ideas, topics discussed, and decisions made.\n\n"
        "Transcript:\n"
    )
    request = "\n\nNow summarize the KEY POINTS and DECISIONS as bullet points:\n- "
    instruction_prompt = f"{preamble}{transcript_text}{request}"

    return summarize_long_text(
        instruction_prompt,
        max_chunk_chars=3000,
        max_len=220,
        min_len=80,
    )


# =========================
# 4. SIDEBAR: INPUT MODE
# =========================

# Sidebar control that selects how the transcript is supplied.
with st.sidebar:
    st.header("Input Options")
    input_mode = st.radio(
        label="Choose input type:",
        options=["Upload Audio File", "Paste Transcript Text"],
    )

# =========================
# 5. MAIN LOGIC
# =========================

# Transcript for the current run; stays None until one is available.
transcript_text = None

if input_mode == "Upload Audio File":
    st.subheader("🎙 Upload your meeting / lecture recording")

    audio_file = st.file_uploader(
        "Upload an audio/video file (mp3, wav, m4a, mp4, etc.)",
        type=["mp3", "wav", "m4a", "mp4"]
    )

    if audio_file is not None:
        st.audio(audio_file)

        # Identifies the upload so a stored transcript is only reused for the
        # file it was produced from.
        audio_source_id = (audio_file.name, audio_file.size)

        if st.button("Transcribe & Summarize"):
            with st.spinner("Loading Whisper model and transcribing audio..."):
                asr_model = load_whisper_model("small")

                # Whisper is given a path, so write the upload to a unique
                # temporary file (keeping the original extension). The file
                # is closed before transcription and always removed after.
                suffix = os.path.splitext(audio_file.name)[1]
                temp_audio = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
                try:
                    with temp_audio:
                        # getvalue() returns the whole upload regardless of
                        # the current stream position.
                        temp_audio.write(audio_file.getvalue())

                    # Transcribe
                    result = asr_model.transcribe(temp_audio.name, language="en")
                finally:
                    os.remove(temp_audio.name)

            # A button is only True in the run triggered by its click. Keep
            # the transcript in session state so it is still available when
            # the next button click reruns the script.
            st.session_state["audio_transcript"] = {
                "source": audio_source_id,
                "text": result["text"],
            }
            st.success("✅ Transcription complete!")

        stored = st.session_state.get("audio_transcript")
        if stored is not None and stored["source"] == audio_source_id:
            transcript_text = stored["text"]
            st.write("### 📄 Transcript (preview)")
            st.write(textwrap.shorten(transcript_text, width=1000, placeholder=" ..."))

elif input_mode == "Paste Transcript Text":
    st.subheader("📄 Paste your transcript text")

    transcript_text = st.text_area(
        "Paste the full transcript here (meeting, class, lecture, etc.)",
        height=250
    )

    # Treat an empty or whitespace-only paste as "no transcript yet".
    if not transcript_text.strip():
        transcript_text = None

# =========================
# 6. RUN SUMMARIZATION ONCE WE HAVE TRANSCRIPT
# =========================

if not transcript_text:
    # No transcript yet: prompt the user for input.
    st.info("👆 Upload an audio file or paste a transcript to get started.")
else:
    st.markdown("---")
    st.subheader("🧠 AI Analysis")

    if st.button("Generate Summary & Action Items"):
        # Run the three analysis passes in order, each under its own spinner.
        analysis_passes = (
            ("Summarizing the transcript...", summarize_long_text),
            ("Extracting action items...", extract_action_items),
            ("Extracting key points...", extract_key_points),
        )
        outputs = []
        for spinner_message, analyze in analysis_passes:
            with st.spinner(spinner_message):
                outputs.append(analyze(transcript_text))
        main_summary, action_items, key_points = outputs

        # Display results
        result_sections = (
            ("### 📝 Main Summary", main_summary),
            ("### ✅ Action Items", action_items),
            ("### 📌 Key Points & Decisions", key_points),
        )
        for heading, body in result_sections:
            st.markdown(heading)
            st.write(body)