import os
import gradio as gr

from llama_cpp import Llama

# -------- Model configuration --------
# Hugging Face repository and GGUF file to load. Both values can be overridden
# through environment variables; the default file is the q4_k_m quantization,
# picked as a good balance for CPU inference.
REPO_ID = os.environ.get("GGUF_REPO_ID", "tiiuae/Falcon3-1B-Instruct-GGUF")
FILENAME = os.environ.get("GGUF_FILENAME", "Falcon3-1B-Instruct-q4_k_m.gguf")

# Cache slot for the lazily created model: get_llm() fills it on first use so
# the model is loaded only once per Space runtime.
_LLM = None

def _thread_count_from_env(default=4):
    """Return the thread count requested through ``OMP_NUM_THREADS``.

    The variable may be unset, empty, or a comma-separated list such as
    ``"4,2"`` (OpenMP allows one value per nesting level); a bare ``int()``
    on those values raises ``ValueError`` and would prevent the model from
    loading. Only the first list entry is used, and ``default`` is returned
    when nothing parsable is present. Any value that parses as an integer is
    passed through unchanged.
    """
    raw_value = os.getenv("OMP_NUM_THREADS", "")
    first_entry = raw_value.split(",", 1)[0].strip()
    try:
        return int(first_entry)
    except ValueError:
        return default


def get_llm():
    """Return the shared ``Llama`` instance, creating it on first use.

    The instance is cached in the module-level ``_LLM`` so the model is
    loaded only once; later calls return the cached object. If loading
    raises, ``_LLM`` stays ``None`` and the next call tries again.

    Returns:
        The ``Llama`` object built from ``REPO_ID`` / ``FILENAME``.
    """
    global _LLM
    if _LLM is None:
        # llama-cpp-python can download GGUF files from Hugging Face directly
        # via from_pretrained.
        _LLM = Llama.from_pretrained(
            repo_id=REPO_ID,
            filename=FILENAME,
            verbose=False,
            # Tweak for CPU Spaces
            n_ctx=4096,
            n_threads=_thread_count_from_env(),
        )
    return _LLM

def build_prompt(topic: str, audience: str, num_slides: int, tone: str, time_minutes: int):
    """Assemble the plain-text instruction prompt for the slide-script model.

    The prompt has three sections separated by a blank line: the briefing
    (role, task and the user's parameters), the output requirements, and a
    sample of the exact slide layout the model should follow. Argument values
    are inserted verbatim.

    Args:
        topic: Subject of the presentation.
        audience: Who the talk is aimed at.
        num_slides: Number of slides to request.
        tone: Desired speaking tone.
        time_minutes: Total talk length in minutes.

    Returns:
        The complete prompt string, without leading or trailing whitespace.
    """
    briefing = [
        "You are a senior presentation writer and speaking coach.",
        "",
        "Task: Write a PowerPoint script for the topic below.",
        "",
        f"Topic: {topic}",
        f"Audience: {audience}",
        f"Tone: {tone}",
        f"Total time: {time_minutes} minutes",
        f"Number of slides: {num_slides}",
    ]
    requirements = [
        "Requirements:",
        f"- Output EXACTLY {num_slides} slides.",
        "- For each slide include:",
        "  1) Slide Title",
        "  2) 3–6 bullet points (concise, slide-friendly)",
        "  3) Speaker Notes (what to say, 80–140 words)",
        "- Include a strong opening hook and a clear closing with call-to-action.",
        "- Avoid fluff. Use concrete examples where possible.",
        "- Format strictly like:",
    ]
    layout_example = [
        "SLIDE 1: <title>",
        "Bullets:",
        "- ...",
        "- ...",
        "Speaker Notes:",
        "...",
        "",
        "SLIDE 2: ...",
    ]
    # Lines within a section are newline-joined; sections are separated by
    # exactly one blank line.
    sections = ("\n".join(part) for part in (briefing, requirements, layout_example))
    return "\n\n".join(sections)

def generate_ppt_script(topic, audience, num_slides, tone, time_minutes, temperature, max_tokens):
    """Generate a slide-by-slide presentation script from the UI inputs.

    Args:
        topic: Presentation subject; required.
        audience: Intended audience; optional. When blank or ``None`` a
            neutral "General audience" is used instead.
        num_slides: Requested slide count (converted with ``int``).
        tone: Tone label inserted into the prompt.
        time_minutes: Total talk time in minutes (converted with ``int``).
        temperature: Sampling temperature (converted with ``float``).
        max_tokens: Upper bound on generated tokens (converted with ``int``).

    Returns:
        The stripped completion text, or the message
        ``"Please enter a topic."`` when ``topic`` is empty or whitespace.
    """
    if not topic or not topic.strip():
        return "Please enter a topic."

    slide_count = int(num_slides)
    # The audience field is optional: guard against None (which would crash
    # on .strip()) and avoid sending an empty "Audience:" line to the model.
    audience_text = (audience or "").strip() or "General audience"

    llm = get_llm()
    prompt = build_prompt(topic.strip(), audience_text, slide_count, tone, int(time_minutes))

    completion = llm(
        prompt,
        max_tokens=int(max_tokens),
        temperature=float(temperature),
        top_p=0.95,
        # Stop as soon as the model starts a slide beyond the requested count,
        # so the output cannot run past `slide_count` slides. The trailing
        # colon keeps e.g. "SLIDE 2:" from matching "SLIDE 20:".
        stop=["</s>", f"SLIDE {slide_count + 1}:"],
    )

    return completion["choices"][0]["text"].strip()

# -------- UI definition --------
# Components are created inside the Blocks context in the order written here.
with gr.Blocks(title="Falcon3 PPT Script Writer (GGUF)") as demo:
    # Page header shown above the form.
    gr.Markdown(
        """
# Falcon3-1B-Instruct (GGUF) — PPT Script Writer
Enter a topic and get a **slide-by-slide deck script** with **speaker notes**.
"""
    )

    # Free-text inputs: what the talk is about and who it is for.
    with gr.Row():
        topic = gr.Textbox(label="Topic", placeholder="e.g., Agentic AI in SRE: reducing incident MTTR", lines=2)
        audience = gr.Textbox(label="Audience", placeholder="e.g., SRE + platform engineering leaders", lines=2)

    # Deck size and talk length.
    with gr.Row():
        num_slides = gr.Slider(5, 20, value=10, step=1, label="Number of slides")
        time_minutes = gr.Slider(5, 60, value=15, step=1, label="Total talk time (minutes)")

    # Tone label; the selected string is inserted into the prompt as-is.
    tone = gr.Dropdown(
        ["Professional", "Conversational", "Persuasive", "Technical Deep Dive", "Executive Summary"],
        value="Professional",
        label="Tone",
    )

    # Sampling controls, collapsed by default.
    with gr.Accordion("Generation settings", open=False):
        temperature = gr.Slider(0.0, 1.2, value=0.6, step=0.05, label="Temperature")
        max_tokens = gr.Slider(256, 2048, value=1200, step=64, label="Max output tokens")

    run_btn = gr.Button("Generate PPT Script")
    output = gr.Textbox(label="PPT Script Output", lines=28)

    # The `inputs` list is in the same order as generate_ppt_script's
    # parameters; keep the two in sync when adding or removing a control.
    run_btn.click(
        fn=generate_ppt_script,
        inputs=[topic, audience, num_slides, tone, time_minutes, temperature, max_tokens],
        outputs=output,
    )

# Enable the request queue with a default concurrency limit of 1, then start
# the app. NOTE(review): presumably the limit is there so only one generation
# runs at a time against the single shared model — confirm.
demo.queue(default_concurrency_limit=1).launch()