"""Smart Ad Analyzer: BLIP image captioning + Gemma text analysis, served via Gradio."""

import os

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForVision2Seq, AutoProcessor, pipeline

# HF_TOKEN from the environment for gated/private models.
# None when unset, which the transformers pipelines accept.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Auto-detect device: CUDA GPU 0 if available, otherwise CPU (-1).
DEVICE = 0 if torch.cuda.is_available() else -1

# Load BLIP for image captioning.
processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
blip_model = AutoModelForVision2Seq.from_pretrained(
    "Salesforce/blip-image-captioning-large"
)
caption_pipe = pipeline(
    "image-to-text",
    model=blip_model,
    tokenizer=processor.tokenizer,
    image_processor=processor.image_processor,
    device=DEVICE,
)

# Load Gemma for text generation (swap in any Gemma instruct checkpoint here).
gemma_pipe = pipeline(
    "text-generation",
    model="google/gemma-2b-it",  # Change this to any working Gemma instruct model!
    device=DEVICE,
    token=HF_TOKEN,  # None is fine for public models; required for gated checkpoints
)


def get_recommendations():
    """Return the static list of example ad image URLs shown in the gallery."""
    return [
        "https://i.imgur.com/InC88PP.jpeg",
        "https://i.imgur.com/7BHfv4T.png",
        "https://i.imgur.com/wp3Wzc4.jpeg",
        "https://i.imgur.com/5e2xOA4.jpeg",
        "https://i.imgur.com/txjRk98.jpeg",
        "https://i.imgur.com/rQ4AYl0.jpeg",
        "https://i.imgur.com/bDzwD04.jpeg",
        "https://i.imgur.com/fLMngXI.jpeg",
        "https://i.imgur.com/nYEJzxt.png",
        "https://i.imgur.com/Xj92Cjv.jpeg",
    ]


def clean_output(text):
    """Strip prompt echoes ("Description:"/"Category:" labels Gemma sometimes repeats)."""
    if "Description:" in text:
        text = text.split("Description:", 1)[-1]
    if "Category:" in text:
        text = text.split("Category:", 1)[-1]
    return text.strip()


def process(image: Image.Image):
    """Analyze an uploaded ad image.

    Returns a 4-tuple for the Gradio outputs:
    (category text, 5-sentence analysis, bulleted suggestions, gallery URLs).
    When no image is supplied, the text fields are empty and only the
    recommendation gallery is populated.
    """
    if image is None:
        return "", "", "", get_recommendations()

    # 1. BLIP captioning: turn the image into a text description.
    caption_res = caption_pipe(image, max_new_tokens=64)
    desc = caption_res[0]["generated_text"].strip()

    # 2. Gemma: category. The prompt must be a single-line f-string (a raw
    # newline inside the literal is a SyntaxError). return_full_text=False
    # keeps the prompt out of the output; clean_output() is a safety net.
    cat_prompt = f"Classify the following ad in one or two words. Description: {desc}"
    cat_out = gemma_pipe(cat_prompt, max_new_tokens=16, return_full_text=False)[0][
        "generated_text"
    ].strip()
    cat_out = clean_output(cat_out)

    # 3. Gemma: analysis (5 sentences).
    ana_prompt = (
        f"Describe in exactly five sentences what this ad communicates and its "
        f"emotional impact. Description: {desc}"
    )
    ana_out = gemma_pipe(ana_prompt, max_new_tokens=120, return_full_text=False)[0][
        "generated_text"
    ].strip()
    ana_out = clean_output(ana_out)

    # 4. Gemma: suggestions (5 bullets).
    sug_prompt = (
        f"Suggest five practical improvements for this ad. Each suggestion must "
        f"be unique, address a different aspect (message, visuals, call to "
        f"action, targeting, or layout), start with '- ', and be one sentence. "
        f"Description: {desc}"
    )
    sug_out = gemma_pipe(sug_prompt, max_new_tokens=120, return_full_text=False)[0][
        "generated_text"
    ].strip()
    sug_out = clean_output(sug_out)

    # Keep only bullet lines; fall back to the raw output if none were produced.
    sug_lines = [line for line in sug_out.splitlines() if line.strip().startswith("-")]
    suggestions = "\n".join(sug_lines[:5]) if sug_lines else sug_out

    return cat_out, ana_out, suggestions, get_recommendations()


def main():
    """Build and return the Gradio Blocks UI (launched by the __main__ guard)."""
    with gr.Blocks(title="Smart Ad Analyzer (BLIP + Gemma)") as demo:
        gr.Markdown("## 📢 Smart Ad Analyzer (BLIP + Gemma)")
        gr.Markdown(
            """
            Upload your ad image below and instantly get expert feedback.
            Category, analysis, improvement suggestions—and example ads for inspiration.
            """
        )
        with gr.Row():
            inp = gr.Image(type="pil", label="Upload Ad Image")
            with gr.Column():
                cat_out = gr.Textbox(label="Ad Category", interactive=False)
                ana_out = gr.Textbox(label="Ad Analysis", lines=5, interactive=False)
                sug_out = gr.Textbox(
                    label="Improvement Suggestions", lines=5, interactive=False
                )
        btn = gr.Button("Analyze Ad", variant="primary")
        gallery = gr.Gallery(label="Example Ads")
        btn.click(
            fn=process,
            inputs=[inp],
            outputs=[cat_out, ana_out, sug_out, gallery],
        )
        gr.Markdown("Made by Simon Thalmay")
    return demo


if __name__ == "__main__":
    demo = main()
    demo.launch()