"""Gradio Space: extract pantry items from a photo with Qwen2.5-VL.

Runs on Hugging Face Spaces (ZeroGPU). The model is loaded lazily on the
first request inside the GPU-decorated handler, and the raw model output is
coerced into JSON for the UI.
"""

import json
import os
import re

# Environment must be configured before torch / transformers are imported.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True,max_split_size_mb:128"
# Writable cache path for Spaces without persistent storage
os.environ["HF_HOME"] = "/tmp/hf"
os.environ["HF_HUB_CACHE"] = "/tmp/hf/hub"
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases (HF_HOME covers it) — kept for compatibility; confirm before removing.
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf/transformers"
os.makedirs("/tmp/hf/hub", exist_ok=True)
os.makedirs("/tmp/hf/transformers", exist_ok=True)

import spaces
import torch
import gradio as gr
from PIL import Image
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration

HF_TOKEN = os.environ.get("HF_TOKEN", "")
MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct"

# Lazily-initialized singletons, populated by load_model() on first request.
processor = None
model = None


def load_model() -> None:
    """Load the processor and model once; subsequent calls are no-ops."""
    global processor, model
    if model is not None and processor is not None:
        return
    # An empty-string token would be sent literally; normalize to None.
    token = HF_TOKEN or None
    print("loading processor")
    processor = AutoProcessor.from_pretrained(
        MODEL_ID,
        token=token,
        # Bound the visual token budget (each token covers a 28x28-pixel patch).
        min_pixels=256 * 28 * 28,
        max_pixels=1024 * 28 * 28,
    )
    print("loading model:", MODEL_ID)
    model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
        MODEL_ID,
        token=token,
        device_map="auto",
        torch_dtype="auto",
    )
    print("model.eval started")
    model.eval()


def extract_json(text: str):
    """Best-effort parse of *text* as JSON.

    Tries the whole string first, then the outermost ``{...}`` span (models
    often wrap JSON in prose or code fences). Returns whatever ``json.loads``
    yields (usually a dict), or ``{"raw_output": text}`` when nothing parses.
    """
    text = (text or "").strip()
    try:
        return json.loads(text)
    except Exception:
        pass
    # Greedy DOTALL match grabs first "{" through last "}" across newlines.
    match = re.search(r"\{.*\}", text, flags=re.S)
    if match:
        try:
            return json.loads(match.group(0))
        except Exception:
            pass
    return {"raw_output": text}


PROMPT = """Analyze this pantry image. 
Return ONLY valid JSON with this schema: { "items": [ { "name": "", "brand": "", "quantity": "", "confidence": 0.0 } ], "summary": "", "uncertain_items": [] } find all the unique recipes in detail """


@spaces.GPU(size="xlarge", duration=160)
def analyze_pantry(image: Image.Image):
    """Run the VLM on a pantry photo and return the parsed JSON result.

    Returns an ``{"error": ...}`` dict when no image was supplied; on a
    successful parse the raw model text is attached under ``"_raw_output"``.
    """
    if image is None:
        return {"error": "Please upload a pantry image."}
    load_model()
    # Convert once; the same RGB image feeds the chat template and the tensors.
    rgb = image.convert("RGB")
    messages = [
        {
            "role": "system",
            "content": [
                {"type": "text", "text": "You extract pantry items from photos and respond with JSON only."}
            ],
        },
        {
            "role": "user",
            "content": [
                {"type": "image", "image": rgb},
                {"type": "text", "text": PROMPT},
            ],
        },
    ]
    text = processor.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = processor(
        text=[text],
        images=[rgb],
        padding=True,
        return_tensors="pt",
    )
    # Move tensors to the model's device; leave any non-tensor entries as-is.
    inputs = {k: v.to(model.device) if hasattr(v, "to") else v for k, v in inputs.items()}
    with torch.inference_mode():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=800,
            do_sample=False,  # greedy decoding for deterministic structured output
        )
    # Decode only the newly generated tokens, skipping the echoed prompt.
    prompt_len = inputs["input_ids"].shape[-1]
    generated_text = processor.batch_decode(
        [output_ids[0][prompt_len:]],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0].strip()
    print("generated_text:", generated_text)
    parsed = extract_json(generated_text)
    # On a successful parse, keep the raw text alongside for debugging.
    if isinstance(parsed, dict) and "raw_output" not in parsed:
        parsed["_raw_output"] = generated_text
    return parsed


@spaces.GPU(size="large", duration=1)
def cloud():
    """No-op GPU handler.

    NOTE(review): appears to exist only to claim/warm a ZeroGPU slot from the
    UI's "Cloud" button — confirm intent before removing.
    """
    return None


with gr.Blocks() as demo:
    gr.Markdown("# Pantry ingredient / item extractor")
    image_input = gr.Image(type="pil", label="Pantry image")
    analyze_btn = gr.Button("Analyze")
    cloud_btn = gr.Button("Cloud")
    output_json = gr.JSON(label="Output")
    analyze_btn.click(analyze_pantry, inputs=[image_input], outputs=[output_json], api_name="analyze")
    cloud_btn.click(cloud, inputs=[], outputs=[], api_name="cloud")

# Launched at import time (Spaces executes this file directly); SSR disabled.
demo.queue(max_size=16)
demo.launch(ssr_mode=False)