import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# =========================
# Model config
# =========================
# Model identifier passed as the first argument to both from_pretrained()
# calls in the loading section of this file.
MODEL_ID = "vikhyatk/moondream2"
# Forwarded as `revision=` to both from_pretrained() calls.
# NOTE(review): None leaves the revision unpinned while the loaders run with
# trust_remote_code=True; consider pinning a known-good revision -- confirm.
REVISION = None 
# Device string handed to `.to()` on the loaded model.
DEVICE = "cpu"  

# =========================
# Load model
# =========================
# Options shared by the tokenizer and the model so both are fetched with the
# same revision and the same remote-code setting.
_hub_kwargs = {"revision": REVISION, "trust_remote_code": True}

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **_hub_kwargs)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
    **_hub_kwargs,
)
# Move the weights to the configured device, then switch to evaluation mode.
model = model.to(DEVICE)
model.eval()
print("Model loaded successfully!")

# =========================
# Inference function
# =========================
def understand_image(image, prompt):
    """Answer a question about an uploaded image using the loaded model.

    Args:
        image: The uploaded picture, or ``None`` when nothing was uploaded.
            It must provide a ``convert("RGB")`` method (the UI supplies a
            PIL image).
        prompt: The user's question. Leading and trailing whitespace is
            removed before the question is logged and sent to the model.

    Returns:
        Whatever ``model.answer_question`` returns on success; otherwise a
        user-facing message starting with "❌" describing the missing input
        or the failure. Exceptions raised during inference are caught here
        and reported in the returned message instead of propagating.
    """
    if image is None:
        return "❌ Please upload an image."

    # Normalise once so validation, logging and inference all see the same
    # text; a multi-line textbox can easily submit stray trailing newlines.
    question = prompt.strip() if prompt else ""
    if not question:
        return "❌ Please enter a question."

    try:
        rgb_image = image.convert("RGB")

        print(f"Processing question: {question}")

        # Inference only: no gradients are needed.
        with torch.no_grad():
            # Encode the image first, then ask the question against the
            # resulting embeddings.
            image_embeds = model.encode_image(rgb_image)
            answer = model.answer_question(
                image_embeds=image_embeds,
                question=question,
                tokenizer=tokenizer,
            )

        print(f"Answer generated: {answer}")
        return answer

    except Exception as e:
        # UI boundary: report the failure to the user rather than crashing
        # the handler, but keep the full stack trace in the server log.
        import traceback

        error_msg = str(e)
        print(f"Error occurred: {error_msg}")
        traceback.print_exc()

        # Debugging aid: list some of the model's public attributes so an
        # API mismatch with the remote model code is easy to spot.
        available_methods = [
            method for method in dir(model) if not method.startswith('_')
        ]
        return f"❌ Error: {error_msg}\n\n🔍 Available model methods:\n{', '.join(available_methods[:20])}"

# =========================
# Gradio UI
# =========================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page title followed by a short usage / latency note.
    gr.Markdown("# 🌓 Moondream2 Image Understanding")
    gr.Markdown("Upload an image and ask questions about it. ⚠️ CPU processing may take 20-40 seconds.")

    with gr.Row():
        # First column: the image, the question and the trigger button.
        with gr.Column():
            image_input = gr.Image(label="📸 Upload Image", type="pil")
            text_input = gr.Textbox(
                value="Describe this image in detail.",
                placeholder="What do you see in this image?",
                label="❓ Your Question",
                lines=2,
            )
            btn = gr.Button("🔍 Analyze Image", size="lg", variant="primary")

        # Second column: the answer text.
        with gr.Column():
            output = gr.Textbox(
                placeholder="The AI's response will appear here...",
                label="💬 Answer",
                lines=10,
            )

    gr.Markdown("### 💡 Example Questions:")
    gr.Examples(
        # Each example is a one-element row because there is a single input.
        examples=[
            [question]
            for question in (
                "Describe what you see in this image.",
                "What objects are in this image?",
                "What is the main subject?",
                "What colors are most prominent?",
                "Is this indoors or outdoors?",
                "How many people are in the image?",
            )
        ],
        inputs=text_input,
        label="Click to use these questions",
    )

    # Clicking the button and submitting the textbox run the same handler
    # with the same inputs and output.
    btn.click(fn=understand_image, inputs=[image_input, text_input], outputs=output)
    text_input.submit(fn=understand_image, inputs=[image_input, text_input], outputs=output)

# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()