"""Gradio demo: ask free-form questions about an uploaded image with Moondream2.

The tokenizer and model are loaded once at import time and shared by every
request; the UI is only launched when the file is executed as a script.
"""

import traceback
from typing import Optional

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# =========================
# Model config
# =========================
MODEL_ID = "vikhyatk/moondream2"
# NOTE(review): the model is loaded with trust_remote_code=True, which runs
# Python code shipped in the model repository. Leaving the revision unpinned
# presumably tracks the repo's default branch, so that code can change between
# runs — consider pinning a known-good revision (confirm on the model card).
REVISION = None
DEVICE = "cpu"

# =========================
# Load model
# =========================
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID,
    revision=REVISION,
    trust_remote_code=True,
)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    revision=REVISION,
    trust_remote_code=True,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
).to(DEVICE)
model.eval()
print("Model loaded successfully!")


# =========================
# Inference function
# =========================
def understand_image(image: Optional[Image.Image], prompt: Optional[str]) -> str:
    """Answer a question about an image using the module-level model.

    Args:
        image: PIL image from the Gradio image input, or ``None`` when nothing
            has been uploaded.
        prompt: Question text from the textbox; may be empty or whitespace.

    Returns:
        The value produced by ``model.answer_question`` on success. When an
        input is missing, or when inference raises, a message prefixed with
        "❌" is returned instead so the UI always has something to display.
    """
    if image is None:
        return "❌ Please upload an image."
    if not prompt or not prompt.strip():
        return "❌ Please enter a question."

    try:
        image = image.convert("RGB")
        print(f"Processing question: {prompt}")

        # Moondream2 API: encode the image first, then answer against the
        # resulting embeddings. Gradients are not needed for inference.
        with torch.no_grad():
            image_embeds = model.encode_image(image)
            answer = model.answer_question(
                image_embeds=image_embeds,
                question=prompt,
                tokenizer=tokenizer,
            )

        print(f"Answer generated: {answer}")
        return answer
    except Exception as e:
        # Top-level UI boundary: report the failure in the output box rather
        # than letting the request crash.
        error_msg = str(e)
        print(f"Error occurred: {error_msg}")
        # Keep the full stack trace in the server log; str(e) alone does not
        # say where the failure happened.
        traceback.print_exc()

        # Debug aid: list the model's public attributes so an API mismatch
        # (e.g. a renamed method) is easy to spot from the UI.
        available_methods = [name for name in dir(model) if not name.startswith("_")]
        method_list = ", ".join(available_methods[:20])
        return f"❌ Error: {error_msg}\n\n🔍 Available model methods:\n{method_list}"


# =========================
# Gradio UI
# =========================
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🌓 Moondream2 Image Understanding")
    gr.Markdown(
        "Upload an image and ask questions about it. ⚠️ CPU processing may take 20-40 seconds."
    )

    with gr.Row():
        # Left column: inputs and the trigger button.
        with gr.Column():
            image_input = gr.Image(type="pil", label="📸 Upload Image")
            text_input = gr.Textbox(
                label="❓ Your Question",
                placeholder="What do you see in this image?",
                value="Describe this image in detail.",
                lines=2,
            )
            btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")

        # Right column: model output.
        with gr.Column():
            output = gr.Textbox(
                label="💬 Answer",
                lines=10,
                placeholder="The AI's response will appear here...",
            )

    gr.Markdown("### 💡 Example Questions:")
    gr.Examples(
        examples=[
            ["Describe what you see in this image."],
            ["What objects are in this image?"],
            ["What is the main subject?"],
            ["What colors are most prominent?"],
            ["Is this indoors or outdoors?"],
            ["How many people are in the image?"],
        ],
        inputs=text_input,
        label="Click to use these questions",
    )

    # Both the button and pressing Enter in the textbox run the same handler.
    btn.click(
        understand_image,
        inputs=[image_input, text_input],
        outputs=output,
    )
    text_input.submit(
        understand_image,
        inputs=[image_input, text_input],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()