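# NOTE: the next three lines appear to be page-header residue captured along
# with the file (not part of the program); kept here as comments.
#   stpete2's picture
#   Update app.py
#   b812f5d verified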
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer
# =========================
# Model config
# =========================
# Model identifier passed as the first argument to both `from_pretrained`
# calls (tokenizer and model).
MODEL_ID = "vikhyatk/moondream2"
# Forwarded unchanged as `revision=` to both `from_pretrained` calls.
# NOTE(review): both loads also pass trust_remote_code=True; with no revision
# pinned, whatever code the default revision holds is what gets loaded.
# Consider pinning a specific commit hash here -- confirm with the owner.
REVISION = None
# Target passed to `.to(...)` on the loaded model.
DEVICE = "cpu"
# =========================
# Load model
# =========================
# Load the tokenizer first, then the model; both come from the same
# MODEL_ID / REVISION pair and both opt in to trust_remote_code.
print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_ID, revision=REVISION, trust_remote_code=True
)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    revision=REVISION,
    trust_remote_code=True,
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)
# Move the weights to the configured device and switch to evaluation mode
# before any request is served.
model = model.to(DEVICE)
model.eval()
print("Model loaded successfully!")
# =========================
# Inference function
# =========================
def understand_image(image, prompt):
    """Answer a free-form question about an uploaded image.

    Args:
        image: The uploaded picture, or ``None`` when nothing was uploaded.
            It must provide ``convert("RGB")`` (the UI requests PIL images).
        prompt: The user's question. ``None``, empty and whitespace-only
            values are rejected.

    Returns:
        The model's answer on success. Otherwise a user-facing message that
        starts with a cross mark: either a validation hint, or the error
        text followed by up to 20 public attribute names of the model as a
        debugging aid.
    """
    # Guard clauses: reject missing input before touching the model.
    if image is None:
        return "❌ Please upload an image."
    if not prompt or not prompt.strip():
        return "❌ Please enter a question."

    try:
        image = image.convert("RGB")
        print(f"Processing question: {prompt}")

        # Moondream2 API: encode the image, then answer the question
        # against that encoding. No gradients are needed for inference.
        with torch.no_grad():
            image_embeds = model.encode_image(image)
            answer = model.answer_question(
                image_embeds=image_embeds,
                question=prompt,
                tokenizer=tokenizer,
            )

        print(f"Answer generated: {answer}")
        return answer
    except Exception as e:
        # UI boundary: report the failure in the output box instead of
        # letting the request crash.
        error_msg = str(e)
        print(f"Error occurred: {error_msg}")
        # Debugging aid: list some public attributes of the loaded model so
        # an API mismatch is visible directly in the UI.
        available_methods = [
            name for name in dir(model) if not name.startswith("_")
        ]
        return (
            f"❌ Error: {error_msg}\n\n"
            f"🔍 Available model methods:\n{', '.join(available_methods[:20])}"
        )
# =========================
# Gradio UI
# =========================
# NOTE(review): the original nesting of this layout was lost; the structure
# below (inputs in the left column, answer in the right column, example
# questions underneath the row) is a reconstruction -- confirm against the
# deployed app.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🌓 Moondream2 Image Understanding")
    gr.Markdown(
        "Upload an image and ask questions about it. ⚠️ CPU processing may take 20-40 seconds."
    )

    with gr.Row():
        with gr.Column():
            # type="pil" so the handler can call image.convert("RGB").
            image_input = gr.Image(type="pil", label="📸 Upload Image")
            text_input = gr.Textbox(
                label="❓ Your Question",
                placeholder="What do you see in this image?",
                value="Describe this image in detail.",
                lines=2,
            )
            btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")
        with gr.Column():
            output = gr.Textbox(
                label="💬 Answer",
                lines=10,
                placeholder="The AI's response will appear here...",
            )

    gr.Markdown("### 💡 Example Questions:")
    gr.Examples(
        examples=[
            ["Describe what you see in this image."],
            ["What objects are in this image?"],
            ["What is the main subject?"],
            ["What colors are most prominent?"],
            ["Is this indoors or outdoors?"],
            ["How many people are in the image?"],
        ],
        inputs=text_input,
        label="Click to use these questions",
    )

    # Both the button and the textbox's submit event run the same handler
    # with the same inputs and output.
    btn.click(
        understand_image,
        inputs=[image_input, text_input],
        outputs=output,
    )
    text_input.submit(
        understand_image,
        inputs=[image_input, text_input],
        outputs=output,
    )


if __name__ == "__main__":
    demo.launch()