Spaces:

stpete2
/

image_understand

Sleeping

App Files Files Community

stpete2 committed on Dec 14, 2025

Commit

9fe03bb

verified ·

1 Parent(s): 94e2e79

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -23

app.py CHANGED Viewed

@@ -1,46 +1,81 @@
-import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from PIL import Image
 MODEL_ID = "vikhyatk/moondream2"
-# ---- Load model (CPU) ----
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
-model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
-    torch_dtype=torch.float32,
-    low_cpu_mem_usage=True,
     trust_remote_code=True
 )
 model.eval()
-# ---- Inference ----
-def caption_image(image, prompt):
     if image is None:
-        return "No image provided."
-    image = image.convert("RGB")
     with torch.no_grad():
-        answer = model.answer_question(
-            image,
-            prompt if prompt else "Describe the image.",
-            tokenizer
         )
     return answer
-# ---- Gradio UI ----
 with gr.Blocks() as demo:
-    gr.Markdown("# Vision Language Demo (CPU)")
-    image = gr.Image(type="pil", label="Upload Image")
-    textbox = gr.Textbox(label="Prompt", value="Describe this image.")
-    output = gr.Textbox(label="Output")
-    btn = gr.Button("Run")
-    btn.click(fn=infer, inputs=[image, textbox], outputs=output)
 demo.launch()

 import torch
+import gradio as gr
+from transformers import AutoModelForVision2Seq, AutoProcessor
+# ===============================
+# Model config
+# ===============================
 MODEL_ID = "vikhyatk/moondream2"
+device = "cpu"
+# Processor & Model
+processor = AutoProcessor.from_pretrained(
     MODEL_ID,
     trust_remote_code=True
 )
+model = AutoModelForVision2Seq.from_pretrained(
+    MODEL_ID,
+    trust_remote_code=True,
+    torch_dtype=torch.float32,  # CPU安全
+).to(device)
 model.eval()
+# ===============================
+# Inference function
+# ===============================
+def infer(image, prompt):
     if image is None:
+        return "Please upload an image."
+    if prompt is None or prompt.strip() == "":
+        prompt = "Describe this image."
     with torch.no_grad():
+        answer = model.answer(
+            image=image,
+            question=prompt
         )
     return answer
+# ===============================
+# Gradio UI
+# ===============================
 with gr.Blocks() as demo:
+    gr.Markdown("# 🖼️ Vision Language Demo (moondream2 · CPU)")
+    gr.Markdown(
+        "⚠️ Uploaded images are processed in memory and not stored permanently."
+    )
+    with gr.Row():
+        image = gr.Image(
+            type="pil",
+            label="Upload Image"
+        )
+        with gr.Column():
+            textbox = gr.Textbox(
+                label="Prompt",
+                value="Describe this image."
+            )
+            btn = gr.Button("Run")
+    output = gr.Textbox(
+        label="Output",
+        lines=6
+    )
+    btn.click(
+        fn=infer,
+        inputs=[image, textbox],
+        outputs=output
+    )
 demo.launch()