# NOTE(review): the lines below are non-code residue from the Hugging Face
# Spaces web page this file was copied from (status badge, file size, git
# blame hashes, line-number gutter). Kept as comments so the file parses.
# Spaces: Sleeping
# File size: 3,626 Bytes
import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer
# --- Model configuration --------------------------------------------------
MODEL_ID = "vikhyatk/moondream2"
REVISION = None   # pin a hub revision hash here for reproducible loads
DEVICE = "cpu"    # free Spaces tier: CPU-only inference

# --- Model loading (runs once at import time) -----------------------------
# Tokenizer and model come from the same hub repo and share the revision /
# trust_remote_code settings, so those kwargs are defined a single time.
_hub_kwargs = dict(revision=REVISION, trust_remote_code=True)

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **_hub_kwargs)

print("Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,  # full precision is the safe choice on CPU
    low_cpu_mem_usage=True,
    **_hub_kwargs,
).to(DEVICE)
model.eval()  # inference only: disable dropout / training-mode layers
print("Model loaded successfully!")
# =========================
# Inference function
# =========================
def understand_image(image, prompt):
    """Answer a free-form question about an uploaded image with Moondream2.

    Parameters
    ----------
    image : PIL.Image.Image | None
        Image from the Gradio ``gr.Image(type="pil")`` input.
    prompt : str | None
        Question typed by the user.

    Returns
    -------
    str
        The model's answer, or a user-facing error message. Never raises,
        so the Gradio UI always receives text to display.
    """
    # Guard clauses: validate both inputs before touching the model.
    if image is None:
        return "❌ Please upload an image."
    if not prompt or not prompt.strip():
        return "❌ Please enter a question."

    prompt = prompt.strip()
    try:
        # Moondream2's remote code expects an RGB PIL image.
        image = image.convert("RGB")
        print(f"Processing question: {prompt}")

        # Moondream2's custom API: encode the image once, then answer.
        with torch.no_grad():
            image_embeds = model.encode_image(image)
            answer = model.answer_question(
                image_embeds=image_embeds,
                question=prompt,
                tokenizer=tokenizer,
            )
        print(f"Answer generated: {answer}")
        return answer
    except Exception as e:
        # Surface the failure in the UI instead of crashing the app, and
        # list the model's public methods to help debug API mismatches
        # (trust_remote_code models change their interface between revisions).
        error_msg = str(e)
        print(f"Error occurred: {error_msg}")
        available_methods = [m for m in dir(model) if not m.startswith("_")]
        return (
            f"❌ Error: {error_msg}\n\n"
            f"🔍 Available model methods:\n{', '.join(available_methods[:20])}"
        )
# =========================
# Gradio UI
# =========================
# Two-column layout: inputs (image, question, button) on the left, the
# model's answer on the right, with clickable example questions below.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🌙 Moondream2 Image Understanding")
    gr.Markdown(
        "Upload an image and ask questions about it. ⚠️ CPU processing may take 20-40 seconds."
    )
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="📸 Upload Image")
            text_input = gr.Textbox(
                label="❓ Your Question",
                placeholder="What do you see in this image?",
                value="Describe this image in detail.",
                lines=2
            )
            btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")
        with gr.Column():
            output = gr.Textbox(
                label="💬 Answer",
                lines=10,
                placeholder="The AI's response will appear here..."
            )
    gr.Markdown("### 💡 Example Questions:")
    gr.Examples(
        examples=[
            ["Describe what you see in this image."],
            ["What objects are in this image?"],
            ["What is the main subject?"],
            ["What colors are most prominent?"],
            ["Is this indoors or outdoors?"],
            ["How many people are in the image?"]
        ],
        inputs=text_input,
        label="Click to use these questions"
    )
    # Both the button click and pressing Enter in the textbox run inference.
    btn.click(
        understand_image,
        inputs=[image_input, text_input],
        outputs=output
    )
    text_input.submit(
        understand_image,
        inputs=[image_input, text_input],
        outputs=output
    )

# Launch only when executed directly (Spaces runs this file as a script).
if __name__ == "__main__":
    demo.launch()