stpete2 committed on
Commit
5bf0cd8
·
verified ·
1 Parent(s): 7e75cb8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -19
app.py CHANGED
@@ -11,7 +11,7 @@ REVISION = "2024-08-26"
11
  DEVICE = "cpu"
12
 
13
  # =========================
14
- # Load model (FIXED - device_mapを削除)
15
  # =========================
16
  tokenizer = AutoTokenizer.from_pretrained(
17
  MODEL_ID,
@@ -24,13 +24,13 @@ model = AutoModelForCausalLM.from_pretrained(
24
  revision=REVISION,
25
  trust_remote_code=True,
26
  torch_dtype=torch.float32,
27
- low_cpu_mem_usage=True # device_mapの代わりにこれを使用
28
  ).to(DEVICE)
29
 
30
  model.eval()
31
 
32
  # =========================
33
- # Inference function
34
  # =========================
35
  def understand_image(image, prompt):
36
  if image is None:
@@ -40,27 +40,26 @@ def understand_image(image, prompt):
40
  return "Please enter a question."
41
 
42
  try:
 
43
  image = image.convert("RGB")
44
 
45
- # Moondream2の推論
46
- enc_image = model.encode_image(image)
47
- answer = model.answer_question(
48
- enc_image,
49
- prompt,
50
- tokenizer
51
- )
52
  return answer
53
 
54
  except Exception as e:
55
- return f"Error: {str(e)}"
56
 
57
  # =========================
58
  # Gradio UI
59
  # =========================
60
- with gr.Blocks() as demo:
61
  gr.Markdown("# 🌓 Moondream2 Image Understanding")
62
  gr.Markdown(
63
- "⚠️ This space runs on CPU. Processing may take a few seconds."
64
  )
65
 
66
  with gr.Row():
@@ -71,19 +70,27 @@ with gr.Blocks() as demo:
71
  placeholder="What is in this image?",
72
  value="Describe this image."
73
  )
74
- btn = gr.Button("Run", variant="primary")
75
 
76
  with gr.Column():
77
- output = gr.Textbox(label="Answer", lines=5)
 
 
 
 
78
 
79
  # Examples
 
80
  gr.Examples(
81
  examples=[
82
- ["What objects are in this image?"],
83
- ["Describe the scene in detail."],
84
- ["What colors do you see?"]
 
 
85
  ],
86
- inputs=text_input
 
87
  )
88
 
89
  btn.click(
@@ -91,5 +98,12 @@ with gr.Blocks() as demo:
91
  inputs=[image_input, text_input],
92
  outputs=output
93
  )
 
 
 
 
 
 
 
94
 
95
  demo.launch()
 
11
  DEVICE = "cpu"
12
 
13
  # =========================
14
+ # Load model
15
  # =========================
16
  tokenizer = AutoTokenizer.from_pretrained(
17
  MODEL_ID,
 
24
  revision=REVISION,
25
  trust_remote_code=True,
26
  torch_dtype=torch.float32,
27
+ low_cpu_mem_usage=True
28
  ).to(DEVICE)
29
 
30
  model.eval()
31
 
32
  # =========================
33
+ # Inference function (修正版)
34
  # =========================
35
  def understand_image(image, prompt):
36
  if image is None:
 
40
  return "Please enter a question."
41
 
42
  try:
43
+ # 画像をRGBに変換
44
  image = image.convert("RGB")
45
 
46
+ # Moondream2の正しい使用方法
47
+ # encode_imageではなく、直接queryメソッドを使用
48
+ with torch.no_grad():
49
+ answer = model.query(image, prompt, tokenizer)
50
+
 
 
51
  return answer
52
 
53
  except Exception as e:
54
+ return f"Error: {str(e)}\n\nPlease try a different question or image."
55
 
56
  # =========================
57
  # Gradio UI
58
  # =========================
59
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
60
  gr.Markdown("# 🌓 Moondream2 Image Understanding")
61
  gr.Markdown(
62
+ "Upload an image and ask questions about it. Processing runs on CPU and may take 10-30 seconds."
63
  )
64
 
65
  with gr.Row():
 
70
  placeholder="What is in this image?",
71
  value="Describe this image."
72
  )
73
+ btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")
74
 
75
  with gr.Column():
76
+ output = gr.Textbox(
77
+ label="Answer",
78
+ lines=8,
79
+ placeholder="The answer will appear here..."
80
+ )
81
 
82
  # Examples
83
+ gr.Markdown("### Example Questions:")
84
  gr.Examples(
85
  examples=[
86
+ ["Describe this image in detail."],
87
+ ["What objects are visible in this image?"],
88
+ ["What colors are prominent in this image?"],
89
+ ["What is the main subject of this image?"],
90
+ ["Are there any people in this image?"]
91
  ],
92
+ inputs=text_input,
93
+ label="Click to use"
94
  )
95
 
96
  btn.click(
 
98
  inputs=[image_input, text_input],
99
  outputs=output
100
  )
101
+
102
+ # Enter keyでも実行できるように
103
+ text_input.submit(
104
+ understand_image,
105
+ inputs=[image_input, text_input],
106
+ outputs=output
107
+ )
108
 
109
  demo.launch()