stpete2 commited on
Commit
0712e7e
·
verified ·
1 Parent(s): 3eece5f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -35
app.py CHANGED
@@ -7,18 +7,20 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
7
  # Model config
8
  # =========================
9
  MODEL_ID = "vikhyatk/moondream2"
10
- REVISION = "2024-08-26"
11
  DEVICE = "cpu"
12
 
13
  # =========================
14
  # Load model
15
  # =========================
 
16
  tokenizer = AutoTokenizer.from_pretrained(
17
  MODEL_ID,
18
  revision=REVISION,
19
  trust_remote_code=True
20
  )
21
 
 
22
  model = AutoModelForCausalLM.from_pretrained(
23
  MODEL_ID,
24
  revision=REVISION,
@@ -28,76 +30,85 @@ model = AutoModelForCausalLM.from_pretrained(
28
  ).to(DEVICE)
29
 
30
  model.eval()
 
31
 
32
  # =========================
33
- # Inference function (revised version)
34
  # =========================
35
  def understand_image(image, prompt):
36
  if image is None:
37
- return "Please upload an image."
38
 
39
  if not prompt or prompt.strip() == "":
40
- return "Please enter a question."
41
 
42
  try:
43
  image = image.convert("RGB")
44
 
45
- # Alternative 1: use answer_question directly
 
 
46
  with torch.no_grad():
47
- enc_image = model.encode_image(image)
48
- answer = model.answer_question(enc_image, prompt, tokenizer)
 
 
 
 
 
 
 
49
 
 
50
  return answer
51
 
52
- except AttributeError as e:
53
- # Alternative 2: try a different API
54
- try:
55
- with torch.no_grad():
56
- answer = model(image, prompt)
57
- return answer
58
- except:
59
- return f"Model API Error: {str(e)}\n\nThis model version may not be compatible. Please check the Moondream2 documentation."
60
-
61
  except Exception as e:
62
- return f"Error: {str(e)}"
 
 
 
 
 
 
63
  # =========================
64
  # Gradio UI
65
  # =========================
66
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
67
  gr.Markdown("# 🌓 Moondream2 Image Understanding")
68
  gr.Markdown(
69
- "Upload an image and ask questions about it. Processing runs on CPU and may take 10-30 seconds."
70
  )
71
 
72
  with gr.Row():
73
  with gr.Column():
74
- image_input = gr.Image(type="pil", label="Upload Image")
75
  text_input = gr.Textbox(
76
- label="Question",
77
- placeholder="What is in this image?",
78
- value="Describe this image."
 
79
  )
80
  btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")
81
 
82
  with gr.Column():
83
  output = gr.Textbox(
84
- label="Answer",
85
- lines=8,
86
- placeholder="The answer will appear here..."
87
  )
88
 
89
- # Examples
90
- gr.Markdown("### Example Questions:")
91
  gr.Examples(
92
  examples=[
93
- ["Describe this image in detail."],
94
- ["What objects are visible in this image?"],
95
- ["What colors are prominent in this image?"],
96
- ["What is the main subject of this image?"],
97
- ["Are there any people in this image?"]
 
98
  ],
99
  inputs=text_input,
100
- label="Click to use"
101
  )
102
 
103
  btn.click(
@@ -106,11 +117,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
106
  outputs=output
107
  )
108
 
109
- # Allow running via the Enter key as well
110
  text_input.submit(
111
  understand_image,
112
  inputs=[image_input, text_input],
113
  outputs=output
114
  )
115
 
116
- demo.launch()
 
 
7
  # Model config
8
  # =========================
9
  MODEL_ID = "vikhyatk/moondream2"
10
+ REVISION = "2025-01-09" # newer revision
11
  DEVICE = "cpu"
12
 
13
  # =========================
14
  # Load model
15
  # =========================
16
+ print("Loading tokenizer...")
17
  tokenizer = AutoTokenizer.from_pretrained(
18
  MODEL_ID,
19
  revision=REVISION,
20
  trust_remote_code=True
21
  )
22
 
23
+ print("Loading model...")
24
  model = AutoModelForCausalLM.from_pretrained(
25
  MODEL_ID,
26
  revision=REVISION,
 
30
  ).to(DEVICE)
31
 
32
  model.eval()
33
+ print("Model loaded successfully!")
34
 
35
  # =========================
36
+ # Inference function
37
  # =========================
38
  def understand_image(image, prompt):
39
  if image is None:
40
+ return "Please upload an image."
41
 
42
  if not prompt or prompt.strip() == "":
43
+ return "Please enter a question."
44
 
45
  try:
46
  image = image.convert("RGB")
47
 
48
+ print(f"Processing question: {prompt}")
49
+
50
+ # The correct Moondream2 API
51
  with torch.no_grad():
52
+ # Encode the image
53
+ image_embeds = model.encode_image(image)
54
+
55
+ # Answer the question
56
+ answer = model.answer_question(
57
+ image_embeds=image_embeds,
58
+ question=prompt,
59
+ tokenizer=tokenizer
60
+ )
61
 
62
+ print(f"Answer generated: {answer}")
63
  return answer
64
 
 
 
 
 
 
 
 
 
 
65
  except Exception as e:
66
+ error_msg = str(e)
67
+ print(f"Error occurred: {error_msg}")
68
+
69
+ # Add debugging information
70
+ available_methods = [method for method in dir(model) if not method.startswith('_')]
71
+ return f"❌ Error: {error_msg}\n\n🔍 Available model methods:\n{', '.join(available_methods[:20])}"
72
+
73
  # =========================
74
  # Gradio UI
75
  # =========================
76
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
77
  gr.Markdown("# 🌓 Moondream2 Image Understanding")
78
  gr.Markdown(
79
+ "Upload an image and ask questions about it. ⚠️ CPU processing may take 20-40 seconds."
80
  )
81
 
82
  with gr.Row():
83
  with gr.Column():
84
+ image_input = gr.Image(type="pil", label="📸 Upload Image")
85
  text_input = gr.Textbox(
86
+ label="❓ Your Question",
87
+ placeholder="What do you see in this image?",
88
+ value="Describe this image in detail.",
89
+ lines=2
90
  )
91
  btn = gr.Button("🔍 Analyze Image", variant="primary", size="lg")
92
 
93
  with gr.Column():
94
  output = gr.Textbox(
95
+ label="💬 Answer",
96
+ lines=10,
97
+ placeholder="The AI's response will appear here..."
98
  )
99
 
100
+ gr.Markdown("### 💡 Example Questions:")
 
101
  gr.Examples(
102
  examples=[
103
+ ["Describe what you see in this image."],
104
+ ["What objects are in this image?"],
105
+ ["What is the main subject?"],
106
+ ["What colors are most prominent?"],
107
+ ["Is this indoors or outdoors?"],
108
+ ["How many people are in the image?"]
109
  ],
110
  inputs=text_input,
111
+ label="Click to use these questions"
112
  )
113
 
114
  btn.click(
 
117
  outputs=output
118
  )
119
 
 
120
  text_input.submit(
121
  understand_image,
122
  inputs=[image_input, text_input],
123
  outputs=output
124
  )
125
 
126
+ if __name__ == "__main__":
127
+ demo.launch()