stpete2 committed on
Commit
e0ade18
·
verified ·
1 Parent(s): 9fe03bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -42
app.py CHANGED
@@ -1,81 +1,74 @@
1
- import torch
2
  import gradio as gr
3
- from transformers import AutoModelForVision2Seq, AutoProcessor
 
 
4
 
5
- # ===============================
6
  # Model config
7
- # ===============================
8
  MODEL_ID = "vikhyatk/moondream2"
 
9
 
10
- device = "cpu"
11
-
12
- # Processor & Model
13
- processor = AutoProcessor.from_pretrained(
14
  MODEL_ID,
15
  trust_remote_code=True
16
  )
17
 
18
- model = AutoModelForVision2Seq.from_pretrained(
19
- MODEL_ID,
20
- trust_remote_code=True,
21
- torch_dtype=torch.float32, # CPU安全
22
- ).to(device)
 
23
 
24
  model.eval()
25
 
26
- # ===============================
27
  # Inference function
28
- # ===============================
29
- def infer(image, prompt):
30
  if image is None:
31
  return "Please upload an image."
32
 
33
- if prompt is None or prompt.strip() == "":
34
- prompt = "Describe this image."
35
 
36
  with torch.no_grad():
37
- answer = model.answer(
38
- image=image,
39
- question=prompt
 
40
  )
41
 
42
  return answer
43
 
44
 
45
- # ===============================
46
  # Gradio UI
47
- # ===============================
48
  with gr.Blocks() as demo:
49
- gr.Markdown("# 🖼️ Vision Language Demo (moondream2 · CPU)")
50
  gr.Markdown(
51
  "⚠️ Uploaded images are processed in memory and not stored permanently."
52
  )
53
 
54
  with gr.Row():
55
- image = gr.Image(
56
- type="pil",
57
- label="Upload Image"
 
58
  )
59
 
60
- with gr.Column():
61
- textbox = gr.Textbox(
62
- label="Prompt",
63
- value="Describe this image."
64
- )
65
- btn = gr.Button("Run")
66
-
67
- output = gr.Textbox(
68
- label="Output",
69
- lines=6
70
- )
71
 
 
72
  btn.click(
73
- fn=infer,
74
- inputs=[image, textbox],
75
  outputs=output
76
  )
77
 
78
  demo.launch()
79
 
80
-
81
-
 
 
1
  import gradio as gr
2
+ import torch
3
+ from PIL import Image
4
+ from transformers import AutoTokenizer
5
 
6
+ # =========================
7
  # Model config
8
+ # =========================
9
  MODEL_ID = "vikhyatk/moondream2"
10
+ DEVICE = "cpu"
11
 
12
+ # =========================
13
+ # Load model (IMPORTANT)
14
+ # =========================
15
+ tokenizer = AutoTokenizer.from_pretrained(
16
  MODEL_ID,
17
  trust_remote_code=True
18
  )
19
 
20
+ model = torch.load(
21
+ torch.hub.load_state_dict_from_url(
22
+ f"https://huggingface.co/{MODEL_ID}/resolve/main/moondream.pt",
23
+ map_location=DEVICE
24
+ )
25
+ )
26
 
27
  model.eval()
28
 
29
+ # =========================
30
  # Inference function
31
+ # =========================
32
def understand_image(image, prompt):
    """Answer a free-form question about an uploaded image.

    Args:
        image: PIL image from the Gradio component, or None when nothing
            was uploaded.
        prompt: question text; falls back to a generic description
            request when empty or whitespace-only.

    Returns:
        The model's answer string, or a hint message when no image
        was provided.
    """
    if image is None:
        return "Please upload an image."

    # Robustness: guard against an empty question (the UI does not
    # require one) by falling back to a generic prompt.
    if not prompt or not prompt.strip():
        prompt = "Describe this image."

    # Moondream expects an RGB image (uploads may be RGBA/grayscale).
    image = image.convert("RGB")

    with torch.no_grad():
        answer = model.answer_question(
            image,
            prompt,
            tokenizer,
        )

    return answer
46
 
47
 
48
+ # =========================
49
  # Gradio UI
50
+ # =========================
51
# =========================
# Gradio UI
# =========================
with gr.Blocks() as demo:
    # Title and privacy notice.
    gr.Markdown("# 🌓 Moondream2 Image Understanding (Free Tier)")
    gr.Markdown(
        "⚠️ Uploaded images are processed in memory and not stored permanently."
    )

    # Input row: picture next to the free-form question.
    with gr.Row():
        picture = gr.Image(type="pil", label="Upload Image")
        question = gr.Textbox(
            label="Question",
            placeholder="What is in this image?",
        )

    answer_box = gr.Textbox(label="Answer")

    # Wire the button to the inference function.
    run_button = gr.Button("Run")
    run_button.click(
        understand_image,
        inputs=[picture, question],
        outputs=answer_box,
    )

demo.launch()
74