stpete2 committed on
Commit
9fe03bb
·
verified ·
1 Parent(s): 94e2e79

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -23
app.py CHANGED
@@ -1,46 +1,81 @@
1
- import gradio as gr
2
  import torch
3
- from transformers import AutoModelForCausalLM, AutoTokenizer
4
- from PIL import Image
5
 
 
 
 
6
  MODEL_ID = "vikhyatk/moondream2"
7
 
8
- # ---- Load model (CPU) ----
9
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
10
- model = AutoModelForCausalLM.from_pretrained(
 
11
  MODEL_ID,
12
- torch_dtype=torch.float32,
13
- low_cpu_mem_usage=True,
14
  trust_remote_code=True
15
  )
 
 
 
 
 
 
 
16
  model.eval()
17
 
18
- # ---- Inference ----
19
- def caption_image(image, prompt):
 
 
20
  if image is None:
21
- return "No image provided."
22
 
23
- image = image.convert("RGB")
 
24
 
25
  with torch.no_grad():
26
- answer = model.answer_question(
27
- image,
28
- prompt if prompt else "Describe the image.",
29
- tokenizer
30
  )
31
 
32
  return answer
33
 
34
- # ---- Gradio UI ----
 
 
 
35
  with gr.Blocks() as demo:
36
- gr.Markdown("# Vision Language Demo (CPU)")
37
- image = gr.Image(type="pil", label="Upload Image")
38
- textbox = gr.Textbox(label="Prompt", value="Describe this image.")
39
- output = gr.Textbox(label="Output")
 
 
 
 
 
 
40
 
41
- btn = gr.Button("Run")
42
- btn.click(fn=infer, inputs=[image, textbox], outputs=output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  demo.launch()
45
 
46
 
 
 
 
import torch
import gradio as gr
from transformers import AutoModelForVision2Seq, AutoProcessor

# ===============================
# Model config
# ===============================
MODEL_ID = "vikhyatk/moondream2"

# CPU-only deployment target.
device = "cpu"

# Processor & Model — both fetched from the Hugging Face hub.
# trust_remote_code is required because moondream2 ships custom modeling code.
processor = AutoProcessor.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
)

model = AutoModelForVision2Seq.from_pretrained(
    MODEL_ID,
    trust_remote_code=True,
    torch_dtype=torch.float32,  # safe dtype on CPU
).to(device)

# Inference only — disable dropout and other training-time behavior.
model.eval()
25
 
# ===============================
# Inference function
# ===============================
def infer(image, prompt):
    """Answer *prompt* about *image* using the moondream2 model.

    Args:
        image: PIL image from the Gradio widget, or ``None`` when nothing
            was uploaded.
        prompt: free-form question; ``None``/blank falls back to a generic
            caption request.

    Returns:
        The model's textual answer, or a hint string when no image is given.
    """
    if image is None:
        return "Please upload an image."

    if prompt is None or prompt.strip() == "":
        prompt = "Describe this image."

    # Restore the RGB normalization the previous version performed:
    # RGBA/palette uploads can otherwise break downstream image processing.
    if hasattr(image, "convert"):
        image = image.convert("RGB")

    with torch.no_grad():
        # NOTE(review): `answer` comes from moondream2's custom remote code;
        # confirm it exists on the pinned revision (older revisions expose
        # `answer_question(image, question, tokenizer)` instead).
        answer = model.answer(
            image=image,
            question=prompt
        )

    return answer
 
44
+
45
+ # ===============================
46
+ # Gradio UI
47
+ # ===============================
48
  with gr.Blocks() as demo:
49
+ gr.Markdown("# 🖼️ Vision Language Demo (moondream2 · CPU)")
50
+ gr.Markdown(
51
+ "⚠️ Uploaded images are processed in memory and not stored permanently."
52
+ )
53
+
54
+ with gr.Row():
55
+ image = gr.Image(
56
+ type="pil",
57
+ label="Upload Image"
58
+ )
59
 
60
+ with gr.Column():
61
+ textbox = gr.Textbox(
62
+ label="Prompt",
63
+ value="Describe this image."
64
+ )
65
+ btn = gr.Button("Run")
66
+
67
+ output = gr.Textbox(
68
+ label="Output",
69
+ lines=6
70
+ )
71
+
72
+ btn.click(
73
+ fn=infer,
74
+ inputs=[image, textbox],
75
+ outputs=output
76
+ )
77
 
78
  demo.launch()
79
 
80
 
81
+