stpete2 committed on
Commit
13cece1
·
verified ·
1 Parent(s): 6a19471

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -31
app.py CHANGED
@@ -1,48 +1,53 @@
1
  import gradio as gr
2
  import torch
 
3
  from PIL import Image
4
- from transformers import AutoProcessor, AutoModelForVision2Seq
5
 
6
- MODEL_ID = "llava-hf/llava-1.5-7b-hf"
7
 
8
- processor = AutoProcessor.from_pretrained(MODEL_ID)
9
-
10
- model = AutoModelForVision2Seq.from_pretrained(
11
  MODEL_ID,
12
- dtype=torch.float32,
13
- device_map="cpu"
 
14
  )
 
15
 
16
- def image_understand(image, text):
 
17
  if image is None:
18
- return "Please upload an image."
19
 
20
  image = image.convert("RGB")
21
 
22
- prompt = f"USER: <image>\n{text}\nASSISTANT:"
23
-
24
- inputs = processor(
25
- images=image,
26
- text=prompt,
27
- return_tensors="pt"
28
- )
29
-
30
  with torch.no_grad():
31
- output = model.generate(
32
- **inputs,
33
- max_new_tokens=200
 
34
  )
35
 
36
- return processor.decode(output[0], skip_special_tokens=True)
37
-
38
- demo = gr.Interface(
39
- fn=image_understand,
40
- inputs=[
41
- gr.Image(type="pil", label="Image"),
42
- gr.Textbox(label="Question")
43
- ],
44
- outputs=gr.Textbox(label="Answer"),
45
- title="Free Vision LLM Demo (HF Spaces CPU)"
46
- )
 
 
 
 
 
 
 
 
 
47
 
48
  demo.launch()
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
  from PIL import Image
 
5
 
6
# Hub ID of the vision-language model; moondream2 is small enough for CPU Spaces.
MODEL_ID = "vikhyatk/moondream2"

# ---- Load model (CPU) ----
# NOTE(review): trust_remote_code=True executes Python shipped inside the model
# repo (required by moondream2's custom architecture). Consider pinning a
# `revision=` commit hash so a later repo update cannot change the code run here.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,   # full precision for CPU inference (no fp16 on CPU)
    low_cpu_mem_usage=True,      # stream weights in to limit peak RAM during load
    trust_remote_code=True
)
model.eval()  # inference only: disable dropout / training-mode behavior
17
 
18
# ---- Inference ----
def caption_image(image, prompt):
    """Answer a question about an uploaded image with moondream2.

    Parameters:
        image: PIL.Image.Image or None — the uploaded image; Gradio passes
            None when the user presses Run without uploading one.
        prompt: str — the user's question. Empty or whitespace-only prompts
            fall back to a generic caption request.

    Returns:
        str — the model's answer, or a short notice when no image was given.
    """
    if image is None:
        return "No image provided."

    # Normalize to 3-channel RGB (uploads may arrive as RGBA, palette, or L).
    image = image.convert("RGB")

    # Fix: the original `prompt if prompt else ...` sent whitespace-only
    # prompts (e.g. "   ") to the model verbatim; strip first so they also
    # fall back to the default question.
    question = prompt.strip() if prompt else ""
    if not question:
        question = "Describe the image."

    # Pure inference — no gradients needed; saves memory on CPU.
    with torch.no_grad():
        # answer_question is moondream2's remote-code API (image, question, tokenizer).
        answer = model.answer_question(image, question, tokenizer)

    return answer
33
+
34
# ---- Gradio UI ----
# Layout: image upload on the left; prompt box and model output stacked on
# the right; a Run button underneath wires the inference function.
with gr.Blocks() as demo:
    gr.Markdown("# 🖼️ Vision Chatbot (moondream2, CPU)")
    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Image")
        with gr.Column():
            text_input = gr.Textbox(
                label="Prompt",
                placeholder="Describe the image / What is happening?"
            )
            # NOTE(review): nesting reconstructed from a whitespace-mangled
            # dump — confirm `output` belongs inside this Column.
            output = gr.Textbox(label="Model Output")

    btn = gr.Button("Run")
    # Clicking Run feeds (image, prompt) to caption_image and shows the answer.
    btn.click(
        fn=caption_image,
        inputs=[image_input, text_input],
        outputs=output
    )

demo.launch()