stpete2 committed
Commit 361522e · verified · 1 Parent(s): a284910

Update app.py

Files changed (1)
  1. app.py +41 -40
app.py CHANGED
@@ -1,46 +1,47 @@
 import gradio as gr
-import base64
-import os
-from openai import OpenAI
-
-client = OpenAI(api_key=os.environ["openai_api_key"])
-
-def image_understand(image, prompt):
-    # PIL Image → base64
-    import io
-    buf = io.BytesIO()
-    image.save(buf, format="PNG")
-    img_b64 = base64.b64encode(buf.getvalue()).decode()
-
-    response = client.chat.completions.create(
-        model="gpt-4o-mini",
-        messages=[
-            {
-                "role": "user",
-                "content": [
-                    {"type": "text", "text": prompt},
-                    {
-                        "type": "image_url",
-                        "image_url": {
-                            "url": f"data:image/png;base64,{img_b64}"
-                        },
-                    },
-                ],
-            }
-        ],
-        max_tokens=300,
+import torch
+from PIL import Image
+from transformers import AutoProcessor, LlavaForConditionalGeneration
+
+MODEL_ID = "llava-hf/llava-1.5-7b-hf"
+
+processor = AutoProcessor.from_pretrained(MODEL_ID)
+model = LlavaForConditionalGeneration.from_pretrained(
+    MODEL_ID,
+    torch_dtype=torch.float32,
+    device_map="cpu"
+)
+
+def image_understand(image, text):
+    if image is None:
+        return "Please upload an image."
+
+    image = image.convert("RGB")
+
+    prompt = f"USER: <image>\n{text}\nASSISTANT:"
+
+    inputs = processor(
+        images=image,
+        text=prompt,
+        return_tensors="pt"
     )
-    return response.choices[0].message.content
 
-gr.Interface(
+    with torch.no_grad():
+        output = model.generate(
+            **inputs,
+            max_new_tokens=200
+        )
+
+    return processor.decode(output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
+
+demo = gr.Interface(
     fn=image_understand,
     inputs=[
-        gr.Image(type="pil", label="Image"),
-        gr.Textbox(
-            value="Describe this image objectively.",
-            label="Prompt"
-        )
+        gr.Image(type="pil"),
+        gr.Textbox(label="Question")
     ],
-    outputs=gr.Textbox(label="Result"),
-    title="Image Understanding Demo (GPT-4o mini)"
-).launch()
+    outputs=gr.Textbox(label="Answer"),
+    title="Free Vision LLM Demo (HF Spaces, CPU)"
+)
+
+demo.launch()
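
To sanity-check the new handler outside the Space, a minimal smoke test could look like the sketch below. It assumes things the commit does not state: the model weights are already downloaded, accelerate is installed (from_pretrained with device_map requires it), the code runs in a session that has imported app.py, and "sample.jpg" is a placeholder path for any local image.

    # Local smoke test for image_understand (a sketch, not part of the commit).
    from PIL import Image

    img = Image.open("sample.jpg")  # placeholder path; any RGB-convertible image works
    print(image_understand(img, "What objects are in this picture?"))

On the free CPU tier a single call to a 7B model in float32 can take minutes, so a timeout here signals a slow box rather than a broken handler.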