deepguess committed
Commit 2788a1b · verified · 1 Parent(s): 77e6097

Run Isobar-1 directly on ZeroGPU

Files changed (4)
  1. README.md +4 -21
  2. __pycache__/app.cpython-313.pyc +0 -0
  3. app.py +113 -49
  4. requirements.txt +5 -1
README.md CHANGED
@@ -10,27 +10,10 @@ pinned: false
 
 # Isobar-1 Demo
 
-This Space is a lightweight frontend for [`deepguess/Isobar-1`](https://huggingface.co/deepguess/Isobar-1).
-
-It is designed to connect to an OpenAI-compatible backend that serves the model. That is the practical deployment path for a 27B multimodal model. Running `Isobar-1` directly inside a default CPU Space is not realistic.
-
-## Required Space variables / secrets
-
-Set these in the Space settings before expecting inference to work:
-
-- `OPENAI_BASE_URL`
-  - Base URL for the backend, for example `https://your-host.example.com/v1`
-- `OPENAI_MODEL`
-  - Model id exposed by that backend
-- `OPENAI_API_KEY`
-  - Optional if the backend requires auth
-- `OPENAI_TIMEOUT_SECONDS`
-  - Optional, defaults to `180`
+This Space runs [`deepguess/Isobar-1`](https://huggingface.co/deepguess/Isobar-1) directly on Hugging Face ZeroGPU.
 
 ## Notes
 
-- This Space is a UI layer only.
-- The model itself lives in the Hub model repo:
-  - `deepguess/Isobar-1`
-- A future dedicated agent model can be published separately as:
-  - `deepguess/Isobar-1-Agent`
+- First load may take a while because the model has to be initialized.
+- ZeroGPU queueing and quota rules apply.
+- This Space is intended for interactive testing, not high-throughput serving.
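For readers new to ZeroGPU: the pattern the rewritten README refers to is a function decorated with `spaces.GPU`, which attaches a GPU only for the duration of the call. A minimal standalone sketch (hypothetical function name and duration, not code from this commit):

```python
import gradio as gr
import spaces  # Hugging Face ZeroGPU helper; a no-op outside Spaces
import torch


@spaces.GPU(duration=120)  # hypothetical GPU-seconds budget per call
def on_gpu(prompt: str) -> str:
    # On ZeroGPU hardware, CUDA is available only inside the decorated call.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"ran on {device}: {prompt}"


demo = gr.Interface(fn=on_gpu, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.queue().launch()
```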
 
 
__pycache__/app.cpython-313.pyc ADDED
Binary file (9.47 kB).
 
app.py CHANGED
@@ -1,16 +1,19 @@
 from __future__ import annotations
 
-import base64
-import io
-import os
+import inspect
+import threading
 from typing import Final
 
 import gradio as gr
-from openai import OpenAI
+import spaces
+import torch
 from PIL import Image
+from qwen_vl_utils import process_vision_info
+from transformers import AutoModelForImageTextToText, AutoProcessor
 
 
 TITLE: Final[str] = "Isobar-1 Demo"
+MODEL_ID: Final[str] = "deepguess/Isobar-1"
 DEFAULT_SYSTEM_PROMPT: Final[str] = (
     "You are Isobar-1, an expert meteorologist. Answer clearly, concisely, and stay grounded in the image."
 )
@@ -44,43 +47,59 @@ SYSTEM_PROMPT_PRESETS: Final[dict[str, str]] = {
 }
 
 
-def get_env(name: str, default: str = "") -> str:
-    value = os.getenv(name, default)
-    return value.strip()
+_MODEL = None
+_PROCESSOR = None
+_LOAD_LOCK = threading.Lock()
 
 
-API_BASE_URL = get_env("OPENAI_BASE_URL")
-API_KEY = get_env("OPENAI_API_KEY", "EMPTY")
-MODEL_ID = get_env("OPENAI_MODEL", "deepguess/Isobar-1")
-REQUEST_TIMEOUT = float(get_env("OPENAI_TIMEOUT_SECONDS", "180"))
+def apply_preset(preset_name: str) -> str:
+    return SYSTEM_PROMPT_PRESETS.get(preset_name, DEFAULT_SYSTEM_PROMPT)
 
 
-def build_client() -> OpenAI | None:
-    if not API_BASE_URL:
-        return None
-    return OpenAI(api_key=API_KEY, base_url=API_BASE_URL, timeout=REQUEST_TIMEOUT)
+def strip_reasoning_output(text: str) -> str:
+    if not text:
+        return text
+    if "</think>" in text:
+        text = text.split("</think>", 1)[1]
+    text = text.replace("<think>", "").replace("</think>", "").strip()
+    return text
 
 
-CLIENT = build_client()
-
-
-def config_status() -> str:
-    if CLIENT is None:
-        return (
-            "Space is online, but inference is not configured yet.\n\n"
-            "Set `OPENAI_BASE_URL` and `OPENAI_MODEL` in the Space secrets/variables "
-            "to connect this UI to an OpenAI-compatible backend serving Isobar-1."
+def ensure_model_loaded():
+    global _MODEL, _PROCESSOR
+    if _MODEL is not None and _PROCESSOR is not None:
+        return _MODEL, _PROCESSOR
+
+    with _LOAD_LOCK:
+        if _MODEL is not None and _PROCESSOR is not None:
+            return _MODEL, _PROCESSOR
+
+        processor = AutoProcessor.from_pretrained(
+            MODEL_ID,
+            trust_remote_code=True,
+            min_pixels=256 * 28 * 28,
+            max_pixels=1024 * 28 * 28,
         )
-    return f"Connected to backend `{API_BASE_URL}` with model `{MODEL_ID}`."
+        if processor.tokenizer.pad_token is None:
+            processor.tokenizer.pad_token = processor.tokenizer.eos_token
+
+        model = AutoModelForImageTextToText.from_pretrained(
+            MODEL_ID,
+            trust_remote_code=True,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            attn_implementation="sdpa",
+        )
+        model.eval()
 
-
-def image_to_data_url(image: Image.Image) -> str:
-    buffer = io.BytesIO()
-    image.convert("RGB").save(buffer, format="PNG")
-    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
-    return f"data:image/png;base64,{encoded}"
+        _MODEL = model
+        _PROCESSOR = processor
+
+    return _MODEL, _PROCESSOR
 
 
+@spaces.GPU(duration=240)
+@torch.inference_mode()
 def run_inference(
     image: Image.Image | None,
     question: str,
@@ -88,41 +107,83 @@ def run_inference(
     max_tokens: int,
     temperature: float,
 ) -> str:
-    if CLIENT is None:
-        return config_status()
     if image is None:
         return "Upload an image first."
+
     question = question.strip()
     if not question:
         return "Enter a question."
 
+    model, processor = ensure_model_loaded()
+
     messages = []
-    if system_prompt.strip():
-        messages.append({"role": "system", "content": system_prompt.strip()})
+    system_prompt = system_prompt.strip()
+    if system_prompt:
+        messages.append(
+            {
+                "role": "system",
+                "content": [{"type": "text", "text": system_prompt}],
+            }
+        )
+
     messages.append(
         {
             "role": "user",
             "content": [
-                {"type": "text", "text": question},
-                {"type": "image_url", "image_url": {"url": image_to_data_url(image)}},
+                {"type": "text", "text": f"/no_think\n{question}"},
+                {"type": "image", "image": image.convert("RGB")},
             ],
         }
     )
 
-    try:
-        response = CLIENT.chat.completions.create(
-            model=MODEL_ID,
-            messages=messages,
-            max_tokens=int(max_tokens),
-            temperature=float(temperature),
-        )
-        return response.choices[0].message.content or ""
-    except Exception as exc:  # pragma: no cover - UI path
-        return f"Backend request failed: {exc}"
-
-
-def apply_preset(preset_name: str) -> str:
-    return SYSTEM_PROMPT_PRESETS.get(preset_name, DEFAULT_SYSTEM_PROMPT)
+    apply_kwargs = {
+        "tokenize": False,
+        "add_generation_prompt": True,
+    }
+    if "chat_template_kwargs" in inspect.signature(processor.apply_chat_template).parameters:
+        apply_kwargs["chat_template_kwargs"] = {"enable_thinking": False}
+
+    try:
+        chat_text = processor.apply_chat_template(messages, **apply_kwargs)
+    except TypeError:
+        chat_text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    image_inputs, video_inputs, video_kwargs = process_vision_info(messages, return_video_kwargs=True)
+    processor_kwargs = {
+        "text": [chat_text],
+        "images": image_inputs,
+        "return_tensors": "pt",
+        "padding": False,
+    }
+    if video_inputs is not None:
+        processor_kwargs["videos"] = video_inputs
+        processor_kwargs.update(video_kwargs)
+
+    inputs = processor(**processor_kwargs)
+    target_device = next(model.parameters()).device
+    inputs = {
+        key: value.to(target_device) if hasattr(value, "to") else value
+        for key, value in inputs.items()
+    }
+
+    do_sample = temperature > 0
+    generation_kwargs = {
+        "max_new_tokens": int(max_tokens),
+        "do_sample": do_sample,
+        "use_cache": True,
+    }
+    if do_sample:
+        generation_kwargs["temperature"] = float(temperature)
+        generation_kwargs["top_p"] = 0.9
+
+    output_ids = model.generate(**inputs, **generation_kwargs)
+    trimmed_ids = output_ids[:, inputs["input_ids"].shape[1] :]
+    text = processor.batch_decode(
+        trimmed_ids,
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=False,
+    )[0].strip()
+    return strip_reasoning_output(text)
 
 
 with gr.Blocks(title=TITLE) as demo:
@@ -132,7 +193,10 @@ with gr.Blocks(title=TITLE) as demo:
         Weather image analysis demo for radar, sounding, satellite, and forecast graphics.
         """
     )
-    gr.Markdown(config_status())
+    gr.Markdown(
+        "This Space runs `deepguess/Isobar-1` directly on Hugging Face ZeroGPU. "
+        "The first request may take longer while the model is loaded."
+    )
 
     with gr.Row():
         with gr.Column(scale=1):
@@ -168,4 +232,4 @@ with gr.Blocks(title=TITLE) as demo:
 
 
 if __name__ == "__main__":
-    demo.queue(default_concurrency_limit=2).launch()
+    demo.queue(default_concurrency_limit=1).launch()
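A quick way to exercise the new direct-inference path outside the Gradio UI is a smoke test like the following — a hypothetical sketch, not part of the commit: the sample file name is made up, and locally this needs enough GPU memory for the bf16 weights (on ZeroGPU the `@spaces.GPU` decorator provisions the device instead).

```python
# Hypothetical local smoke test for the new direct-inference path.
from PIL import Image

from app import DEFAULT_SYSTEM_PROMPT, run_inference

radar = Image.open("sample_radar.png")  # hypothetical input image
answer = run_inference(
    image=radar,
    question="Where is the strongest reflectivity core?",
    system_prompt=DEFAULT_SYSTEM_PROMPT,
    max_tokens=256,
    temperature=0.0,  # temperature <= 0 selects greedy decoding (do_sample=False)
)
print(answer)
```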
requirements.txt CHANGED
@@ -1,3 +1,7 @@
+accelerate>=1.0.0
 gradio>=5.0.0
-openai>=1.0.0
 Pillow>=10.0.0
+qwen-vl-utils>=0.0.8
+spaces>=0.34.0
+torch>=2.6.0
+transformers>=4.57.0
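Before launching the Space locally against these pins, a quick sanity check (hypothetical, not part of the commit) that the stack imports and sees a GPU:

```python
# Verify the pinned dependencies import cleanly and CUDA is visible.
import torch
import transformers

print("torch:", torch.__version__, "| CUDA:", torch.cuda.is_available())
print("transformers:", transformers.__version__)  # expect >= 4.57.0
```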