deepguess committed
Commit 2788a1b · verified · 1 Parent(s): 77e6097

Run Isobar-1 directly on ZeroGPU

Files changed (4)
  1. README.md +4 -21
  2. __pycache__/app.cpython-313.pyc +0 -0
  3. app.py +113 -49
  4. requirements.txt +5 -1
README.md CHANGED
@@ -10,27 +10,10 @@ pinned: false
 
 # Isobar-1 Demo
 
-This Space is a lightweight frontend for [`deepguess/Isobar-1`](https://huggingface.co/deepguess/Isobar-1).
-
-It is designed to connect to an OpenAI-compatible backend that serves the model. That is the practical deployment path for a 27B multimodal model. Running `Isobar-1` directly inside a default CPU Space is not realistic.
-
-## Required Space variables / secrets
-
-Set these in the Space settings before expecting inference to work:
-
-- `OPENAI_BASE_URL`
-  - Base URL for the backend, for example `https://your-host.example.com/v1`
-- `OPENAI_MODEL`
-  - Model id exposed by that backend
-- `OPENAI_API_KEY`
-  - Optional if the backend requires auth
-- `OPENAI_TIMEOUT_SECONDS`
-  - Optional, defaults to `180`
+This Space runs [`deepguess/Isobar-1`](https://huggingface.co/deepguess/Isobar-1) directly on Hugging Face ZeroGPU.
 
 ## Notes
 
-- This Space is a UI layer only.
-- The model itself lives in the Hub model repo:
-  - `deepguess/Isobar-1`
-- A future dedicated agent model can be published separately as:
-  - `deepguess/Isobar-1-Agent`
+- First load may take a while because the model has to be initialized.
+- ZeroGPU queueing and quota rules apply.
+- This Space is intended for interactive testing, not high-throughput serving.
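For readers new to ZeroGPU: the pattern the rewritten README refers to is a function decorated with `spaces.GPU`, which attaches a GPU only for the duration of the call. A minimal standalone sketch (hypothetical function name and duration, not code from this commit):

```python
import gradio as gr
import spaces  # Hugging Face ZeroGPU helper; a no-op outside Spaces
import torch


@spaces.GPU(duration=120)  # hypothetical GPU-seconds budget per call
def on_gpu(prompt: str) -> str:
    # On ZeroGPU hardware, CUDA is available only inside the decorated call.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return f"ran on {device}: {prompt}"


demo = gr.Interface(fn=on_gpu, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.queue().launch()
```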
 
 
__pycache__/app.cpython-313.pyc ADDED
Binary file (9.47 kB).
 
app.py CHANGED
@@ -1,16 +1,19 @@
 from __future__ import annotations
 
-import base64
-import io
-import os
+import inspect
+import threading
 from typing import Final
 
 import gradio as gr
-from openai import OpenAI
+import spaces
+import torch
 from PIL import Image
+from qwen_vl_utils import process_vision_info
+from transformers import AutoModelForImageTextToText, AutoProcessor
 
 
 TITLE: Final[str] = "Isobar-1 Demo"
+MODEL_ID: Final[str] = "deepguess/Isobar-1"
 DEFAULT_SYSTEM_PROMPT: Final[str] = (
     "You are Isobar-1, an expert meteorologist. Answer clearly, concisely, and stay grounded in the image."
 )
@@ -44,43 +47,59 @@ SYSTEM_PROMPT_PRESETS: Final[dict[str, str]] = {
 }
 
 
-def get_env(name: str, default: str = "") -> str:
-    value = os.getenv(name, default)
-    return value.strip()
+_MODEL = None
+_PROCESSOR = None
+_LOAD_LOCK = threading.Lock()
 
 
-API_BASE_URL = get_env("OPENAI_BASE_URL")
-API_KEY = get_env("OPENAI_API_KEY", "EMPTY")
-MODEL_ID = get_env("OPENAI_MODEL", "deepguess/Isobar-1")
-REQUEST_TIMEOUT = float(get_env("OPENAI_TIMEOUT_SECONDS", "180"))
+def apply_preset(preset_name: str) -> str:
+    return SYSTEM_PROMPT_PRESETS.get(preset_name, DEFAULT_SYSTEM_PROMPT)
 
 
-def build_client() -> OpenAI | None:
-    if not API_BASE_URL:
-        return None
-    return OpenAI(api_key=API_KEY, base_url=API_BASE_URL, timeout=REQUEST_TIMEOUT)
+def strip_reasoning_output(text: str) -> str:
+    if not text:
+        return text
+    if "</think>" in text:
+        text = text.split("</think>", 1)[1]
+    text = text.replace("<think>", "").replace("</think>", "").strip()
+    return text
 
 
-CLIENT = build_client()
-
-
-def config_status() -> str:
-    if CLIENT is None:
-        return (
-            "Space is online, but inference is not configured yet.\n\n"
-            "Set `OPENAI_BASE_URL` and `OPENAI_MODEL` in the Space secrets/variables "
-            "to connect this UI to an OpenAI-compatible backend serving Isobar-1."
+def ensure_model_loaded():
+    global _MODEL, _PROCESSOR
+    if _MODEL is not None and _PROCESSOR is not None:
+        return _MODEL, _PROCESSOR
+
+    with _LOAD_LOCK:
+        if _MODEL is not None and _PROCESSOR is not None:
+            return _MODEL, _PROCESSOR
+
+        processor = AutoProcessor.from_pretrained(
+            MODEL_ID,
+            trust_remote_code=True,
+            min_pixels=256 * 28 * 28,
+            max_pixels=1024 * 28 * 28,
         )
-    return f"Connected to backend `{API_BASE_URL}` with model `{MODEL_ID}`."
+        if processor.tokenizer.pad_token is None:
+            processor.tokenizer.pad_token = processor.tokenizer.eos_token
+
+        model = AutoModelForImageTextToText.from_pretrained(
+            MODEL_ID,
+            trust_remote_code=True,
+            torch_dtype=torch.bfloat16,
+            device_map="auto",
+            attn_implementation="sdpa",
+        )
+        model.eval()
 
-
-def image_to_data_url(image: Image.Image) -> str:
-    buffer = io.BytesIO()
-    image.convert("RGB").save(buffer, format="PNG")
-    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
-    return f"data:image/png;base64,{encoded}"
+        _MODEL = model
+        _PROCESSOR = processor
+
+    return _MODEL, _PROCESSOR
 
 
+@spaces.GPU(duration=240)
+@torch.inference_mode()
 def run_inference(
     image: Image.Image | None,
     question: str,
@@ -88,41 +107,83 @@ def run_inference(
     max_tokens: int,
     temperature: float,
 ) -> str:
-    if CLIENT is None:
-        return config_status()
     if image is None:
         return "Upload an image first."
+
     question = question.strip()
     if not question:
         return "Enter a question."
 
+    model, processor = ensure_model_loaded()
+
     messages = []
-    if system_prompt.strip():
-        messages.append({"role": "system", "content": system_prompt.strip()})
+    system_prompt = system_prompt.strip()
+    if system_prompt:
+        messages.append(
+            {
+                "role": "system",
+                "content": [{"type": "text", "text": system_prompt}],
+            }
+        )
+
     messages.append(
         {
             "role": "user",
             "content": [
-                {"type": "text", "text": question},
-                {"type": "image_url", "image_url": {"url": image_to_data_url(image)}},
+                {"type": "text", "text": f"/no_think\n{question}"},
+                {"type": "image", "image": image.convert("RGB")},
             ],
         }
     )
 
-    try:
-        response = CLIENT.chat.completions.create(
-            model=MODEL_ID,
-            messages=messages,
-            max_tokens=int(max_tokens),
-            temperature=float(temperature),
-        )
-        return response.choices[0].message.content or ""
-    except Exception as exc:  # pragma: no cover - UI path
-        return f"Backend request failed: {exc}"
-
-
-def apply_preset(preset_name: str) -> str:
-    return SYSTEM_PROMPT_PRESETS.get(preset_name, DEFAULT_SYSTEM_PROMPT)
+    apply_kwargs = {
+        "tokenize": False,
+        "add_generation_prompt": True,
+    }
+    if "chat_template_kwargs" in inspect.signature(processor.apply_chat_template).parameters:
+        apply_kwargs["chat_template_kwargs"] = {"enable_thinking": False}
+
+    try:
+        chat_text = processor.apply_chat_template(messages, **apply_kwargs)
+    except TypeError:
+        chat_text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    image_inputs, video_inputs, video_kwargs = process_vision_info(messages, return_video_kwargs=True)
+    processor_kwargs = {
+        "text": [chat_text],
+        "images": image_inputs,
+        "return_tensors": "pt",
+        "padding": False,
+    }
+    if video_inputs is not None:
+        processor_kwargs["videos"] = video_inputs
+        processor_kwargs.update(video_kwargs)
+
+    inputs = processor(**processor_kwargs)
+    target_device = next(model.parameters()).device
+    inputs = {
+        key: value.to(target_device) if hasattr(value, "to") else value
+        for key, value in inputs.items()
+    }
+
+    do_sample = temperature > 0
+    generation_kwargs = {
+        "max_new_tokens": int(max_tokens),
+        "do_sample": do_sample,
+        "use_cache": True,
+    }
+    if do_sample:
+        generation_kwargs["temperature"] = float(temperature)
+        generation_kwargs["top_p"] = 0.9
+
+    output_ids = model.generate(**inputs, **generation_kwargs)
+    trimmed_ids = output_ids[:, inputs["input_ids"].shape[1] :]
+    text = processor.batch_decode(
+        trimmed_ids,
+        skip_special_tokens=True,
+        clean_up_tokenization_spaces=False,
+    )[0].strip()
+    return strip_reasoning_output(text)
 
 
 with gr.Blocks(title=TITLE) as demo:
@@ -132,7 +193,10 @@ with gr.Blocks(title=TITLE) as demo:
         Weather image analysis demo for radar, sounding, satellite, and forecast graphics.
         """
     )
-    gr.Markdown(config_status())
+    gr.Markdown(
+        "This Space runs `deepguess/Isobar-1` directly on Hugging Face ZeroGPU. "
+        "The first request may take longer while the model is loaded."
+    )
 
     with gr.Row():
         with gr.Column(scale=1):
@@ -168,4 +232,4 @@ with gr.Blocks(title=TITLE) as demo:
 
 
 if __name__ == "__main__":
-    demo.queue(default_concurrency_limit=2).launch()
+    demo.queue(default_concurrency_limit=1).launch()
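A quick way to exercise the new direct-inference path outside the Gradio UI is a smoke test like the following — a hypothetical sketch, not part of the commit: the sample file name is made up, and locally this needs enough GPU memory for the bf16 weights (on ZeroGPU the `@spaces.GPU` decorator provisions the device instead).

```python
# Hypothetical local smoke test for the new direct-inference path.
from PIL import Image

from app import DEFAULT_SYSTEM_PROMPT, run_inference

radar = Image.open("sample_radar.png")  # hypothetical input image
answer = run_inference(
    image=radar,
    question="Where is the strongest reflectivity core?",
    system_prompt=DEFAULT_SYSTEM_PROMPT,
    max_tokens=256,
    temperature=0.0,  # temperature <= 0 selects greedy decoding (do_sample=False)
)
print(answer)
```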
requirements.txt CHANGED
@@ -1,3 +1,7 @@
+accelerate>=1.0.0
 gradio>=5.0.0
-openai>=1.0.0
 Pillow>=10.0.0
+qwen-vl-utils>=0.0.8
+spaces>=0.34.0
+torch>=2.6.0
+transformers>=4.57.0
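Before launching the Space locally against these pins, a quick sanity check (hypothetical, not part of the commit) that the stack imports and sees a GPU:

```python
# Verify the pinned dependencies import cleanly and CUDA is visible.
import torch
import transformers

print("torch:", torch.__version__, "| CUDA:", torch.cuda.is_available())
print("transformers:", transformers.__version__)  # expect >= 4.57.0
```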