Spaces:

KarthiEz
/

gemmasign

Sleeping

App Files Files Community

KarthiEz committed on Oct 13

Commit

d35bac6

verified ·

1 Parent(s): 480e233

Create app.py

Browse files

Files changed (1) hide show

app.py +146 -0

app.py ADDED Viewed

	@@ -0,0 +1,146 @@

+# app_gradio_gemma4b_it_bnb4bit.py
+# Gradio UX for unsloth/gemma-3-4b-it-unsloth-bnb-4bit (image-text-to-text)
+from packaging import version
+import transformers
+from transformers import pipeline
+import torch
+import gradio as gr
+from PIL import Image
+# ---------- Version gate: Gemma 3 + 'image-text-to-text' pipeline support ----------
+# Gemma 3 model classes first shipped in Transformers 4.50.0. A lower floor
+# lets the check pass on installs that still cannot load MODEL_ID, so the
+# minimum is pinned to the first release that knows the architecture.
+MIN_TF = "4.50.0"
+if version.parse(transformers.__version__) < version.parse(MIN_TF):
+    # Fail at import time with an actionable upgrade command.
+    raise RuntimeError(
+        f"Transformers >= {MIN_TF} required for 'image-text-to-text' with Gemma 3. "
+        f"Found {transformers.__version__}. Upgrade:\n"
+        f"  pip install -U 'transformers>={MIN_TF},<5'"
+    )
+# ---------- Optional dependency probe: torchvision ----------
+# The import is attempted once at startup. Any ordinary failure (package
+# missing, broken install) only clears the flag instead of aborting the app.
+# Per the original note, torchvision is wanted for AutoVideoProcessor.
+try:
+    import torchvision  # noqa: F401
+except Exception:
+    HAS_TV = False
+else:
+    HAS_TV = True
+MODEL_ID = "unsloth/gemma-3-4b-it-unsloth-bnb-4bit"
+# ---------- Runtime state and capability flags ----------
+# PIPE / INIT_ERR are filled in by _build_pipe(): the loaded pipeline on
+# success, or a human-readable reason when loading was skipped or failed.
+PIPE = None
+INIT_ERR = None
+HAS_CUDA = torch.cuda.is_available()
+# bitsandbytes is needed for the 4-bit GPU load; a missing or broken install
+# only clears the flag so the UI can still start and report the problem.
+try:
+    import bitsandbytes as bnb  # noqa: F401
+except Exception:
+    HAS_BNB = False
+else:
+    HAS_BNB = True
+def _build_pipe():
+    """Load the image-text-to-text pipeline into ``PIPE``, or record why not.
+
+    Returns nothing. On success the module-level ``PIPE`` holds the pipeline.
+    Otherwise ``INIT_ERR`` is set to a human-readable message and ``PIPE`` is
+    left as it was. Exceptions raised while constructing the pipeline are
+    caught and turned into that message.
+    """
+    global PIPE, INIT_ERR
+
+    # Environment checks come first, before any attempt to build the pipeline.
+    if not HAS_TV:
+        INIT_ERR = "torchvision not found; required by the processor stack."
+        return
+    if not (HAS_CUDA and HAS_BNB):
+        INIT_ERR = (
+            "This 4-bit model requires a CUDA GPU + bitsandbytes to run. "
+            "Please switch to a GPU runtime or use a CPU-compatible model."
+        )
+        return
+
+    load_options = dict(
+        task="image-text-to-text",
+        model=MODEL_ID,
+        device_map="auto",
+        dtype=torch.float16,  # half precision on the GPU path
+        # NOTE(review): this allows repo-provided Python to run at load time;
+        # confirm it is actually needed for this model.
+        trust_remote_code=True,
+        use_fast=True,
+        # Explicit 4-bit (bitsandbytes) hint; the original note says many
+        # Unsloth repos infer this on their own.
+        model_kwargs={"load_in_4bit": True},
+    )
+    try:
+        PIPE = pipeline(**load_options)
+    except Exception as exc:
+        INIT_ERR = f"Pipeline initialization failed: {exc}"
+# Build the pipeline once at import time; the outcome lands in PIPE / INIT_ERR.
+_build_pipe()
+def _content_to_text(content):
+    """Flatten one chat turn's ``content`` field into plain text."""
+    if content is None:
+        return ""
+    if isinstance(content, (list, tuple)):
+        pieces = []
+        for part in content:
+            if isinstance(part, dict):
+                # Chat parts look like {"type": "text", "text": "..."}; parts
+                # without a string "text" (e.g. images) carry nothing to show.
+                text = part.get("text")
+                if isinstance(text, str):
+                    pieces.append(text)
+            elif part is not None:
+                pieces.append(str(part))
+        return " ".join(pieces)
+    return str(content)
+
+
+def _extract_text(obj):
+    """Normalize pipeline outputs to just the assistant text.
+
+    Accepted shapes:
+      * ``None``: returns ``""``.
+      * ``str``: returned unchanged.
+      * list/tuple: the first element is normalized; an empty sequence gives
+        ``""`` rather than its ``repr``.
+      * dict with ``"generated_text"`` as a string: that string.
+      * dict with ``"generated_text"`` as a list of chat turns: the text of the
+        last turn whose role is ``"assistant"``. If there is none, the first
+        turn is normalized instead.
+      * dict with a string ``"text"`` key: that string.
+
+    Anything else falls back to ``str(obj)``. Always returns a ``str``.
+    """
+    if obj is None:
+        return ""
+    if isinstance(obj, str):
+        return obj
+    if isinstance(obj, (list, tuple)):
+        # Batched outputs: only the first result is shown.
+        return _extract_text(obj[0]) if obj else ""
+    if isinstance(obj, dict):
+        gen = obj.get("generated_text")
+        if isinstance(gen, str):
+            return gen
+        if isinstance(gen, (list, tuple)) and gen:
+            # Walk backwards so the most recent assistant turn wins.
+            for turn in reversed(gen):
+                if isinstance(turn, dict) and turn.get("role") == "assistant":
+                    return _content_to_text(turn.get("content"))
+            return _extract_text(gen[0])
+        text = obj.get("text")
+        if isinstance(text, str):
+            return text
+    return str(obj)
+def infer(image: Image.Image, question: str) -> str:
+    """Answer ``question`` about ``image`` using the loaded pipeline.
+
+    Args:
+        image: The uploaded picture, or ``None`` when nothing was uploaded.
+        question: The user's prompt; surrounding whitespace is ignored.
+
+    Returns:
+        The assistant's answer, ``"(empty response)"`` when the model produced
+        no text, or a short user-facing message when startup failed, an input
+        is missing, or inference raised. Pipeline errors are reported as text
+        rather than raised, so they never surface to the UI as exceptions.
+    """
+    # Fail-soft guards to avoid exceptions surfacing to UI
+    if INIT_ERR:
+        return f"⚠️ {INIT_ERR}"
+    if image is None:
+        return "Please upload an image."
+    q = (question or "").strip()
+    if not q:
+        return "Please enter a question."
+
+    # Preferred: chat-style messages (auto-injects image tokens)
+    messages = [{
+        "role": "user",
+        "content": [
+            {"type": "image", "image": image},
+            {"type": "text", "text": q},
+        ],
+    }]
+    try:
+        out = PIPE(text=messages, max_new_tokens=128)
+    except Exception as chat_err:
+        # Fallback: plain prompt plus an explicit image list. The pipeline takes
+        # these as the ``images`` / ``text`` keyword arguments; a single dict
+        # passed positionally is read as ``images`` with no text and rejected.
+        try:
+            out = PIPE(images=[image], text=q, max_new_tokens=128)
+        except Exception as plain_err:
+            # Keep both causes so the user (and logs) can see what went wrong.
+            return (
+                f"⚠️ Inference failed: {chat_err} "
+                f"(fallback also failed: {plain_err})"
+            )
+    return _extract_text(out).strip() or "(empty response)"
+# ---------- Gradio UI ----------
+# Layout: header text, optional startup-status banner, then one row holding
+# the image upload on the left and the question/answer column on the right.
+with gr.Blocks(title="Gemma 3 4B IT (UnsLoTH 4-bit) — Image Q&A") as demo:
+    gr.Markdown(
+        value=(
+            "## 🖼️💬 Gemma-3-4B-IT (UnsLoTH 4-bit) — Image Q&A\n"
+            "- Upload an image, ask a question.\n"
+            "- This Space expects a **CUDA GPU + bitsandbytes** for this 4-bit model.\n"
+        )
+    )
+    # Show the startup failure reason up front, if there was one.
+    if INIT_ERR:
+        gr.Markdown(value=f"**Startup status:** `{INIT_ERR}`")
+    with gr.Row():
+        img = gr.Image(type="pil", label="Upload an image")
+        with gr.Column():
+            prompt = gr.Textbox(
+                lines=2,
+                label="Question",
+                placeholder='e.g., What animal is on the candy?',
+            )
+            submit = gr.Button(value="Ask")
+            output = gr.TextArea(lines=6, label="Answer")
+    # Both the button and pressing Enter in the question box run inference.
+    submit.click(fn=infer, inputs=[img, prompt], outputs=output)
+    prompt.submit(fn=infer, inputs=[img, prompt], outputs=output)
+# Script entry point: enable request queuing, then start the server in debug mode.
+if __name__ == "__main__":
+    demo.queue().launch(debug=True)