Spaces:

reygml
/

vlm_grounding

Sleeping

App Files Files Community

reygml committed on Sep 2, 2025

Commit

eee3392

1 Parent(s): a122b63

feat:hehe

Browse files

Files changed (4) hide show

Dockerfile +9 -4
requirements.txt +1 -0
start.sh +29 -0
ui.py +123 -0

Dockerfile CHANGED Viewed

@@ -15,13 +15,18 @@ RUN rm -rf /var/lib/apt/lists/* \
       git git-lfs ca-certificates \
  && rm -rf /var/lib/apt/lists/*
-WORKDIR /app
-RUN pip install --no-deps "https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.0.post2/flash_attn-2.7.0.post2+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl"
 COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
-COPY --chown=user . /app
-CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

       git git-lfs ca-certificates \
  && rm -rf /var/lib/apt/lists/*
+# tini is installed here and used further down as the container entrypoint.
+RUN apt-get update && apt-get install -y --no-install-recommends tini && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
 COPY --chown=user ./requirements.txt requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Bring over your existing FastAPI + util, plus Streamlit UI and launcher
+# Only these four files are copied into /app, not the whole build context.
+# NOTE(review): unlike the requirements.txt COPY above, this one has no --chown=user — confirm that is intended.
+COPY util.py app.py ui.py start.sh /app/
+# start.sh uses PORT for the Streamlit port (and falls back to 7860 on its own).
+ENV PORT=7860
+EXPOSE 7860
+# tini is the entrypoint and runs the CMD; start.sh then launches uvicorn and Streamlit.
+ENTRYPOINT ["/usr/bin/tini","--"]
+CMD ["bash","/app/start.sh"]

requirements.txt CHANGED Viewed

@@ -18,6 +18,7 @@ icecream
 einops
 Pillow
 gradio
 xformers==0.0.27.post2
 spconv-cu120==2.3.6
 transformers==4.46.3

 einops
 Pillow
 gradio
+streamlit
 xformers==0.0.27.post2
 spconv-cu120==2.3.6
 transformers==4.46.3

start.sh ADDED Viewed

	@@ -0,0 +1,29 @@

+#!/usr/bin/env bash
+# Container launcher: points caches at writable locations, starts the FastAPI
+# backend in the background, then replaces this shell with the Streamlit UI.
+set -euo pipefail
+# ---- Make all caches writable in Spaces (avoid /.cache) ----
+# Every cache path below is derived from HF_CACHE_DIR, which defaults to
+# /tmp/hf-cache unless the environment already provides a value.
+export HF_CACHE_DIR="${HF_CACHE_DIR:-/tmp/hf-cache}"
+mkdir -p "$HF_CACHE_DIR" /tmp/pycache
+export HOME=/tmp
+export XDG_CACHE_HOME="$HF_CACHE_DIR"
+export HF_HOME="$HF_CACHE_DIR"
+export HUGGINGFACE_HUB_CACHE="$HF_CACHE_DIR"
+export TRANSFORMERS_CACHE="$HF_CACHE_DIR"
+export HF_DATASETS_CACHE="$HF_CACHE_DIR/datasets"
+export TORCH_HOME="$HF_CACHE_DIR"
+# Python bytecode caches go to the /tmp/pycache directory created above.
+export PYTHONPYCACHEPREFIX=/tmp/pycache
+# ---- Disable FlashAttention by default (safe on T4/L4) ----
+# Defaults to "sdpa"; a SMOLVLM_ATTN value set by the caller wins.
+# NOTE(review): presumably read by app.py/util.py to choose the attention backend — confirm.
+export SMOLVLM_ATTN="${SMOLVLM_ATTN:-sdpa}"
+# ---- Start FastAPI (internal) ----
+# Runs in the background (&) on port 8000, the default API_BASE port used by ui.py.
+# NOTE(review): labelled internal but bound to 0.0.0.0 with all forwarded IPs trusted — confirm 127.0.0.1 would not suffice.
+# NOTE(review): nothing here watches the background job, so Streamlit keeps running if uvicorn exits — confirm this is acceptable.
+uvicorn app:app --host 0.0.0.0 --port 8000 --no-server-header --forwarded-allow-ips="*" &
+# ---- Start Streamlit (public) ----
+# exec replaces this shell with Streamlit, listening on $PORT (7860 when unset).
+# CORS and XSRF protection are switched off explicitly by the flags below.
+exec streamlit run ui.py \
+  --server.address=0.0.0.0 \
+  --server.port="${PORT:-7860}" \
+  --server.headless=true \
+  --server.enableCORS=false \
+  --server.enableXsrfProtection=false

ui.py ADDED Viewed

	@@ -0,0 +1,123 @@

+# ui.py
+import os
+import io
+import json
+import requests
+import streamlit as st
+from PIL import Image
+st.set_page_config(page_title="SmolVLM UI", layout="wide")
+st.title("SmolVLM")
+API_BASE = os.getenv("API_BASE", "http://127.0.0.1:8000")
+with st.sidebar:
+    st.subheader("Generation settings")
+    max_new_tokens = st.slider("max_new_tokens", 1, 1024, 300, step=1)
+    temperature_on = st.toggle("Set temperature?", value=False)
+    temperature = st.slider("temperature", 0.0, 2.0, 0.2, step=0.05) if temperature_on else None
+    topp_on = st.toggle("Set top_p?", value=False)
+    top_p = st.slider("top_p", 0.05, 1.0, 0.95, step=0.05) if topp_on else None
+    st.caption("API base: " + API_BASE)
+tabs = st.tabs(["Upload images", "Image URLs"])
+prompt = st.text_area("Prompt", "Can you describe the image(s)?", height=80)
+def show_metrics(metrics: dict):
+    if not metrics:
+        return
+    info = metrics
+    cols = st.columns(4)
+    tt = info.get("timings_ms", {}).get("total")
+    it = info.get("timings_ms", {}).get("inference")
+    tps = info.get("throughput", {}).get("tokens_per_sec_inference")
+    vram = info.get("gpu_memory_mb", {}).get("max_reserved")
+    cols[0].metric("Total (ms)", f"{tt:.0f}" if tt is not None else "—")
+    cols[1].metric("Inference (ms)", f"{it:.0f}" if it is not None else "—")
+    cols[2].metric("Tok/s (infer)", f"{tps:.1f}" if tps is not None else "—")
+    cols[3].metric("GPU reserved (MB)", f"{vram:.0f}" if vram is not None else "—")
+    st.expander("All metrics").json(info)
+with tabs[0]:
+    st.subheader("Upload one or more images")
+    files = st.file_uploader("Images", type=["png", "jpg", "jpeg", "webp"], accept_multiple_files=True)
+    run = st.button("Generate from uploads", type="primary", use_container_width=True, key="run_files")
+    if run:
+        if not files or not prompt.strip():
+            st.error("Please add at least one image and a prompt.")
+        else:
+            with st.spinner("Calling FastAPI…"):
+                data = {
+                    "prompt": prompt,
+                    "max_new_tokens": str(max_new_tokens),  # form fields are strings
+                }
+                if temperature is not None:
+                    data["temperature"] = str(temperature)
+                if top_p is not None:
+                    data["top_p"] = str(top_p)
+                multipart = []
+                previews = []
+                for f in files:
+                    content = f.read()
+                    multipart.append(("images", (f.name, content, f.type or "application/octet-stream")))
+                    try:
+                        previews.append(Image.open(io.BytesIO(content)))
+                    except Exception:
+                        pass
+                try:
+                    r = requests.post(f"{API_BASE}/generate", data=data, files=multipart, timeout=300)
+                    r.raise_for_status()
+                    out = r.json()
+                    st.success("Done!")
+                    if previews:
+                        st.image(previews, caption=[f.name for f in files], use_column_width=True)
+                    st.subheader("Answer")
+                    st.write(out.get("text", ""))
+                    show_metrics(out.get("metrics", {}))
+                except requests.RequestException as e:
+                    st.error(f"Request failed: {e}")
+                    if hasattr(e, "response") and e.response is not None:
+                        try:
+                            st.code(e.response.text, language="json")
+                        except Exception:
+                            st.write(e.response.text)
+with tabs[1]:
+    st.subheader("Use remote image URLs")
+    urls_raw = st.text_area("One URL per line", "", height=120, placeholder="https://example.com/a.jpg\nhttps://example.com/b.png")
+    run2 = st.button("Generate from URLs", type="primary", use_container_width=True, key="run_urls")
+    if run2:
+        urls = [u.strip() for u in urls_raw.splitlines() if u.strip()]
+        if not urls or not prompt.strip():
+            st.error("Please add at least one URL and a prompt.")
+        else:
+            with st.spinner("Calling FastAPI…"):
+                body = {
+                    "prompt": prompt,
+                    "image_urls": urls,
+                    "max_new_tokens": max_new_tokens,
+                    "temperature": temperature,  # FastAPI model allows null
+                    "top_p": top_p,
+                }
+                try:
+                    r = requests.post(f"{API_BASE}/generate_urls", json=body, timeout=300)
+                    r.raise_for_status()
+                    out = r.json()
+                    st.success("Done!")
+                    st.subheader("Answer")
+                    st.write(out.get("text", ""))
+                    show_metrics(out.get("metrics", {}))
+                    st.caption("Fetched URLs:")
+                    st.code(json.dumps(urls, indent=2))
+                except requests.RequestException as e:
+                    st.error(f"Request failed: {e}")
+                    if hasattr(e, "response") and e.response is not None:
+                        try:
+                            st.code(e.response.text, language="json")
+                        except Exception:
+                            st.write(e.response.text)