Spaces:

reygml
/

vlm_grounding

Sleeping

App Files Community

reygml committed on Sep 3, 2025

Commit

b6e71f1

1 Parent(s): be6e716

haha

Browse files

Files changed (1) hide show

ui.py +8 -47

ui.py CHANGED Viewed

@@ -8,7 +8,7 @@ import streamlit as st
 from PIL import Image
 st.set_page_config(page_title="SmolVLM UI", layout="wide")
-st.title("SmolVLM — Streamlit + FastAPI")
 API_BASE = os.getenv("API_BASE", "http://127.0.0.1:8000")
@@ -36,14 +36,14 @@ def show_metrics(metrics: dict):
     cols[3].metric("GPU reserved (MB)", f"{vram:.0f}" if vram is not None else "—")
     st.expander("All metrics").json(info)
-tab_upload, tab_urls, tab_detect = st.tabs(["Upload images", "Image URLs", "Detect & Describe"])
 # -------------------- Tab 1: uploads -> /generate --------------------
 with tab_upload:
-    st.subheader("Upload one or more images")
     files = st.file_uploader("Images", type=["png", "jpg", "jpeg", "webp"], accept_multiple_files=True)
-    prompt = st.text_area("Prompt", "Can you describe the image(s)?", height=80)
-    run = st.button("Generate from uploads", type="primary", use_container_width=True, key="run_files")
     if run:
         if not files or not prompt.strip():
@@ -88,48 +88,9 @@ with tab_upload:
                         except Exception:
                             st.write(e.response.text)
-# -------------------- Tab 2: URLs -> /generate_urls --------------------
-with tab_urls:
-    st.subheader("Use remote image URLs")
-    prompt2 = st.text_area("Prompt", "Can you describe the image(s)?", height=80, key="prompt_urls")
-    urls_raw = st.text_area("One URL per line", "", height=120,
-                            placeholder="https://example.com/a.jpg\nhttps://example.com/b.png")
-    run2 = st.button("Generate from URLs", type="primary", use_container_width=True, key="run_urls")
-    if run2:
-        urls = [u.strip() for u in urls_raw.splitlines() if u.strip()]
-        if not urls or not prompt2.strip():
-            st.error("Please add at least one URL and a prompt.")
-        else:
-            with st.spinner("Calling FastAPI…"):
-                body = {
-                    "prompt": prompt2,
-                    "image_urls": urls,
-                    "max_new_tokens": max_new_tokens,
-                    "temperature": temperature,  # FastAPI model allows null
-                    "top_p": top_p,
-                }
-                try:
-                    r = requests.post(f"{API_BASE}/generate_urls", json=body, timeout=300)
-                    r.raise_for_status()
-                    out = r.json()
-                    st.success("Done!")
-                    st.subheader("Answer")
-                    st.write(out.get("text", ""))
-                    show_metrics(out.get("metrics", {}))
-                    st.caption("Fetched URLs:")
-                    st.code(json.dumps(urls, indent=2))
-                except requests.RequestException as e:
-                    st.error(f"Request failed: {e}")
-                    if hasattr(e, "response") and e.response is not None:
-                        try:
-                            st.code(e.response.text, language="json")
-                        except Exception:
-                            st.write(e.response.text)
-# -------------------- Tab 3: Detect & Describe -> /detect_describe --------------------
 with tab_detect:
-    st.subheader("Grounding DINO → SmolVLM (Detect & Describe)")
     # Upload + labels
     det_image = st.file_uploader("Image", type=["jpg", "jpeg", "png", "webp"], accept_multiple_files=False)
@@ -151,7 +112,7 @@ with tab_detect:
     det_pad = st.slider("crop padding (fraction)", 0.0, 0.2, 0.06, 0.01)
     det_max_new = st.slider("max_new_tokens", 1, 512, 160, 1)
-    run_det = st.button("Detect & Describe", type="primary", use_container_width=True)
     if run_det:
         if not det_bytes or not det_labels.strip():
             st.error("Please provide an image and at least one label.")

 from PIL import Image
 st.set_page_config(page_title="SmolVLM UI", layout="wide")
+st.title("SmolVLM Grounding")
 API_BASE = os.getenv("API_BASE", "http://127.0.0.1:8000")
     cols[3].metric("GPU reserved (MB)", f"{vram:.0f}" if vram is not None else "—")
     st.expander("All metrics").json(info)
+tab_upload, tab_detect = st.tabs(["SmolVLM Detection", "Grounded Detection"])
 # -------------------- Tab 1: uploads -> /generate --------------------
 with tab_upload:
+    st.subheader("Upload an image")
     files = st.file_uploader("Images", type=["png", "jpg", "jpeg", "webp"], accept_multiple_files=True)
+    prompt = st.text_area("Prompt", "Can you describe the image?", height=80)
+    run = st.button("Generate", type="primary", use_container_width=True, key="run_files")
     if run:
         if not files or not prompt.strip():
                         except Exception:
                             st.write(e.response.text)
+# -------------------- Tab 2: Detect & Describe -> /detect_describe --------------------
 with tab_detect:
+    st.subheader("SmolVLM Grounded Detection")
     # Upload + labels
     det_image = st.file_uploader("Image", type=["jpg", "jpeg", "png", "webp"], accept_multiple_files=False)
     det_pad = st.slider("crop padding (fraction)", 0.0, 0.2, 0.06, 0.01)
     det_max_new = st.slider("max_new_tokens", 1, 512, 160, 1)
+    run_det = st.button("Detect", type="primary", use_container_width=True)
     if run_det:
         if not det_bytes or not det_labels.strip():
             st.error("Please provide an image and at least one label.")