Spaces:

Afsha001
/

Image_captioning

Running

Afsha001 committed 19 days ago

Commit

7fff147

1 Parent(s): 81617eb

replace Qwen2-VL with Florence-2 API for caption generation

Files changed (1) hide show

app.py CHANGED Viewed

@@ -19,7 +19,7 @@ JINA_KEY = os.environ.get("JINA_KEY", "")
 DEVICE   = "cpu"
 # ── Correct API endpoints ──
-QWEN_VL_URL  = "https://api-inference.huggingface.co/v1/chat/completions"
 QWEN_LM_URL  = "https://api-inference.huggingface.co/v1/chat/completions"
 JINA_URL     = "https://api.jina.ai/v1/rerank"
 HF_HEADERS   = {"Authorization": "Bearer " + HF_TOKEN, "Content-Type": "application/json"}
@@ -254,7 +254,7 @@ with st.sidebar:
     st.title(" Image Caption Fusion")
     st.markdown("---")
     st.markdown("###  Pipeline Steps")
-    st.markdown("1.  Qwen2-VL-2B — Generate 5 captions")
     st.markdown("2.  BLIP ITM — Image-text matching")
     st.markdown("3.  Jina Reranker M0 — Semantic reranking")
     st.markdown("4.  Cosine Similarity — Embedding similarity")
@@ -263,7 +263,7 @@ with st.sidebar:
     st.markdown("7.  Qwen2.5-1.5B — Caption fusion")
     st.markdown("---")
     st.markdown("**Local:** BLIP ITM, DINO")
-    st.markdown("**API:** Qwen2-VL, Jina, Qwen2.5")
 # ── MAIN UI ──
 st.title(" Image Caption Fusion System")
@@ -285,7 +285,7 @@ if uploaded:
             progress = st.progress(0)
             status   = st.empty()
-            status.info(" Step 1/7 — Generating 5 captions with Qwen2-VL...")
             captions = generate_captions_api(image)
             progress.progress(14)
             with st.expander(" 5 Generated Captions"):

 DEVICE   = "cpu"
 # ── Correct API endpoints ──
+FLORENCE_URL = "https://api-inference.huggingface.co/models/microsoft/Florence-2-large"
 QWEN_LM_URL  = "https://api-inference.huggingface.co/v1/chat/completions"
 JINA_URL     = "https://api.jina.ai/v1/rerank"
 HF_HEADERS   = {"Authorization": "Bearer " + HF_TOKEN, "Content-Type": "application/json"}
     st.title(" Image Caption Fusion")
     st.markdown("---")
     st.markdown("###  Pipeline Steps")
+    st.markdown("1.  Florence-2 — Generate 4 captions + BLIP local")
     st.markdown("2.  BLIP ITM — Image-text matching")
     st.markdown("3.  Jina Reranker M0 — Semantic reranking")
     st.markdown("4.  Cosine Similarity — Embedding similarity")
     st.markdown("7.  Qwen2.5-1.5B — Caption fusion")
     st.markdown("---")
     st.markdown("**Local:** BLIP ITM, DINO")
+    st.markdown("**API:** Florence-2, Jina, Qwen2.5")
 # ── MAIN UI ──
 st.title(" Image Caption Fusion System")
             progress = st.progress(0)
             status   = st.empty()
+            status.info(" Step 1/7 — Generating captions with Florence-2 + BLIP...")
             captions = generate_captions_api(image)
             progress.progress(14)
             with st.expander(" 5 Generated Captions"):