Afsha001 committed on
Commit
7fff147
·
1 Parent(s): 81617eb

replace Qwen2-VL with Florence-2 API for caption generation

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -19,7 +19,7 @@ JINA_KEY = os.environ.get("JINA_KEY", "")
19
  DEVICE = "cpu"
20
 
21
  # ── Correct API endpoints ──
22
- QWEN_VL_URL = "https://api-inference.huggingface.co/v1/chat/completions"
23
  QWEN_LM_URL = "https://api-inference.huggingface.co/v1/chat/completions"
24
  JINA_URL = "https://api.jina.ai/v1/rerank"
25
  HF_HEADERS = {"Authorization": "Bearer " + HF_TOKEN, "Content-Type": "application/json"}
@@ -254,7 +254,7 @@ with st.sidebar:
254
  st.title(" Image Caption Fusion")
255
  st.markdown("---")
256
  st.markdown("### Pipeline Steps")
257
- st.markdown("1. Qwen2-VL-2B β€” Generate 5 captions")
258
  st.markdown("2. BLIP ITM β€” Image-text matching")
259
  st.markdown("3. Jina Reranker M0 β€” Semantic reranking")
260
  st.markdown("4. Cosine Similarity β€” Embedding similarity")
@@ -263,7 +263,7 @@ with st.sidebar:
263
  st.markdown("7. Qwen2.5-1.5B β€” Caption fusion")
264
  st.markdown("---")
265
  st.markdown("**Local:** BLIP ITM, DINO")
266
- st.markdown("**API:** Qwen2-VL, Jina, Qwen2.5")
267
 
268
  # ── MAIN UI ──
269
  st.title(" Image Caption Fusion System")
@@ -285,7 +285,7 @@ if uploaded:
285
  progress = st.progress(0)
286
  status = st.empty()
287
 
288
- status.info(" Step 1/7 β€” Generating 5 captions with Qwen2-VL...")
289
  captions = generate_captions_api(image)
290
  progress.progress(14)
291
  with st.expander(" 5 Generated Captions"):
 
19
  DEVICE = "cpu"
20
 
21
  # ── Correct API endpoints ──
22
+ FLORENCE_URL = "https://api-inference.huggingface.co/models/microsoft/Florence-2-large"
23
  QWEN_LM_URL = "https://api-inference.huggingface.co/v1/chat/completions"
24
  JINA_URL = "https://api.jina.ai/v1/rerank"
25
  HF_HEADERS = {"Authorization": "Bearer " + HF_TOKEN, "Content-Type": "application/json"}
 
254
  st.title(" Image Caption Fusion")
255
  st.markdown("---")
256
  st.markdown("### Pipeline Steps")
257
+ st.markdown("1. Florence-2 β€” Generate 4 captions + BLIP local")
258
  st.markdown("2. BLIP ITM β€” Image-text matching")
259
  st.markdown("3. Jina Reranker M0 β€” Semantic reranking")
260
  st.markdown("4. Cosine Similarity β€” Embedding similarity")
 
263
  st.markdown("7. Qwen2.5-1.5B β€” Caption fusion")
264
  st.markdown("---")
265
  st.markdown("**Local:** BLIP ITM, DINO")
266
+ st.markdown("**API:** Florence-2, Jina, Qwen2.5")
267
 
268
  # ── MAIN UI ──
269
  st.title(" Image Caption Fusion System")
 
285
  progress = st.progress(0)
286
  status = st.empty()
287
 
288
+ status.info(" Step 1/7 β€” Generating captions with Florence-2 + BLIP...")
289
  captions = generate_captions_api(image)
290
  progress.progress(14)
291
  with st.expander(" 5 Generated Captions"):