stephenebert committed on
Commit
1e33c60
·
verified ·
1 Parent(s): 30bc636

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -171
app.py CHANGED
@@ -1,198 +1,110 @@
 
 
1
  import os
2
  import requests
3
  import gradio as gr
4
  import torch
5
  from transformers import CLIPProcessor, CLIPModel
6
- import logging
7
-
8
- # Set up logging
9
- logging.basicConfig(level=logging.INFO)
10
- logger = logging.getLogger(__name__)
11
 
12
- # 1) Load CLIP text encoder
 
13
  processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
14
- model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
15
  model.eval()
16
 
 
17
  def embed_text(text: str) -> list[float]:
18
- """Turn a string into a normalized CLIP embedding."""
19
- try:
20
- # Clean and preprocess text
21
- text = text.strip()
22
- if not text:
23
- raise ValueError("Empty text input")
24
-
25
- # Tokenize with proper handling
26
- inputs = processor(
27
- text=[text],
28
- return_tensors="pt",
29
- padding=True,
30
- truncation=True,
31
- max_length=77 # CLIP's max token length
32
- )
33
-
34
- with torch.no_grad():
35
- # Get text features
36
- feats = model.get_text_features(**inputs)
37
-
38
- # Normalize to unit vector (L2 normalization)
39
- feats = feats / feats.norm(p=2, dim=-1, keepdim=True)
40
-
41
- # Convert to list and ensure proper shape
42
- embedding = feats.squeeze().cpu().tolist()
43
-
44
- logger.info(f"Generated embedding with shape: {len(embedding)}")
45
- return embedding
46
-
47
- except Exception as e:
48
- logger.error(f"Error in embed_text: {str(e)}")
49
- raise
50
 
51
- # 2) API configuration
 
 
 
 
52
  API_BASE = os.getenv("API_URL", "https://capstone-retrieval-api.onrender.com").rstrip("/")
53
 
 
54
  def call_search(caption: str, k: int):
55
- """Embed `caption`, POST to /search, return JSON (or error dict)."""
56
- try:
57
- # Input validation
58
- if not caption or not caption.strip():
59
- return {"error": "Please enter a caption to search."}
60
-
61
- caption = caption.strip()
62
- k = max(1, min(int(k), 10)) # Clamp k between 1 and 10
63
-
64
- logger.info(f"Searching for: '{caption}' with k={k}")
65
-
66
- # 1) Embed locally
67
- vec = embed_text(caption)
68
-
69
- # Verify embedding dimensions
70
- if len(vec) != 512:
71
- return {"error": f"Unexpected embedding dimension: {len(vec)} (expected 512)"}
72
-
73
- payload = {
74
- "query_vec": vec,
75
- "k": k,
76
- "query_text": caption # Include original text for debugging
77
- }
78
-
79
- # 2) POST to API
80
- headers = {
81
- "Content-Type": "application/json",
82
- "User-Agent": "HuggingFace-Gradio-Client"
83
- }
84
-
85
- response = requests.post(
86
- f"{API_BASE}/search",
87
- json=payload,
88
- headers=headers,
89
- timeout=30 # Increased timeout
90
- )
91
-
92
- response.raise_for_status()
93
- result = response.json()
94
-
95
- logger.info(f"API response status: {response.status_code}")
96
-
97
- # Add metadata to result
98
- if isinstance(result, dict):
99
- result["_metadata"] = {
100
- "query": caption,
101
- "k": k,
102
- "embedding_dim": len(vec),
103
- "api_status": response.status_code
104
- }
105
-
106
- return result
107
-
108
- except requests.exceptions.Timeout:
109
- return {"error": "Request timed out. Please try again."}
110
- except requests.exceptions.ConnectionError:
111
- return {"error": "Could not connect to the API. Please check your internet connection."}
112
- except requests.exceptions.HTTPError as e:
113
- error_msg = f"HTTP {response.status_code}"
114
- try:
115
- error_detail = response.json().get("detail", response.text)
116
- error_msg += f": {error_detail}"
117
- except:
118
- error_msg += f": {response.text}"
119
- return {"error": error_msg}
120
- except Exception as e:
121
- logger.error(f"Unexpected error in call_search: {str(e)}")
122
- return {"error": f"Unexpected error: {str(e)}"}
123
 
124
- def validate_api_connection():
125
- """Test API connection and return status."""
126
  try:
127
- response = requests.get(f"{API_BASE}/health", timeout=10)
128
- return f"API is reachable (Status: {response.status_code})"
 
129
  except Exception as e:
130
- return f"API connection failed: {str(e)}"
 
131
 
 
 
 
 
 
 
 
 
 
 
 
132
  # 3) Gradio UI
133
- with gr.Blocks(title="Image ↔ Text Retrieval (small dataset)", theme=gr.themes.Soft()) as demo:
134
  gr.Markdown(
135
- "### Image ↔ Text Retrieval (small dataset)\n"
136
- "Type a caption, pick *k*, click **Submit** – we encode your text with CLIP, "
137
- "POST it to your FastAPI+FAISS service, and show the top-K JSON results."
 
 
138
  )
139
-
140
- # API status indicator
141
  with gr.Row():
142
- api_status = gr.Textbox(
143
- value=validate_api_connection(),
144
- label="API Status",
145
- interactive=False
146
  )
147
- refresh_btn = gr.Button("Refresh Status", size="sm")
148
- refresh_btn.click(fn=validate_api_connection, outputs=api_status)
149
-
150
- with gr.Row():
151
- with gr.Column(scale=2):
152
- caption_input = gr.Textbox(
153
- lines=3,
154
- placeholder="type something",
155
- label="Caption",
156
- info="Enter a descriptive text to search for similar images"
157
- )
158
-
159
- with gr.Column(scale=1):
160
- k_input = gr.Slider(
161
- minimum=1,
162
- maximum=10,
163
- value=3,
164
- step=1,
165
- label="Top-K Results"
166
- )
167
-
168
- with gr.Row():
169
- btn = gr.Button("Submit", variant="primary")
170
- clear_btn = gr.Button("Clear", variant="secondary")
171
-
172
- output = gr.JSON(label="Search Results")
173
-
174
- # Event handlers
175
  btn.click(
176
- fn=call_search,
177
- inputs=[caption_input, k_input],
178
- outputs=output
179
- )
180
-
181
- clear_btn.click(
182
- fn=lambda: ("", 3, {}),
183
- outputs=[caption_input, k_input, output]
184
- )
185
-
186
- # Allow Enter key to submit
187
- caption_input.submit(
188
- fn=call_search,
189
- inputs=[caption_input, k_input],
190
- outputs=output
191
  )
192
 
193
  if __name__ == "__main__":
194
- demo.launch(
195
- server_name="0.0.0.0",
196
- server_port=7860,
197
- show_error=True
198
- )
 
1
+ # app.py
2
+
3
  import os
4
  import requests
5
  import gradio as gr
6
  import torch
7
  from transformers import CLIPProcessor, CLIPModel
 
 
 
 
 
8
 
9
+ # -----------------------------------------------------------------------------
10
+ # 1) Load CLIP text‐encoder locally (no GPU required for small demo)
11
  processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
12
+ model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
13
  model.eval()
14
 
15
+
16
def embed_text(text: str) -> list[float]:
    """Encode *text* into a unit-length 512-dim CLIP text embedding.

    The vector is L2-normalized so that inner products against other
    normalized embeddings behave as cosine similarity.
    """
    tokens = processor(
        text=[text],
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    with torch.no_grad():
        features = model.get_text_features(**tokens)
    # Scale to unit length: cosine-as-inner-product for the FAISS index.
    unit = features / features.norm(p=2, dim=-1, keepdim=True)
    return unit.squeeze().cpu().tolist()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
+
31
+ # -----------------------------------------------------------------------------
32
+ # 2) Where’s your FastAPI service?
33
+ # In HF Space → Settings → Variables, set:
34
+ # API_URL = https://capstone-retrieval-api.onrender.com
35
  API_BASE = os.getenv("API_URL", "https://capstone-retrieval-api.onrender.com").rstrip("/")
36
 
37
+
38
def call_search(caption: str, k: int):
    """Embed `caption`, POST to /search, parse JSON, return gallery items.

    Returns a tuple ``(gallery_items, error)``:
    ``gallery_items`` is a list of ``(image_path, label)`` pairs for
    ``gr.Gallery``; ``error`` is ``None`` on success or a user-facing
    message string on failure.
    """
    # Guard against empty or whitespace-only queries before doing any work.
    if not caption or not caption.strip():
        return [], "Please enter a caption."
    caption = caption.strip()

    # Gradio sliders deliver floats; the API expects an int, clamped to 1..10.
    k = max(1, min(int(k), 10))

    # 2a) embed locally
    vec = embed_text(caption)
    payload = {"query_vec": vec, "k": k}

    try:
        r = requests.post(f"{API_BASE}/search", json=payload, timeout=15)
        r.raise_for_status()
        data = r.json()
    except Exception as e:
        # Any network / HTTP / JSON-decoding error surfaces as a message.
        return [], f"Error: {e!s}"

    # 2b) build gallery list [ (path, label), ... ]
    gallery_items = []
    for rec in data.get("results", []):
        path = rec["image_path"]
        label = f"{rec['caption']} ({rec['score']:.3f})"
        gallery_items.append((path, label))

    return gallery_items, None
63
+
64
+
65
+ # -----------------------------------------------------------------------------
66
  # 3) Gradio UI
67
+ with gr.Blocks(title="Image ↔ Text Retrieval") as demo:
68
  gr.Markdown(
69
+ """
70
+ ## Image Text Retrieval
71
+ Type a caption, pick *k*, click **Submit** we embed your text with CLIP,
72
+ call your FastAPI + FAISS service, and show the top-K **images**.
73
+ """
74
  )
75
+
 
76
  with gr.Row():
77
+ caption_in = gr.Textbox(
78
+ label="Caption",
79
+ placeholder="e.g. painting of King Henry VIII carrying an umbrella",
80
+ lines=2,
81
  )
82
+ k_in = gr.Slider(
83
+ label="Top-K",
84
+ minimum=1, maximum=10, step=1, value=3
85
+ )
86
+
87
+ gallery = gr.Gallery(
88
+ label="Results",
89
+ show_label=False,
90
+ elem_id="result_gallery",
91
+ ).style(grid=[3], height="auto") # if this errors in your gradio version, just drop .style()
92
+
93
+ error_box = gr.Markdown(visible=False)
94
+
95
+ def _wrapped(caption, k):
96
+ imgs, err = call_search(caption, k)
97
+ if err:
98
+ return gr.update(visible=True, value=f"**{err}**"), []
99
+ return gr.update(visible=False), imgs
100
+
101
+ btn = gr.Button("Submit")
 
 
 
 
 
 
 
 
102
  btn.click(
103
+ fn=_wrapped,
104
+ inputs=[caption_in, k_in],
105
+ outputs=[error_box, gallery],
 
 
 
 
 
 
 
 
 
 
 
 
106
  )
107
 
108
  if __name__ == "__main__":
109
+ # locally: python app.py → http://localhost:7860
110
+ demo.launch()