onurcopur committed on
Commit
8880ccb
·
1 Parent(s): 917a5d3

unfreeze caption generator

Browse files
Files changed (1) hide show
  1. main.py +89 -50
main.py CHANGED
@@ -12,14 +12,15 @@ import requests
12
  import torch
13
  import torch.nn.functional as F
14
  from dotenv import load_dotenv
15
- from fastapi import FastAPI, File, HTTPException, UploadFile, Query
16
  from fastapi.middleware.cors import CORSMiddleware
17
  from huggingface_hub import InferenceClient
18
  from PIL import Image
 
 
 
19
  from search_engines import SearchEngineManager
20
  from utils import SearchCache, URLValidator
21
- from embeddings import EmbeddingModelFactory, EmbeddingModel, get_default_model_configs
22
- from patch_attention import PatchAttentionAnalyzer
23
 
24
  # Load environment variables from .env file
25
  load_dotenv()
@@ -95,26 +96,25 @@ class TattooSearchEngine:
95
  image_b64 = base64.b64encode(img_buffer.getvalue()).decode()
96
  image_url = f"data:image/jpeg;base64,{image_b64}"
97
 
98
- # completion = self.client.chat.completions.create(
99
- # model=self.vlm_model,
100
- # messages=[
101
- # {
102
- # "role": "user",
103
- # "content": [
104
- # {
105
- # "type": "text",
106
- # "text": "Generate a one search engine query to find the most similar tattoos to this image. Response in json format",
107
- # },
108
- # {
109
- # "type": "image_url",
110
- # "image_url": {"url": image_url},
111
- # },
112
- # ],
113
- # }
114
- # ],
115
- # )
116
- caption = '<|begin_of_box|>{"search_query": "hand tattoo geometric human figure abstract blackwork"}<|end_of_box|>'
117
- # caption = completion.choices[0].message.content
118
  if caption:
119
  match = re.search(r"\{.*\}", caption)
120
  if match:
@@ -256,8 +256,11 @@ class TattooSearchEngine:
256
  return None
257
 
258
  def download_and_process_image(
259
- self, url: str, query_features: torch.Tensor, query_image: Image.Image = None,
260
- include_patch_attention: bool = False
 
 
 
261
  ) -> Dict[str, Any]:
262
  """Download and compute similarity for a single image"""
263
  candidate_image = self.download_image(url)
@@ -266,7 +269,9 @@ class TattooSearchEngine:
266
 
267
  try:
268
  candidate_features = self.embedding_model.encode_image(candidate_image)
269
- similarity = self.embedding_model.compute_similarity(query_features, candidate_features)
 
 
270
 
271
  result = {"score": float(similarity), "url": url}
272
 
@@ -274,12 +279,16 @@ class TattooSearchEngine:
274
  if include_patch_attention and query_image is not None:
275
  try:
276
  analyzer = PatchAttentionAnalyzer(self.embedding_model)
277
- patch_data = analyzer.compute_patch_similarities(query_image, candidate_image)
 
 
278
  result["patch_attention"] = {
279
  "overall_similarity": patch_data["overall_similarity"],
280
  "query_grid_size": patch_data["query_grid_size"],
281
  "candidate_grid_size": patch_data["candidate_grid_size"],
282
- "attention_summary": analyzer.get_similarity_summary(patch_data)
 
 
283
  }
284
  except Exception as e:
285
  logger.warning(f"Failed to compute patch attention for {url}: {e}")
@@ -292,7 +301,10 @@ class TattooSearchEngine:
292
  return None
293
 
294
  def compute_similarity(
295
- self, query_image: Image.Image, candidate_urls: List[str], include_patch_attention: bool = False
 
 
 
296
  ) -> List[Dict[str, Any]]:
297
  # Encode query image using the selected embedding model
298
  query_features = self.embedding_model.encode_image(query_image)
@@ -306,7 +318,11 @@ class TattooSearchEngine:
306
  # Submit all download tasks
307
  future_to_url = {
308
  executor.submit(
309
- self.download_and_process_image, url, query_features, query_image, include_patch_attention
 
 
 
 
310
  ): url
311
  for url in candidate_urls
312
  }
@@ -343,10 +359,14 @@ class TattooSearchEngine:
343
  # Global variable to store search engine instance
344
  search_engine = None
345
 
 
346
  def get_search_engine(embedding_model: str = "clip") -> TattooSearchEngine:
347
  """Get or create search engine instance with specified embedding model."""
348
  global search_engine
349
- if search_engine is None or search_engine.embedding_model.get_model_name().lower() != embedding_model:
 
 
 
350
  search_engine = TattooSearchEngine(embedding_model)
351
  return search_engine
352
 
@@ -354,8 +374,12 @@ def get_search_engine(embedding_model: str = "clip") -> TattooSearchEngine:
354
  @app.post("/search")
355
  async def search_tattoos(
356
  file: UploadFile = File(...),
357
- embedding_model: str = Query(default="clip", description="Embedding model to use (clip, dinov2, siglip)"),
358
- include_patch_attention: bool = Query(default=False, description="Include patch-level attention analysis")
 
 
 
 
359
  ):
360
  if not file.content_type.startswith("image/"):
361
  raise HTTPException(status_code=400, detail="File must be an image")
@@ -366,7 +390,7 @@ async def search_tattoos(
366
  if embedding_model not in available_models:
367
  raise HTTPException(
368
  status_code=400,
369
- detail=f"Invalid embedding model. Available: {available_models}"
370
  )
371
 
372
  # Get search engine with specified embedding model
@@ -386,17 +410,23 @@ async def search_tattoos(
386
  candidate_urls = engine.search_images(caption, max_results=100)
387
 
388
  if not candidate_urls:
389
- return {"caption": caption, "results": [], "embedding_model": engine.embedding_model.get_model_name()}
 
 
 
 
390
 
391
  # Compute similarities and rank
392
  logger.info("Computing similarities...")
393
- results = engine.compute_similarity(query_image, candidate_urls, include_patch_attention)
 
 
394
 
395
  return {
396
  "caption": caption,
397
  "results": results,
398
  "embedding_model": engine.embedding_model.get_model_name(),
399
- "patch_attention_enabled": include_patch_attention
400
  }
401
 
402
  except Exception as e:
@@ -407,9 +437,15 @@ async def search_tattoos(
407
  @app.post("/analyze-attention")
408
  async def analyze_patch_attention(
409
  query_file: UploadFile = File(...),
410
- candidate_url: str = Query(..., description="URL of the candidate image to compare"),
411
- embedding_model: str = Query(default="clip", description="Embedding model to use (clip, dinov2, siglip)"),
412
- include_visualizations: bool = Query(default=True, description="Include attention visualizations")
 
 
 
 
 
 
413
  ):
414
  """Analyze patch-level attention between query image and a specific candidate image."""
415
  if not query_file.content_type.startswith("image/"):
@@ -421,7 +457,7 @@ async def analyze_patch_attention(
421
  if embedding_model not in available_models:
422
  raise HTTPException(
423
  status_code=400,
424
- detail=f"Invalid embedding model. Available: {available_models}"
425
  )
426
 
427
  # Get search engine with specified embedding model
@@ -434,11 +470,15 @@ async def analyze_patch_attention(
434
  # Download candidate image
435
  candidate_image = engine.download_image(candidate_url)
436
  if candidate_image is None:
437
- raise HTTPException(status_code=400, detail="Failed to download candidate image")
 
 
438
 
439
  # Analyze patch attention
440
  analyzer = PatchAttentionAnalyzer(engine.embedding_model)
441
- similarity_data = analyzer.compute_patch_similarities(query_image, candidate_image)
 
 
442
 
443
  result = {
444
  "query_image_size": query_image.size,
@@ -446,8 +486,10 @@ async def analyze_patch_attention(
446
  "candidate_url": candidate_url,
447
  "embedding_model": engine.embedding_model.get_model_name(),
448
  "similarity_analysis": analyzer.get_similarity_summary(similarity_data),
449
- "attention_matrix_shape": similarity_data['attention_matrix'].shape,
450
- "top_correspondences": similarity_data['top_correspondences'][:10] # Top 10
 
 
451
  }
452
 
453
  # Add visualizations if requested
@@ -462,7 +504,7 @@ async def analyze_patch_attention(
462
 
463
  result["visualizations"] = {
464
  "attention_heatmap": f"data:image/png;base64,{attention_heatmap}",
465
- "top_correspondences": f"data:image/png;base64,{top_correspondences_viz}"
466
  }
467
  except Exception as e:
468
  logger.warning(f"Failed to generate visualizations: {e}")
@@ -480,10 +522,7 @@ async def get_available_models():
480
  """Get list of available embedding models and their configurations."""
481
  models = EmbeddingModelFactory.get_available_models()
482
  configs = get_default_model_configs()
483
- return {
484
- "available_models": models,
485
- "model_configs": configs
486
- }
487
 
488
 
489
  @app.get("/health")
 
12
  import torch
13
  import torch.nn.functional as F
14
  from dotenv import load_dotenv
15
+ from fastapi import FastAPI, File, HTTPException, Query, UploadFile
16
  from fastapi.middleware.cors import CORSMiddleware
17
  from huggingface_hub import InferenceClient
18
  from PIL import Image
19
+
20
+ from embeddings import EmbeddingModel, EmbeddingModelFactory, get_default_model_configs
21
+ from patch_attention import PatchAttentionAnalyzer
22
  from search_engines import SearchEngineManager
23
  from utils import SearchCache, URLValidator
 
 
24
 
25
  # Load environment variables from .env file
26
  load_dotenv()
 
96
  image_b64 = base64.b64encode(img_buffer.getvalue()).decode()
97
  image_url = f"data:image/jpeg;base64,{image_b64}"
98
 
99
+ completion = self.client.chat.completions.create(
100
+ model=self.vlm_model,
101
+ messages=[
102
+ {
103
+ "role": "user",
104
+ "content": [
105
+ {
106
+ "type": "text",
107
+ "text": "Generate a one search engine query to find the most similar tattoos to this image. Response in json format",
108
+ },
109
+ {
110
+ "type": "image_url",
111
+ "image_url": {"url": image_url},
112
+ },
113
+ ],
114
+ }
115
+ ],
116
+ )
117
+ caption = completion.choices[0].message.content
 
118
  if caption:
119
  match = re.search(r"\{.*\}", caption)
120
  if match:
 
256
  return None
257
 
258
  def download_and_process_image(
259
+ self,
260
+ url: str,
261
+ query_features: torch.Tensor,
262
+ query_image: Image.Image = None,
263
+ include_patch_attention: bool = False,
264
  ) -> Dict[str, Any]:
265
  """Download and compute similarity for a single image"""
266
  candidate_image = self.download_image(url)
 
269
 
270
  try:
271
  candidate_features = self.embedding_model.encode_image(candidate_image)
272
+ similarity = self.embedding_model.compute_similarity(
273
+ query_features, candidate_features
274
+ )
275
 
276
  result = {"score": float(similarity), "url": url}
277
 
 
279
  if include_patch_attention and query_image is not None:
280
  try:
281
  analyzer = PatchAttentionAnalyzer(self.embedding_model)
282
+ patch_data = analyzer.compute_patch_similarities(
283
+ query_image, candidate_image
284
+ )
285
  result["patch_attention"] = {
286
  "overall_similarity": patch_data["overall_similarity"],
287
  "query_grid_size": patch_data["query_grid_size"],
288
  "candidate_grid_size": patch_data["candidate_grid_size"],
289
+ "attention_summary": analyzer.get_similarity_summary(
290
+ patch_data
291
+ ),
292
  }
293
  except Exception as e:
294
  logger.warning(f"Failed to compute patch attention for {url}: {e}")
 
301
  return None
302
 
303
  def compute_similarity(
304
+ self,
305
+ query_image: Image.Image,
306
+ candidate_urls: List[str],
307
+ include_patch_attention: bool = False,
308
  ) -> List[Dict[str, Any]]:
309
  # Encode query image using the selected embedding model
310
  query_features = self.embedding_model.encode_image(query_image)
 
318
  # Submit all download tasks
319
  future_to_url = {
320
  executor.submit(
321
+ self.download_and_process_image,
322
+ url,
323
+ query_features,
324
+ query_image,
325
+ include_patch_attention,
326
  ): url
327
  for url in candidate_urls
328
  }
 
359
  # Global variable to store search engine instance
360
  search_engine = None
361
 
362
+
363
  def get_search_engine(embedding_model: str = "clip") -> TattooSearchEngine:
364
  """Get or create search engine instance with specified embedding model."""
365
  global search_engine
366
+ if (
367
+ search_engine is None
368
+ or search_engine.embedding_model.get_model_name().lower() != embedding_model
369
+ ):
370
  search_engine = TattooSearchEngine(embedding_model)
371
  return search_engine
372
 
 
374
  @app.post("/search")
375
  async def search_tattoos(
376
  file: UploadFile = File(...),
377
+ embedding_model: str = Query(
378
+ default="clip", description="Embedding model to use (clip, dinov2, siglip)"
379
+ ),
380
+ include_patch_attention: bool = Query(
381
+ default=False, description="Include patch-level attention analysis"
382
+ ),
383
  ):
384
  if not file.content_type.startswith("image/"):
385
  raise HTTPException(status_code=400, detail="File must be an image")
 
390
  if embedding_model not in available_models:
391
  raise HTTPException(
392
  status_code=400,
393
+ detail=f"Invalid embedding model. Available: {available_models}",
394
  )
395
 
396
  # Get search engine with specified embedding model
 
410
  candidate_urls = engine.search_images(caption, max_results=100)
411
 
412
  if not candidate_urls:
413
+ return {
414
+ "caption": caption,
415
+ "results": [],
416
+ "embedding_model": engine.embedding_model.get_model_name(),
417
+ }
418
 
419
  # Compute similarities and rank
420
  logger.info("Computing similarities...")
421
+ results = engine.compute_similarity(
422
+ query_image, candidate_urls, include_patch_attention
423
+ )
424
 
425
  return {
426
  "caption": caption,
427
  "results": results,
428
  "embedding_model": engine.embedding_model.get_model_name(),
429
+ "patch_attention_enabled": include_patch_attention,
430
  }
431
 
432
  except Exception as e:
 
437
  @app.post("/analyze-attention")
438
  async def analyze_patch_attention(
439
  query_file: UploadFile = File(...),
440
+ candidate_url: str = Query(
441
+ ..., description="URL of the candidate image to compare"
442
+ ),
443
+ embedding_model: str = Query(
444
+ default="clip", description="Embedding model to use (clip, dinov2, siglip)"
445
+ ),
446
+ include_visualizations: bool = Query(
447
+ default=True, description="Include attention visualizations"
448
+ ),
449
  ):
450
  """Analyze patch-level attention between query image and a specific candidate image."""
451
  if not query_file.content_type.startswith("image/"):
 
457
  if embedding_model not in available_models:
458
  raise HTTPException(
459
  status_code=400,
460
+ detail=f"Invalid embedding model. Available: {available_models}",
461
  )
462
 
463
  # Get search engine with specified embedding model
 
470
  # Download candidate image
471
  candidate_image = engine.download_image(candidate_url)
472
  if candidate_image is None:
473
+ raise HTTPException(
474
+ status_code=400, detail="Failed to download candidate image"
475
+ )
476
 
477
  # Analyze patch attention
478
  analyzer = PatchAttentionAnalyzer(engine.embedding_model)
479
+ similarity_data = analyzer.compute_patch_similarities(
480
+ query_image, candidate_image
481
+ )
482
 
483
  result = {
484
  "query_image_size": query_image.size,
 
486
  "candidate_url": candidate_url,
487
  "embedding_model": engine.embedding_model.get_model_name(),
488
  "similarity_analysis": analyzer.get_similarity_summary(similarity_data),
489
+ "attention_matrix_shape": similarity_data["attention_matrix"].shape,
490
+ "top_correspondences": similarity_data["top_correspondences"][
491
+ :10
492
+ ], # Top 10
493
  }
494
 
495
  # Add visualizations if requested
 
504
 
505
  result["visualizations"] = {
506
  "attention_heatmap": f"data:image/png;base64,{attention_heatmap}",
507
+ "top_correspondences": f"data:image/png;base64,{top_correspondences_viz}",
508
  }
509
  except Exception as e:
510
  logger.warning(f"Failed to generate visualizations: {e}")
 
522
  """Get list of available embedding models and their configurations."""
523
  models = EmbeddingModelFactory.get_available_models()
524
  configs = get_default_model_configs()
525
+ return {"available_models": models, "model_configs": configs}
 
 
 
526
 
527
 
528
  @app.get("/health")