Spaces:

safinal
/

compositional-retrieval

Sleeping

App Files Files Community

safinal committed on Nov 23, 2025

Commit

af5379c

verified ·

1 Parent(s): 566c156

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -27

app.py CHANGED Viewed

@@ -5,20 +5,18 @@ from PIL import Image
 import pandas as pd
 from sklearn.metrics.pairwise import cosine_similarity
 from tqdm import tqdm
 from token_classifier import load_token_classifier, predict
 from model import Model
 from dataset import RetrievalDataset
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 batch_size = 512
-import zipfile
-import os
 def unzip_file(zip_path, extract_path):
     # Create the target directory if it doesn't exist
     os.makedirs(extract_path, exist_ok=True)
@@ -28,40 +26,39 @@ def unzip_file(zip_path, extract_path):
         # Extract all contents to the specified directory
         zip_ref.extractall(extract_path)
-# Example usage
 zip_path = "sample_evaluation.zip"
 extract_path = "sample_evaluation"
-unzip_file(zip_path, extract_path)
-from huggingface_hub import hf_hub_download
-hf_hub_download(repo_id="safinal/compositional-image-retrieval", filename="weights.pth", local_dir='.')
-def encode_database(model, df: pd.DataFrame) -> np.ndarray :
     """
     Process database images and generate embeddings.
-    Args:
-    df (pd. DataFrame ): DataFrame with column:
-    - target_image: str, paths to database images
-    Returns:
-    np.ndarray: Embeddings array (num_images, embedding_dim)
     """
     model.eval()
     all_embeddings = []
     for i in tqdm(range(0, len(df), batch_size)):
-        target_imgs = torch.stack([model.processor(Image.open(target_image_path)) for target_image_path in df['target_image'][i:i+batch_size]]).to(device)
         with torch.no_grad():
-            # target_imgs_embedding = model.encode_database_image(target_imgs)
             target_imgs_embedding = model.feature_extractor.encode_image(target_imgs)
         target_imgs_embedding = torch.nn.functional.normalize(target_imgs_embedding, dim=1, p=2)
         all_embeddings.append(target_imgs_embedding.detach().cpu().numpy())
     return np.concatenate(all_embeddings)
-# Load model and configurations
 def load_model():
     model = Model(model_name="ViTamin-L-384", pretrained=None)
     model.load("weights.pth")
@@ -70,7 +67,6 @@ def load_model():
 def process_single_query(model, query_image_path, query_text, database_embeddings, database_df):
     # Process query image
     query_img = model.processor(Image.open(query_image_path)).unsqueeze(0).to(device)
@@ -131,9 +127,12 @@ def process_single_query(model, query_image_path, query_text, database_embedding
     return most_similar_image_path
-# Initialize model and database
 model = load_model()
 test_dataset = RetrievalDataset(
     img_dir_path="sample_evaluation/images",
     annotations_file_path="sample_evaluation/data.csv",
@@ -142,19 +141,27 @@ test_dataset = RetrievalDataset(
     tokenizer=model.tokenizer
 )
-database_embeddings = encode_database(model, test_dataset.load_database())  # Using your existing function
 def interface_fn(selected_image, query_text):
     result_image_path = process_single_query(
         model,
         selected_image,
         query_text,
         database_embeddings,
-        test_dataset.load_database()
     )
     return Image.open(result_image_path)
-# Create Gradio interface
 demo = gr.Interface(
     fn=interface_fn,
     inputs=[
@@ -170,7 +177,7 @@ demo = gr.Interface(
         ["sample_evaluation/images/455007.png", "Discard chair in the beginning, then proceed to bring car into play."],
         ["sample_evaluation/images/612311.png", "Get rid of train initially, and then follow up by including snowboard."]
     ],
-    allow_flagging=False,
     cache_examples=False
 )

 import pandas as pd
 from sklearn.metrics.pairwise import cosine_similarity
 from tqdm import tqdm
+import zipfile
+import os
+from huggingface_hub import hf_hub_download
 from token_classifier import load_token_classifier, predict
 from model import Model
 from dataset import RetrievalDataset
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 batch_size = 512
 def unzip_file(zip_path, extract_path):
     # Create the target directory if it doesn't exist
     os.makedirs(extract_path, exist_ok=True)
         # Extract all contents to the specified directory
         zip_ref.extractall(extract_path)
+# Setup files: extract the sample evaluation archive and make sure model weights are available
 zip_path = "sample_evaluation.zip"
 extract_path = "sample_evaluation"
+if os.path.exists(zip_path): # Only extract when the archive is present, so a missing zip does not raise at startup
+    unzip_file(zip_path, extract_path)
+# Download weights.pth into the current directory only when it is not already there
+if not os.path.exists("weights.pth"):
+    hf_hub_download(repo_id="safinal/compositional-image-retrieval", filename="weights.pth", local_dir='.')
def encode_database(model, df: pd.DataFrame) -> np.ndarray:
    """
    Process database images and generate L2-normalized embeddings.

    Args:
        model: Object exposing ``eval()``, ``processor`` (called on an opened
            PIL image) and ``feature_extractor.encode_image`` (called on the
            stacked batch tensor).
        df: DataFrame with a ``target_image`` column holding image paths.

    Returns:
        The per-batch embeddings concatenated into one array, one row per
        image in ``df`` order. When ``df`` has no rows, an empty 1-D array
        (``np.array([])``) is returned, so callers must not assume a 2-D
        shape in that case.
    """
    model.eval()
    all_embeddings = []
    # range() with a positive step never produces an empty slice, so no
    # per-batch emptiness check is needed; an empty df simply skips the loop.
    for start in tqdm(range(0, len(df), batch_size)):
        batch_paths = df['target_image'][start:start + batch_size]
        processed = []
        for target_image_path in batch_paths:
            # Close each file handle deterministically instead of relying on
            # garbage collection of the lazily-loaded PIL image.
            with Image.open(target_image_path) as img:
                processed.append(model.processor(img))
        target_imgs = torch.stack(processed).to(device)
        with torch.no_grad():
            target_imgs_embedding = model.feature_extractor.encode_image(target_imgs)
            # Unit-normalize each row (p=2 along dim=1).
            target_imgs_embedding = torch.nn.functional.normalize(target_imgs_embedding, dim=1, p=2)
        all_embeddings.append(target_imgs_embedding.detach().cpu().numpy())
    if not all_embeddings:
        return np.array([])
    return np.concatenate(all_embeddings)
 def load_model():
     model = Model(model_name="ViTamin-L-384", pretrained=None)
     model.load("weights.pth")
 def process_single_query(model, query_image_path, query_text, database_embeddings, database_df):
     # Process query image
     query_img = model.processor(Image.open(query_image_path)).unsqueeze(0).to(device)
     return most_similar_image_path
+# --- Initialization ---
+print("Loading model...")
 model = load_model()
+print("Loading dataset...")
 test_dataset = RetrievalDataset(
     img_dir_path="sample_evaluation/images",
     annotations_file_path="sample_evaluation/data.csv",
     tokenizer=model.tokenizer
 )
+# Load database once globally to avoid reloading it on every user request
+print("Encoding database...")
+database_df = test_dataset.load_database()
+database_embeddings = encode_database(model, database_df)
 def interface_fn(selected_image, query_text):
+    if selected_image is None:
+        return None
     result_image_path = process_single_query(
         model,
         selected_image,
         query_text,
         database_embeddings,
+        database_df  # Pass the pre-loaded DataFrame
     )
     return Image.open(result_image_path)
+# --- Gradio Interface ---
 demo = gr.Interface(
     fn=interface_fn,
     inputs=[
         ["sample_evaluation/images/455007.png", "Discard chair in the beginning, then proceed to bring car into play."],
         ["sample_evaluation/images/612311.png", "Get rid of train initially, and then follow up by including snowboard."]
     ],
+    flagging_mode="never",
     cache_examples=False
 )