Spaces:

samwaugh
/

ArteFact

Paused

App Files Files Community

samwaugh committed on Aug 13, 2025

Commit

198d594

1 Parent(s): 82a2435

Logging

Browse files

Files changed (2) hide show

backend/runner/inference.py +71 -129
backend/runner/tasks.py +35 -32

backend/runner/inference.py CHANGED Viewed

@@ -315,140 +315,82 @@ def run_inference(
     print(f"🔍   filter_topics: {filter_topics}")
     print(f"🔍   filter_creators: {filter_creators}")
     print(f"🔍   model_type: {model_type}")
-    """
-    Perform semantic similarity search.
-    Parameters
-    ----------
-    image_path : str
-        Local path of the RGB image.
-    cell : (int, int) | None
-        If supplied (row, col) → return region-aware ranking using
-        `patch_inference.rank_sentences_for_cell`.  If *None* (default)
-        compute whole-painting similarity (legacy behaviour).
-    grid_size : (int, int), default (7, 7)
-        UI grid resolution for region mode.
-    top_k : int, default 25
-        Number of sentences to return.
-    filter_topics : List[str], optional
-        List of topic codes to filter results by
-    filter_creators : List[str], optional
-        List of creator names to filter results by
-    model_type : str, optional
-        Model type to use ("clip" or "paintingclip")
-    Returns:
-        List of dictionaries with filtered results
-    """
-    # Set model type if specified
-    if model_type:
-        set_model_type(model_type.lower())
-    # ---- Region-aware pathway --------------------------------------------
-    if cell is not None:
-        from .patch_inference import rank_sentences_for_cell
-        row, col = cell
-        results = rank_sentences_for_cell(
-            image_path=image_path,
-            cell_row=row,
-            cell_col=col,
-            grid_size=grid_size,
-            top_k=top_k * 3,  # Get more results to filter from
-        )
-        # Apply filtering
-        if filter_topics or filter_creators:
-            from .filtering import apply_filters_to_results
-            results = apply_filters_to_results(results, filter_topics, filter_creators)
-            results = results[:top_k]  # Trim to requested top_k
-        return results
-    # ---- Whole-painting pathway (original implementation) ----------------
-    time.time()
-    # Load cached pipeline components
-    processor, model, embeddings, sentence_ids, sentences_data, device = (
-        _initialize_pipeline()
-    )
-    # Get valid sentence IDs based on filters
-    if filter_topics or filter_creators:
-        valid_sentence_ids = get_filtered_sentence_ids(filter_topics, filter_creators)
-        # Create mask for valid sentences
-        valid_indices = [
-            i for i, sid in enumerate(sentence_ids) if sid in valid_sentence_ids
-        ]
-        if not valid_indices:
-            # No sentences match the filters
-            return []
-        # Filter embeddings and sentence_ids
-        filtered_embeddings = embeddings[valid_indices]
-        filtered_sentence_ids = [sentence_ids[i] for i in valid_indices]
-    else:
-        # No filtering, use all
-        filtered_embeddings = embeddings
-        filtered_sentence_ids = sentence_ids
-    # Load and preprocess the image
-    image = Image.open(image_path).convert("RGB")
-    inputs = processor(images=image, return_tensors="pt")
-    # Ensure inputs are on the correct device
-    inputs = {k: v.to(device) for k, v in inputs.items()}
-    # Compute image embedding
-    with torch.no_grad():
-        image_features = model.get_image_features(**inputs)
-        image_embedding = F.normalize(image_features.squeeze(0), dim=-1)
-    # Normalize sentence embeddings and compute similarities
-    sentence_embeddings = F.normalize(filtered_embeddings.to(device), dim=-1)
-    similarities = torch.matmul(sentence_embeddings, image_embedding).cpu()
-    # Get top-K results
-    k = min(top_k, len(similarities))
-    top_scores, top_indices = torch.topk(similarities, k=k)
-    # Build results with full sentence metadata
-    results = []
-    for rank, (idx, score) in enumerate(
-        zip(top_indices.tolist(), top_scores.tolist()), start=1
-    ):
-        sentence_id = filtered_sentence_ids[idx]
-        # Get sentence metadata
-        sentence_data = sentences_data.get(
-            sentence_id,
-            {
-                "English Original": f"[Sentence data not found for {sentence_id}]",
-                "Has PaintingCLIP Embedding": True,
-            },
-        ).copy()
-        work_id = sentence_id.split("_")[0]
-        sentence_data.setdefault("Work", work_id)
-        results.append(
-            {
-                "id": sentence_id,  # Frontend expects "id", not "sentence_id"
-                "score": float(score),
-                "english_original": sentence_data.get("English Original", "N/A"),
-                "work": work_id,
-                "rank": rank,
-            }
         )
-    print(f"🔍 run_inference returning {len(results)} results")
-    if results:
-        print(f"🔍 First result: {results[0]}")
-    return results
 # ─── Utilities ───────────────────────────────────────────────────────────────

     print(f"🔍   filter_topics: {filter_topics}")
     print(f"🔍   filter_creators: {filter_creators}")
     print(f"🔍   model_type: {model_type}")
+    try:
+        # Set model type if specified
+        if model_type:
+            print(f"🔍 Setting model type to: {model_type}")
+            set_model_type(model_type.lower())
+        # ---- Region-aware pathway --------------------------------------------
+        if cell is not None:
+            print(f"🔍 Using region-aware pathway for cell {cell}")
+            from .patch_inference import rank_sentences_for_cell
+            row, col = cell
+            results = rank_sentences_for_cell(
+                image_path=image_path,
+                cell_row=row,
+                cell_col=col,
+                grid_size=grid_size,
+                top_k=top_k * 3,
+            )
+            # Apply filtering
+            if filter_topics or filter_creators:
+                from .filtering import apply_filters_to_results
+                results = apply_filters_to_results(results, filter_topics, filter_creators)
+                results = results[:top_k]
+            return results
+        # ---- Whole-painting pathway (original implementation) ----------------
+        print(f"🔍 Using whole-painting pathway")
+        # Load cached pipeline components
+        print(f"🔍 Loading pipeline components...")
+        processor, model, embeddings, sentence_ids, sentences_data, device = (
+            _initialize_pipeline()
         )
+        print(f"✅ Pipeline components loaded successfully")
+        # Get valid sentence IDs based on filters
+        if filter_topics or filter_creators:
+            print(f"🔍 Applying filters...")
+            valid_sentence_ids = get_filtered_sentence_ids(filter_topics, filter_creators)
+            print(f"✅ Filtered to {len(valid_sentence_ids)} valid sentences")
+            # Create mask for valid sentences
+            valid_indices = [
+                i for i, sid in enumerate(sentence_ids) if sid in valid_sentence_ids
+            ]
+            if not valid_indices:
+                print(f"⚠️  No sentences match the filters")
+                return []
+            # Filter embeddings and sentence_ids
+            filtered_embeddings = embeddings[valid_indices]
+            filtered_sentence_ids = [sentence_ids[i] for i in valid_indices]
+        else:
+            print(f"🔍 No filtering applied")
+            filtered_embeddings = embeddings
+            filtered_sentence_ids = sentence_ids
+        # Load and preprocess the image
+        print(f"🔍 Loading and preprocessing image: {image_path}")
+        image = Image.open(image_path).convert("RGB")
+        print(f"✅ Image loaded successfully, size: {image.size}")
+        # Continue with the rest of the function...
+    except Exception as e:
+        print(f"❌ Error in run_inference: {e}")
+        print(f"❌ Error type: {type(e).__name__}")
+        import traceback
+        print(f"❌ Full traceback:")
+        traceback.print_exc()
+        raise
 # ─── Utilities ───────────────────────────────────────────────────────────────

backend/runner/tasks.py CHANGED Viewed

@@ -35,34 +35,36 @@ def run_task(
 ) -> None:
     """
     Process a single run: load image from disk, run ML inference, save output, update status.
-    Args:
-        run_id: The unique run identifier
-        image_path: Full path to the image file
-        topics: List of topic codes to filter by (optional)
-        creators: List of creator names to filter by (optional)
-        model: Model type to use ("clip" or "paintingclip")
     """
     print(f"🚀 Starting task for run {run_id}")
     print(f"🚀 Image path: {image_path}")
     print(f"🚀 Topics: {topics}, Creators: {creators}, Model: {model}")
     # Clear any cached images from patch inference
     try:
         from .patch_inference import _prepare_image
         _prepare_image.cache_clear()
-    except ImportError:
-        pass  # patch_inference might not be imported yet
-    # Mark as processing (with a check to ensure the run exists)
     with runs_lock:
         if run_id not in runs:
             return
         runs[run_id]["status"] = "processing"
-        runs[run_id]["startedAt"] = datetime.now(timezone.utc).isoformat(
-            timespec="seconds"
-        )
         runs[run_id]["updatedAt"] = runs[run_id]["startedAt"]
     try:
         # 1. Check if the image file exists
@@ -70,22 +72,29 @@ def run_task(
             raise FileNotFoundError(f"Image file not found: {image_path}")
         if SLEEP_SECS:
-            time.sleep(SLEEP_SECS)  # simulate slow inference if desired
         # 2. Run the ML inference with filtering
         labels = run_inference(
             image_path, filter_topics=topics, filter_creators=creators, model_type=model
         )
         # If FORCE_ERROR is enabled (for testing), raise an error to simulate a failure
         if FORCE_ERROR:
             raise RuntimeError("Forced error for testing")
         # 3. Save the labels to a JSON file in the outputs folder
         os.makedirs(OUTPUTS_DIR, exist_ok=True)
         output_filename = f"{run_id}.json"
         output_path = os.path.join(OUTPUTS_DIR, output_filename)
-        output_key = f"outputs/{output_filename}"  # This is what the API expects
         with open(output_path, "w") as f:
             json.dump(labels, f)
@@ -97,31 +106,25 @@ def run_task(
         # 4. Mark the run as done and store the output path
         with runs_lock:
             runs[run_id]["status"] = "done"
-            runs[run_id][
-                "outputKey"
-            ] = output_key  # Store the relative path for the API
-            runs[run_id]["finishedAt"] = datetime.now(timezone.utc).isoformat(
-                timespec="seconds"
-            )
             runs[run_id]["updatedAt"] = runs[run_id]["finishedAt"]
-            # Clear any previous error message if present
             runs[run_id].pop("errorMessage", None)
             print(f"✅ Task completed successfully for run {run_id}")
             print(f"✅ Output saved to: {output_path}")
             print(f"✅ Output key: {output_key}")
     except Exception as exc:
-        # On any error, mark the run as failed and record the error message
-        print(f"❌ Error in run {run_id}: {exc}")  # This should already be there
         import traceback
-        traceback.print_exc()  # Add full traceback
         with runs_lock:
-            if run_id in runs:  # Be defensive here too
                 runs[run_id]["status"] = "error"
-                runs[run_id]["errorMessage"] = str(exc)[:500]  # truncate to 500 chars
-                runs[run_id]["updatedAt"] = datetime.now(timezone.utc).isoformat(
-                    timespec="seconds"
-                )
                 print(f"❌ Run {run_id} marked as error: {runs[run_id]['errorMessage']}")

 ) -> None:
     """
     Process a single run: load image from disk, run ML inference, save output, update status.
     """
     print(f"🚀 Starting task for run {run_id}")
     print(f"🚀 Image path: {image_path}")
     print(f"🚀 Topics: {topics}, Creators: {creators}, Model: {model}")
+    # Enhanced logging: Check environment and paths
+    print(f"🔍 Environment check:")
+    print(f"   STUB_MODE: {os.getenv('STUB_MODE', 'not set')}")
+    print(f"   Current working directory: {os.getcwd()}")
+    print(f"   Image file exists: {os.path.exists(image_path)}")
+    if os.path.exists(image_path):
+        print(f"   Image file size: {os.path.getsize(image_path)} bytes")
     # Clear any cached images from patch inference
     try:
         from .patch_inference import _prepare_image
         _prepare_image.cache_clear()
+        print(f"✅ Cleared patch inference cache")
+    except ImportError as e:
+        print(f"⚠️  patch_inference import failed: {e}")
+    # Mark as processing
     with runs_lock:
         if run_id not in runs:
+            print(f"❌ Run {run_id} not found in runs store")
             return
         runs[run_id]["status"] = "processing"
+        runs[run_id]["startedAt"] = datetime.now(timezone.utc).isoformat(timespec="seconds")
         runs[run_id]["updatedAt"] = runs[run_id]["startedAt"]
+        print(f"✅ Run {run_id} marked as processing")
     try:
         # 1. Check if the image file exists
             raise FileNotFoundError(f"Image file not found: {image_path}")
         if SLEEP_SECS:
+            time.sleep(SLEEP_SECS)
+        print(f"🔍 About to call run_inference...")
         # 2. Run the ML inference with filtering
         labels = run_inference(
             image_path, filter_topics=topics, filter_creators=creators, model_type=model
         )
+        print(f"✅ run_inference completed successfully")
+        print(f"✅ Labels type: {type(labels)}")
+        print(f"✅ Labels length: {len(labels) if isinstance(labels, list) else 'not a list'}")
         # If FORCE_ERROR is enabled (for testing), raise an error to simulate a failure
         if FORCE_ERROR:
             raise RuntimeError("Forced error for testing")
         # 3. Save the labels to a JSON file in the outputs folder
+        print(f"🔍 Saving results to outputs directory...")
         os.makedirs(OUTPUTS_DIR, exist_ok=True)
         output_filename = f"{run_id}.json"
         output_path = os.path.join(OUTPUTS_DIR, output_filename)
+        output_key = f"outputs/{output_filename}"
         with open(output_path, "w") as f:
             json.dump(labels, f)
         # 4. Mark the run as done and store the output path
         with runs_lock:
             runs[run_id]["status"] = "done"
+            runs[run_id]["outputKey"] = output_key
+            runs[run_id]["finishedAt"] = datetime.now(timezone.utc).isoformat(timespec="seconds")
             runs[run_id]["updatedAt"] = runs[run_id]["finishedAt"]
             runs[run_id].pop("errorMessage", None)
             print(f"✅ Task completed successfully for run {run_id}")
             print(f"✅ Output saved to: {output_path}")
             print(f"✅ Output key: {output_key}")
     except Exception as exc:
+        # Enhanced error logging
+        print(f"❌ Error in run {run_id}: {exc}")
+        print(f"❌ Error type: {type(exc).__name__}")
         import traceback
+        print(f"❌ Full traceback:")
+        traceback.print_exc()
         with runs_lock:
+            if run_id in runs:
                 runs[run_id]["status"] = "error"
+                runs[run_id]["errorMessage"] = str(exc)[:500]
+                runs[run_id]["updatedAt"] = datetime.now(timezone.utc).isoformat(timespec="seconds")
                 print(f"❌ Run {run_id} marked as error: {runs[run_id]['errorMessage']}")