Spaces:

samwaugh
/

ArteFact

Paused

App Files Community

samwaugh committed on Aug 13, 2025

Commit

4ac1f80

1 Parent(s): 9c01cdc

Agent attempted fix

Browse files

Files changed (7) hide show

backend/runner/app.py +23 -0
backend/runner/filtering.py +2 -2
backend/runner/inference.py +32 -8
backend/runner/patch_inference.py +1 -1
backend/runner/tasks.py +8 -1
backend/tests/test_patch_inference.py +3 -3
frontend/js/artefact-context.js +30 -3

backend/runner/app.py CHANGED Viewed

@@ -103,6 +103,14 @@ creators = _load_json(JSON_INFO_DIR / "creators.json", {})
 topics = _load_json(JSON_INFO_DIR / "topics.json", {})
 topic_names = _load_json(JSON_INFO_DIR / "topic_names.json", {})
 # --------------------------------------------------------------------------- #
 #  Routes                                                                     #
@@ -182,17 +190,23 @@ def upload_file(run_id: str):
     """
     Receives the image file upload for the given runId and saves it to disk.
     """
     if "file" not in request.files:
         return jsonify({"error": "no-file"}), 400
     file = request.files["file"]
     # Ensure artifacts directory exists
     os.makedirs(ARTIFACTS_DIR, exist_ok=True)
     # Save the file as artifacts/<runId>.jpg
     file_path = os.path.join(ARTIFACTS_DIR, f"{run_id}.jpg")
     file.save(file_path)
     # Check file exists otherwise 400
     if not os.path.exists(file_path):
         return jsonify({"error": "file-not-saved"}), 500
     # Respond with 204 No Content (success, no response body)
     return "{}", 204
@@ -210,12 +224,16 @@ def create_run():
     - Launch background thread for processing
     """
     payload = request.get_json(force=True)
     run_id = payload["runId"]
     image_key = payload["imageKey"]
     topics = payload.get("topics", [])
     creators = payload.get("creators", [])
     model = payload.get("model", "paintingclip")
     now = datetime.now(timezone.utc).isoformat(timespec="seconds")
     # Store initial run info in the in-memory dictionary
     with RUNS_LOCK:
@@ -231,6 +249,7 @@ def create_run():
         }
     if STUB_MODE:
         # Write a tiny fake result so the UI flows
         results = {
             "runId": run_id,
@@ -248,6 +267,7 @@ def create_run():
             ],
         }
         out_path = OUTPUTS_DIR / f"{run_id}.json"
         out_path.write_text(json.dumps(results, ensure_ascii=False, indent=2), encoding="utf-8")
         with RUNS_LOCK:
@@ -257,10 +277,13 @@ def create_run():
                 "finishedAt": now,
                 "updatedAt": now
             })
         return jsonify(results), 200
     else:
         # Submit the background inference task to the thread pool
         image_path = ARTIFACTS_DIR / f"{run_id}.jpg"
         executor.submit(tasks.run_task, run_id, str(image_path), topics, creators, model)
         return jsonify({"status": "accepted"}), 202

 topics = _load_json(JSON_INFO_DIR / "topics.json", {})
 topic_names = _load_json(JSON_INFO_DIR / "topic_names.json", {})
+# Debug logging for data loading
+print(f"📊 Data loaded:")
+print(f"📊   Sentences: {len(sentences)} entries")
+print(f"📊   Works: {len(works)} entries")
+print(f"📊   Topics: {len(topics)} entries")
+print(f"📊   Creators: {len(creators)} entries")
+print(f"📊   Topic names: {len(topic_names)} entries")
 # --------------------------------------------------------------------------- #
 #  Routes                                                                     #
     """
     Receives the image file upload for the given runId and saves it to disk.
     """
+    print(f"📤 Upload request for run {run_id}")
     if "file" not in request.files:
+        print(f"❌ No file in request for run {run_id}")
         return jsonify({"error": "no-file"}), 400
     file = request.files["file"]
     # Ensure artifacts directory exists
     os.makedirs(ARTIFACTS_DIR, exist_ok=True)
     # Save the file as artifacts/<runId>.jpg
     file_path = os.path.join(ARTIFACTS_DIR, f"{run_id}.jpg")
+    print(f"📤 Saving file to {file_path}")
     file.save(file_path)
     # Check file exists otherwise 400
     if not os.path.exists(file_path):
+        print(f"❌ File not saved for run {run_id}")
         return jsonify({"error": "file-not-saved"}), 500
+    print(f"✅ File saved successfully for run {run_id}")
     # Respond with 204 No Content (success, no response body)
     return "{}", 204
     - Launch background thread for processing
     """
     payload = request.get_json(force=True)
+    print(f"🔍 create_run called with payload: {payload}")
     run_id = payload["runId"]
     image_key = payload["imageKey"]
     topics = payload.get("topics", [])
     creators = payload.get("creators", [])
     model = payload.get("model", "paintingclip")
     now = datetime.now(timezone.utc).isoformat(timespec="seconds")
+    print(f"🔍 Parsed: run_id={run_id}, image_key={image_key}, topics={topics}, creators={creators}, model={model}")
     # Store initial run info in the in-memory dictionary
     with RUNS_LOCK:
         }
     if STUB_MODE:
+        print(f"🔍 Stub mode: generating fake results for {run_id}")
         # Write a tiny fake result so the UI flows
         results = {
             "runId": run_id,
             ],
         }
         out_path = OUTPUTS_DIR / f"{run_id}.json"
+        print(f"🔍 Stub mode: writing results to {out_path}")
         out_path.write_text(json.dumps(results, ensure_ascii=False, indent=2), encoding="utf-8")
         with RUNS_LOCK:
                 "finishedAt": now,
                 "updatedAt": now
             })
+        print(f"🔍 Stub mode: returning results directly for {run_id}")
         return jsonify(results), 200
     else:
         # Submit the background inference task to the thread pool
         image_path = ARTIFACTS_DIR / f"{run_id}.jpg"
+        print(f"🔍 Real ML mode: submitting task for {run_id} with image {image_path}")
+        print(f"🔍 Topics: {topics}, Creators: {creators}, Model: {model}")
         executor.submit(tasks.run_task, run_id, str(image_path), topics, creators, model)
         return jsonify({"status": "accepted"}), 202

backend/runner/filtering.py CHANGED Viewed

@@ -98,7 +98,7 @@ def apply_filters_to_results(
     Filter a list of results based on topics and creators.
     Args:
-        results: List of result dictionaries with 'sentence_id' field
         filter_topics: List of topic codes to filter by
         filter_creators: List of creator names to filter by
@@ -112,7 +112,7 @@ def apply_filters_to_results(
     # Filter results to only include valid sentences
     filtered_results = [
-        result for result in results if result.get("sentence_id") in valid_sentence_ids
     ]
     # Re-rank the filtered results

     Filter a list of results based on topics and creators.
     Args:
+        results: List of result dictionaries with 'id' field
         filter_topics: List of topic codes to filter by
         filter_creators: List of creator names to filter by
     # Filter results to only include valid sentences
     filtered_results = [
+        result for result in results if result.get("id") in valid_sentence_ids
     ]
     # Re-rank the filtered results

backend/runner/inference.py CHANGED Viewed

@@ -212,16 +212,30 @@ def _initialize_pipeline():
     model = model.eval()
     # Load pre-computed embeddings - USE CONSOLIDATED LOADING
-    if MODEL_TYPE == "clip":
-        embeddings, sentence_ids = load_embeddings_for_model("clip")
-    else:
-        embeddings, sentence_ids = load_embeddings_for_model("paintingclip")
-    if embeddings is None or sentence_ids is None:
-        raise ValueError(f"Failed to load embeddings for model type: {MODEL_TYPE}")
     # Load sentence metadata
-    sentences_data = _load_sentences_metadata(SENTENCES_JSON)
     return processor, model, embeddings, sentence_ids, sentences_data, device
@@ -295,6 +309,12 @@ def run_inference(
     filter_creators: List[str] = None,
     model_type: str = None,
 ) -> List[Dict[str, Any]]:
     """
     Perform semantic similarity search.
@@ -416,7 +436,7 @@ def run_inference(
         results.append(
             {
-                "sentence_id": sentence_id,
                 "score": float(score),
                 "english_original": sentence_data.get("English Original", "N/A"),
                 "work": work_id,
@@ -424,6 +444,10 @@ def run_inference(
             }
         )
     return results

     model = model.eval()
     # Load pre-computed embeddings - USE CONSOLIDATED LOADING
+    try:
+        if MODEL_TYPE == "clip":
+            embeddings, sentence_ids = load_embeddings_for_model("clip")
+        else:
+            embeddings, sentence_ids = load_embeddings_for_model("paintingclip")
+        if embeddings is None or sentence_ids is None:
+            raise ValueError(f"Failed to load embeddings for model type: {MODEL_TYPE}")
+        print(f"🔍 Loaded {len(sentence_ids)} embeddings with shape {embeddings.shape}")
+    except Exception as e:
+        print(f"❌ Error loading embeddings: {e}")
+        raise
     # Load sentence metadata
+    try:
+        sentences_data = _load_sentences_metadata(SENTENCES_JSON)
+        print(f"🔍 Loaded {len(sentences_data)} sentence metadata entries")
+        if sentences_data:
+            sample_key = next(iter(sentences_data.keys()))
+            print(f"🔍 Sample sentence data structure: {sentences_data[sample_key]}")
+    except Exception as e:
+        print(f"❌ Error loading sentence metadata: {e}")
+        sentences_data = {}
     return processor, model, embeddings, sentence_ids, sentences_data, device
     filter_creators: List[str] = None,
     model_type: str = None,
 ) -> List[Dict[str, Any]]:
+    print(f"🔍 run_inference called with:")
+    print(f"🔍   image_path: {image_path}")
+    print(f"🔍   cell: {cell}")
+    print(f"🔍   filter_topics: {filter_topics}")
+    print(f"🔍   filter_creators: {filter_creators}")
+    print(f"🔍   model_type: {model_type}")
     """
     Perform semantic similarity search.
         results.append(
             {
+                "id": sentence_id,  # Frontend expects "id", not "sentence_id"
                 "score": float(score),
                 "english_original": sentence_data.get("English Original", "N/A"),
                 "work": work_id,
             }
         )
+    print(f"🔍 run_inference returning {len(results)} results")
+    if results:
+        print(f"🔍 First result: {results[0]}")
     return results

backend/runner/patch_inference.py CHANGED Viewed

@@ -175,7 +175,7 @@ def rank_sentences_for_cell(
         work_id = sid.split("_")[0]
         out.append(
             {
-                "sentence_id": sid,
                 "score": float(sc),
                 "english_original": meta.get("English Original", ""),
                 "work": work_id,

         work_id = sid.split("_")[0]
         out.append(
             {
+                "id": sid,  # Frontend expects "id", not "sentence_id"
                 "score": float(sc),
                 "english_original": meta.get("English Original", ""),
                 "work": work_id,

backend/runner/tasks.py CHANGED Viewed

@@ -43,6 +43,9 @@ def run_task(
         creators: List of creator names to filter by (optional)
         model: Model type to use ("clip" or "paintingclip")
     """
     # Clear any cached images from patch inference
     try:
         from .patch_inference import _prepare_image
@@ -103,10 +106,13 @@ def run_task(
             runs[run_id]["updatedAt"] = runs[run_id]["finishedAt"]
             # Clear any previous error message if present
             runs[run_id].pop("errorMessage", None)
     except Exception as exc:
         # On any error, mark the run as failed and record the error message
-        print(f"Error in run {run_id}: {exc}")  # This should already be there
         import traceback
         traceback.print_exc()  # Add full traceback
@@ -118,3 +124,4 @@ def run_task(
                 runs[run_id]["updatedAt"] = datetime.now(timezone.utc).isoformat(
                     timespec="seconds"
                 )

         creators: List of creator names to filter by (optional)
         model: Model type to use ("clip" or "paintingclip")
     """
+    print(f"🚀 Starting task for run {run_id}")
+    print(f"🚀 Image path: {image_path}")
+    print(f"🚀 Topics: {topics}, Creators: {creators}, Model: {model}")
     # Clear any cached images from patch inference
     try:
         from .patch_inference import _prepare_image
             runs[run_id]["updatedAt"] = runs[run_id]["finishedAt"]
             # Clear any previous error message if present
             runs[run_id].pop("errorMessage", None)
+            print(f"✅ Task completed successfully for run {run_id}")
+            print(f"✅ Output saved to: {output_path}")
+            print(f"✅ Output key: {output_key}")
     except Exception as exc:
         # On any error, mark the run as failed and record the error message
+        print(f"❌ Error in run {run_id}: {exc}")  # This should already be there
         import traceback
         traceback.print_exc()  # Add full traceback
                 runs[run_id]["updatedAt"] = datetime.now(timezone.utc).isoformat(
                     timespec="seconds"
                 )
+                print(f"❌ Run {run_id} marked as error: {runs[run_id]['errorMessage']}")

backend/tests/test_patch_inference.py CHANGED Viewed

@@ -35,7 +35,7 @@ def print_results(results: List[Dict[str, Any]], title: str, max_display: int =
         print(f"\n{i}. {result['english_original'][:100]}...")
         print(f"   Score: {result['score']:.4f}")
         print(f"   Work: {result['work']}")
-        print(f"   ID: {result['sentence_id']}")
     if len(results) > max_display:
         print(f"\n... and {len(results) - max_display} more results")
@@ -151,7 +151,7 @@ def main():
     # Get the top sentence from whole image
     if whole_image_results:
-        top_sentence_id = whole_image_results[0]["sentence_id"]
         top_sentence_text = whole_image_results[0]["english_original"]
         print(f"Top whole-image sentence: {top_sentence_text[:80]}...")
@@ -175,7 +175,7 @@ def main():
             region_score = None
             region_rank = None
             for rank, result in enumerate(region_results, 1):
-                if result["sentence_id"] == top_sentence_id:
                     region_score = result["score"]
                     region_rank = rank
                     break

         print(f"\n{i}. {result['english_original'][:100]}...")
         print(f"   Score: {result['score']:.4f}")
         print(f"   Work: {result['work']}")
+        print(f"   ID: {result['id']}")
     if len(results) > max_display:
         print(f"\n... and {len(results) - max_display} more results")
     # Get the top sentence from whole image
     if whole_image_results:
+        top_sentence_id = whole_image_results[0]["id"]
         top_sentence_text = whole_image_results[0]["english_original"]
         print(f"Top whole-image sentence: {top_sentence_text[:80]}...")
             region_score = None
             region_rank = None
             for rank, result in enumerate(region_results, 1):
+                if result["id"] == top_sentence_id:
                     region_score = result["score"]
                     region_rank = rank
                     break

frontend/js/artefact-context.js CHANGED Viewed

@@ -515,7 +515,16 @@ function fetchPresign() {
          .then(response => {
           logWorkingMessage('Run registered successfully', 'text-white');
           $('#debugStatus').text('Run submitted');
-          pollRunStatus(runId);
          })
         .catch(err => {
           console.error('Upload or /runs error:', err);
@@ -562,6 +571,7 @@ function pollRunStatus(runId) {
               .then(res => res.json())
               .then(output => {
                 logWorkingMessage('Outputs received', 'text-white');
                 display_sentences(output);
                 $('#workingOverlay').addClass('d-none');
               })
@@ -603,6 +613,11 @@ function pollRunStatus(runId) {
  * @returns {string}
  */
 function escapeHTML(str) {
   return str.replace(/[&<>'"]/g, tag => (
     {'&': '&amp;', '<': '&lt;', '>': '&gt;', "'": '&#39;', '"': '&quot;'}[tag]
   ));
@@ -613,10 +628,14 @@ function escapeHTML(str) {
  * @param {Array|Object} data - Array of sentence objects or {sentences:[…]}
  */
 function display_sentences(data) {
   // normalise payload
   if (!Array.isArray(data)) {
     data = (data && Array.isArray(data.sentences)) ? data.sentences : [];
   }
   if (!data.length) {                       // nothing to show ⇒ just hide overlay
     $('#workingOverlay').addClass('d-none');
     return;
@@ -627,10 +646,18 @@ function display_sentences(data) {
   $('#sentenceList').empty();
   /* ---------- sentence list construction ---------- */
-  data.forEach(item => {
     const li = $(`
       <li class="list-group-item sentence-item mb-1"
-          data-work="${item.work}"
           data-sentence="${escapeHTML(item.english_original)}">
         <div class="d-flex align-items-center">
           <span class="flex-grow-1">${escapeHTML(item.english_original)}</span>

          .then(response => {
           logWorkingMessage('Run registered successfully', 'text-white');
           $('#debugStatus').text('Run submitted');
+          // If we got sentences directly (stub mode), display them
+          if (response.sentences && response.sentences.length > 0) {
+            logWorkingMessage('Stub mode: displaying sentences directly', 'text-white');
+            display_sentences(response);
+            $('#workingOverlay').addClass('d-none');
+          } else {
+            // Real ML mode: poll for results
+            pollRunStatus(runId);
+          }
          })
         .catch(err => {
           console.error('Upload or /runs error:', err);
               .then(res => res.json())
               .then(output => {
                 logWorkingMessage('Outputs received', 'text-white');
+                console.log('Raw output data:', output); // Debug logging
                 display_sentences(output);
                 $('#workingOverlay').addClass('d-none');
               })
  * @returns {string}
  */
 function escapeHTML(str) {
+  // Defensive: ensure str is a string
+  if (typeof str !== 'string') {
+    console.warn('escapeHTML called with non-string:', str);
+    return String(str || '');
+  }
   return str.replace(/[&<>'"]/g, tag => (
     {'&': '&amp;', '<': '&lt;', '>': '&gt;', "'": '&#39;', '"': '&quot;'}[tag]
   ));
  * @param {Array|Object} data - Array of sentence objects or {sentences:[…]}
  */
 function display_sentences(data) {
+  console.log('display_sentences called with:', data); // Debug logging
   // normalise payload
   if (!Array.isArray(data)) {
     data = (data && Array.isArray(data.sentences)) ? data.sentences : [];
   }
+  console.log('Normalized data:', data); // Debug logging
   if (!data.length) {                       // nothing to show ⇒ just hide overlay
     $('#workingOverlay').addClass('d-none');
     return;
   $('#sentenceList').empty();
   /* ---------- sentence list construction ---------- */
+  data.forEach((item, index) => {
+    console.log(`Processing item ${index}:`, item); // Debug logging
+    // Validate required fields
+    if (!item.english_original || typeof item.english_original !== 'string') {
+      console.warn(`Item ${index} has invalid english_original:`, item.english_original);
+      return; // Skip this item
+    }
     const li = $(`
       <li class="list-group-item sentence-item mb-1"
+          data-work="${item.work || 'unknown'}"
           data-sentence="${escapeHTML(item.english_original)}">
         <div class="d-flex align-items-center">
           <span class="flex-grow-1">${escapeHTML(item.english_original)}</span>