samwaugh committed on
Commit
4ac1f80
·
1 Parent(s): 9c01cdc

Agent attempted fix

Browse files
backend/runner/app.py CHANGED
@@ -103,6 +103,14 @@ creators = _load_json(JSON_INFO_DIR / "creators.json", {})
103
  topics = _load_json(JSON_INFO_DIR / "topics.json", {})
104
  topic_names = _load_json(JSON_INFO_DIR / "topic_names.json", {})
105
 
 
 
 
 
 
 
 
 
106
 
107
  # --------------------------------------------------------------------------- #
108
  # Routes #
@@ -182,17 +190,23 @@ def upload_file(run_id: str):
182
  """
183
  Receives the image file upload for the given runId and saves it to disk.
184
  """
 
 
185
  if "file" not in request.files:
 
186
  return jsonify({"error": "no-file"}), 400
187
  file = request.files["file"]
188
  # Ensure artifacts directory exists
189
  os.makedirs(ARTIFACTS_DIR, exist_ok=True)
190
  # Save the file as artifacts/<runId>.jpg
191
  file_path = os.path.join(ARTIFACTS_DIR, f"{run_id}.jpg")
 
192
  file.save(file_path)
193
  # Check that the file exists; otherwise respond with 500
194
  if not os.path.exists(file_path):
 
195
  return jsonify({"error": "file-not-saved"}), 500
 
196
  # Respond with 204 No Content (success, no response body)
197
  return "{}", 204
198
 
@@ -210,12 +224,16 @@ def create_run():
210
  - Launch background thread for processing
211
  """
212
  payload = request.get_json(force=True)
 
 
213
  run_id = payload["runId"]
214
  image_key = payload["imageKey"]
215
  topics = payload.get("topics", [])
216
  creators = payload.get("creators", [])
217
  model = payload.get("model", "paintingclip")
218
  now = datetime.now(timezone.utc).isoformat(timespec="seconds")
 
 
219
 
220
  # Store initial run info in the in-memory dictionary
221
  with RUNS_LOCK:
@@ -231,6 +249,7 @@ def create_run():
231
  }
232
 
233
  if STUB_MODE:
 
234
  # Write a tiny fake result so the UI flows
235
  results = {
236
  "runId": run_id,
@@ -248,6 +267,7 @@ def create_run():
248
  ],
249
  }
250
  out_path = OUTPUTS_DIR / f"{run_id}.json"
 
251
  out_path.write_text(json.dumps(results, ensure_ascii=False, indent=2), encoding="utf-8")
252
 
253
  with RUNS_LOCK:
@@ -257,10 +277,13 @@ def create_run():
257
  "finishedAt": now,
258
  "updatedAt": now
259
  })
 
260
  return jsonify(results), 200
261
  else:
262
  # Submit the background inference task to the thread pool
263
  image_path = ARTIFACTS_DIR / f"{run_id}.jpg"
 
 
264
  executor.submit(tasks.run_task, run_id, str(image_path), topics, creators, model)
265
  return jsonify({"status": "accepted"}), 202
266
 
 
103
  topics = _load_json(JSON_INFO_DIR / "topics.json", {})
104
  topic_names = _load_json(JSON_INFO_DIR / "topic_names.json", {})
105
 
106
+ # Debug logging for data loading
107
+ print(f"📊 Data loaded:")
108
+ print(f"📊 Sentences: {len(sentences)} entries")
109
+ print(f"📊 Works: {len(works)} entries")
110
+ print(f"📊 Topics: {len(topics)} entries")
111
+ print(f"📊 Creators: {len(creators)} entries")
112
+ print(f"📊 Topic names: {len(topic_names)} entries")
113
+
114
 
115
  # --------------------------------------------------------------------------- #
116
  # Routes #
 
190
  """
191
  Receives the image file upload for the given runId and saves it to disk.
192
  """
193
+ print(f"📤 Upload request for run {run_id}")
194
+
195
  if "file" not in request.files:
196
+ print(f"❌ No file in request for run {run_id}")
197
  return jsonify({"error": "no-file"}), 400
198
  file = request.files["file"]
199
  # Ensure artifacts directory exists
200
  os.makedirs(ARTIFACTS_DIR, exist_ok=True)
201
  # Save the file as artifacts/<runId>.jpg
202
  file_path = os.path.join(ARTIFACTS_DIR, f"{run_id}.jpg")
203
+ print(f"📤 Saving file to {file_path}")
204
  file.save(file_path)
205
  # Check that the file exists; otherwise respond with 500
206
  if not os.path.exists(file_path):
207
+ print(f"❌ File not saved for run {run_id}")
208
  return jsonify({"error": "file-not-saved"}), 500
209
+ print(f"✅ File saved successfully for run {run_id}")
210
  # Respond with 204 No Content (success, no response body)
211
  return "{}", 204
212
 
 
224
  - Launch background thread for processing
225
  """
226
  payload = request.get_json(force=True)
227
+ print(f"🔍 create_run called with payload: {payload}")
228
+
229
  run_id = payload["runId"]
230
  image_key = payload["imageKey"]
231
  topics = payload.get("topics", [])
232
  creators = payload.get("creators", [])
233
  model = payload.get("model", "paintingclip")
234
  now = datetime.now(timezone.utc).isoformat(timespec="seconds")
235
+
236
+ print(f"🔍 Parsed: run_id={run_id}, image_key={image_key}, topics={topics}, creators={creators}, model={model}")
237
 
238
  # Store initial run info in the in-memory dictionary
239
  with RUNS_LOCK:
 
249
  }
250
 
251
  if STUB_MODE:
252
+ print(f"🔍 Stub mode: generating fake results for {run_id}")
253
  # Write a tiny fake result so the UI flows
254
  results = {
255
  "runId": run_id,
 
267
  ],
268
  }
269
  out_path = OUTPUTS_DIR / f"{run_id}.json"
270
+ print(f"🔍 Stub mode: writing results to {out_path}")
271
  out_path.write_text(json.dumps(results, ensure_ascii=False, indent=2), encoding="utf-8")
272
 
273
  with RUNS_LOCK:
 
277
  "finishedAt": now,
278
  "updatedAt": now
279
  })
280
+ print(f"🔍 Stub mode: returning results directly for {run_id}")
281
  return jsonify(results), 200
282
  else:
283
  # Submit the background inference task to the thread pool
284
  image_path = ARTIFACTS_DIR / f"{run_id}.jpg"
285
+ print(f"🔍 Real ML mode: submitting task for {run_id} with image {image_path}")
286
+ print(f"🔍 Topics: {topics}, Creators: {creators}, Model: {model}")
287
  executor.submit(tasks.run_task, run_id, str(image_path), topics, creators, model)
288
  return jsonify({"status": "accepted"}), 202
289
 
backend/runner/filtering.py CHANGED
@@ -98,7 +98,7 @@ def apply_filters_to_results(
98
  Filter a list of results based on topics and creators.
99
 
100
  Args:
101
- results: List of result dictionaries with 'sentence_id' field
102
  filter_topics: List of topic codes to filter by
103
  filter_creators: List of creator names to filter by
104
 
@@ -112,7 +112,7 @@ def apply_filters_to_results(
112
 
113
  # Filter results to only include valid sentences
114
  filtered_results = [
115
- result for result in results if result.get("sentence_id") in valid_sentence_ids
116
  ]
117
 
118
  # Re-rank the filtered results
 
98
  Filter a list of results based on topics and creators.
99
 
100
  Args:
101
+ results: List of result dictionaries with 'id' field
102
  filter_topics: List of topic codes to filter by
103
  filter_creators: List of creator names to filter by
104
 
 
112
 
113
  # Filter results to only include valid sentences
114
  filtered_results = [
115
+ result for result in results if result.get("id") in valid_sentence_ids
116
  ]
117
 
118
  # Re-rank the filtered results
backend/runner/inference.py CHANGED
@@ -212,16 +212,30 @@ def _initialize_pipeline():
212
  model = model.eval()
213
 
214
  # Load pre-computed embeddings - USE CONSOLIDATED LOADING
215
- if MODEL_TYPE == "clip":
216
- embeddings, sentence_ids = load_embeddings_for_model("clip")
217
- else:
218
- embeddings, sentence_ids = load_embeddings_for_model("paintingclip")
 
219
 
220
- if embeddings is None or sentence_ids is None:
221
- raise ValueError(f"Failed to load embeddings for model type: {MODEL_TYPE}")
 
 
 
 
 
222
 
223
  # Load sentence metadata
224
- sentences_data = _load_sentences_metadata(SENTENCES_JSON)
 
 
 
 
 
 
 
 
225
 
226
  return processor, model, embeddings, sentence_ids, sentences_data, device
227
 
@@ -295,6 +309,12 @@ def run_inference(
295
  filter_creators: List[str] = None,
296
  model_type: str = None,
297
  ) -> List[Dict[str, Any]]:
 
 
 
 
 
 
298
  """
299
  Perform semantic similarity search.
300
 
@@ -416,7 +436,7 @@ def run_inference(
416
 
417
  results.append(
418
  {
419
- "sentence_id": sentence_id,
420
  "score": float(score),
421
  "english_original": sentence_data.get("English Original", "N/A"),
422
  "work": work_id,
@@ -424,6 +444,10 @@ def run_inference(
424
  }
425
  )
426
 
 
 
 
 
427
  return results
428
 
429
 
 
212
  model = model.eval()
213
 
214
  # Load pre-computed embeddings - USE CONSOLIDATED LOADING
215
+ try:
216
+ if MODEL_TYPE == "clip":
217
+ embeddings, sentence_ids = load_embeddings_for_model("clip")
218
+ else:
219
+ embeddings, sentence_ids = load_embeddings_for_model("paintingclip")
220
 
221
+ if embeddings is None or sentence_ids is None:
222
+ raise ValueError(f"Failed to load embeddings for model type: {MODEL_TYPE}")
223
+
224
+ print(f"🔍 Loaded {len(sentence_ids)} embeddings with shape {embeddings.shape}")
225
+ except Exception as e:
226
+ print(f"❌ Error loading embeddings: {e}")
227
+ raise
228
 
229
  # Load sentence metadata
230
+ try:
231
+ sentences_data = _load_sentences_metadata(SENTENCES_JSON)
232
+ print(f"🔍 Loaded {len(sentences_data)} sentence metadata entries")
233
+ if sentences_data:
234
+ sample_key = next(iter(sentences_data.keys()))
235
+ print(f"🔍 Sample sentence data structure: {sentences_data[sample_key]}")
236
+ except Exception as e:
237
+ print(f"❌ Error loading sentence metadata: {e}")
238
+ sentences_data = {}
239
 
240
  return processor, model, embeddings, sentence_ids, sentences_data, device
241
 
 
309
  filter_creators: List[str] = None,
310
  model_type: str = None,
311
  ) -> List[Dict[str, Any]]:
312
+ print(f"🔍 run_inference called with:")
313
+ print(f"🔍 image_path: {image_path}")
314
+ print(f"🔍 cell: {cell}")
315
+ print(f"🔍 filter_topics: {filter_topics}")
316
+ print(f"🔍 filter_creators: {filter_creators}")
317
+ print(f"🔍 model_type: {model_type}")
318
  """
319
  Perform semantic similarity search.
320
 
 
436
 
437
  results.append(
438
  {
439
+ "id": sentence_id, # Frontend expects "id", not "sentence_id"
440
  "score": float(score),
441
  "english_original": sentence_data.get("English Original", "N/A"),
442
  "work": work_id,
 
444
  }
445
  )
446
 
447
+ print(f"🔍 run_inference returning {len(results)} results")
448
+ if results:
449
+ print(f"🔍 First result: {results[0]}")
450
+
451
  return results
452
 
453
 
backend/runner/patch_inference.py CHANGED
@@ -175,7 +175,7 @@ def rank_sentences_for_cell(
175
  work_id = sid.split("_")[0]
176
  out.append(
177
  {
178
- "sentence_id": sid,
179
  "score": float(sc),
180
  "english_original": meta.get("English Original", ""),
181
  "work": work_id,
 
175
  work_id = sid.split("_")[0]
176
  out.append(
177
  {
178
+ "id": sid, # Frontend expects "id", not "sentence_id"
179
  "score": float(sc),
180
  "english_original": meta.get("English Original", ""),
181
  "work": work_id,
backend/runner/tasks.py CHANGED
@@ -43,6 +43,9 @@ def run_task(
43
  creators: List of creator names to filter by (optional)
44
  model: Model type to use ("clip" or "paintingclip")
45
  """
 
 
 
46
  # Clear any cached images from patch inference
47
  try:
48
  from .patch_inference import _prepare_image
@@ -103,10 +106,13 @@ def run_task(
103
  runs[run_id]["updatedAt"] = runs[run_id]["finishedAt"]
104
  # Clear any previous error message if present
105
  runs[run_id].pop("errorMessage", None)
 
 
 
106
 
107
  except Exception as exc:
108
  # On any error, mark the run as failed and record the error message
109
- print(f"Error in run {run_id}: {exc}") # This should already be there
110
  import traceback
111
 
112
  traceback.print_exc() # Add full traceback
@@ -118,3 +124,4 @@ def run_task(
118
  runs[run_id]["updatedAt"] = datetime.now(timezone.utc).isoformat(
119
  timespec="seconds"
120
  )
 
 
43
  creators: List of creator names to filter by (optional)
44
  model: Model type to use ("clip" or "paintingclip")
45
  """
46
+ print(f"🚀 Starting task for run {run_id}")
47
+ print(f"🚀 Image path: {image_path}")
48
+ print(f"🚀 Topics: {topics}, Creators: {creators}, Model: {model}")
49
  # Clear any cached images from patch inference
50
  try:
51
  from .patch_inference import _prepare_image
 
106
  runs[run_id]["updatedAt"] = runs[run_id]["finishedAt"]
107
  # Clear any previous error message if present
108
  runs[run_id].pop("errorMessage", None)
109
+ print(f"✅ Task completed successfully for run {run_id}")
110
+ print(f"✅ Output saved to: {output_path}")
111
+ print(f"✅ Output key: {output_key}")
112
 
113
  except Exception as exc:
114
  # On any error, mark the run as failed and record the error message
115
+ print(f"Error in run {run_id}: {exc}") # This should already be there
116
  import traceback
117
 
118
  traceback.print_exc() # Add full traceback
 
124
  runs[run_id]["updatedAt"] = datetime.now(timezone.utc).isoformat(
125
  timespec="seconds"
126
  )
127
+ print(f"❌ Run {run_id} marked as error: {runs[run_id]['errorMessage']}")
backend/tests/test_patch_inference.py CHANGED
@@ -35,7 +35,7 @@ def print_results(results: List[Dict[str, Any]], title: str, max_display: int =
35
  print(f"\n{i}. {result['english_original'][:100]}...")
36
  print(f" Score: {result['score']:.4f}")
37
  print(f" Work: {result['work']}")
38
- print(f" ID: {result['sentence_id']}")
39
 
40
  if len(results) > max_display:
41
  print(f"\n... and {len(results) - max_display} more results")
@@ -151,7 +151,7 @@ def main():
151
 
152
  # Get the top sentence from whole image
153
  if whole_image_results:
154
- top_sentence_id = whole_image_results[0]["sentence_id"]
155
  top_sentence_text = whole_image_results[0]["english_original"]
156
 
157
  print(f"Top whole-image sentence: {top_sentence_text[:80]}...")
@@ -175,7 +175,7 @@ def main():
175
  region_score = None
176
  region_rank = None
177
  for rank, result in enumerate(region_results, 1):
178
- if result["sentence_id"] == top_sentence_id:
179
  region_score = result["score"]
180
  region_rank = rank
181
  break
 
35
  print(f"\n{i}. {result['english_original'][:100]}...")
36
  print(f" Score: {result['score']:.4f}")
37
  print(f" Work: {result['work']}")
38
+ print(f" ID: {result['id']}")
39
 
40
  if len(results) > max_display:
41
  print(f"\n... and {len(results) - max_display} more results")
 
151
 
152
  # Get the top sentence from whole image
153
  if whole_image_results:
154
+ top_sentence_id = whole_image_results[0]["id"]
155
  top_sentence_text = whole_image_results[0]["english_original"]
156
 
157
  print(f"Top whole-image sentence: {top_sentence_text[:80]}...")
 
175
  region_score = None
176
  region_rank = None
177
  for rank, result in enumerate(region_results, 1):
178
+ if result["id"] == top_sentence_id:
179
  region_score = result["score"]
180
  region_rank = rank
181
  break
frontend/js/artefact-context.js CHANGED
@@ -515,7 +515,16 @@ function fetchPresign() {
515
  .then(response => {
516
  logWorkingMessage('Run registered successfully', 'text-white');
517
  $('#debugStatus').text('Run submitted');
518
- pollRunStatus(runId);
 
 
 
 
 
 
 
 
 
519
  })
520
  .catch(err => {
521
  console.error('Upload or /runs error:', err);
@@ -562,6 +571,7 @@ function pollRunStatus(runId) {
562
  .then(res => res.json())
563
  .then(output => {
564
  logWorkingMessage('Outputs received', 'text-white');
 
565
  display_sentences(output);
566
  $('#workingOverlay').addClass('d-none');
567
  })
@@ -603,6 +613,11 @@ function pollRunStatus(runId) {
603
  * @returns {string}
604
  */
605
  function escapeHTML(str) {
 
 
 
 
 
606
  return str.replace(/[&<>'"]/g, tag => (
607
  {'&': '&amp;', '<': '&lt;', '>': '&gt;', "'": '&#39;', '"': '&quot;'}[tag]
608
  ));
@@ -613,10 +628,14 @@ function escapeHTML(str) {
613
  * @param {Array|Object} data - Array of sentence objects or {sentences:[…]}
614
  */
615
  function display_sentences(data) {
 
 
616
  // normalise payload
617
  if (!Array.isArray(data)) {
618
  data = (data && Array.isArray(data.sentences)) ? data.sentences : [];
619
  }
 
 
620
  if (!data.length) { // nothing to show ⇒ just hide overlay
621
  $('#workingOverlay').addClass('d-none');
622
  return;
@@ -627,10 +646,18 @@ function display_sentences(data) {
627
  $('#sentenceList').empty();
628
 
629
  /* ---------- sentence list construction ---------- */
630
- data.forEach(item => {
 
 
 
 
 
 
 
 
631
  const li = $(`
632
  <li class="list-group-item sentence-item mb-1"
633
- data-work="${item.work}"
634
  data-sentence="${escapeHTML(item.english_original)}">
635
  <div class="d-flex align-items-center">
636
  <span class="flex-grow-1">${escapeHTML(item.english_original)}</span>
 
515
  .then(response => {
516
  logWorkingMessage('Run registered successfully', 'text-white');
517
  $('#debugStatus').text('Run submitted');
518
+
519
+ // If we got sentences directly (stub mode), display them
520
+ if (response.sentences && response.sentences.length > 0) {
521
+ logWorkingMessage('Stub mode: displaying sentences directly', 'text-white');
522
+ display_sentences(response);
523
+ $('#workingOverlay').addClass('d-none');
524
+ } else {
525
+ // Real ML mode: poll for results
526
+ pollRunStatus(runId);
527
+ }
528
  })
529
  .catch(err => {
530
  console.error('Upload or /runs error:', err);
 
571
  .then(res => res.json())
572
  .then(output => {
573
  logWorkingMessage('Outputs received', 'text-white');
574
+ console.log('Raw output data:', output); // Debug logging
575
  display_sentences(output);
576
  $('#workingOverlay').addClass('d-none');
577
  })
 
613
  * @returns {string}
614
  */
615
  function escapeHTML(str) {
616
+ // Defensive: ensure str is a string
617
+ if (typeof str !== 'string') {
618
+ console.warn('escapeHTML called with non-string:', str);
619
+ return String(str || '');
620
+ }
621
  return str.replace(/[&<>'"]/g, tag => (
622
  {'&': '&amp;', '<': '&lt;', '>': '&gt;', "'": '&#39;', '"': '&quot;'}[tag]
623
  ));
 
628
  * @param {Array|Object} data - Array of sentence objects or {sentences:[…]}
629
  */
630
  function display_sentences(data) {
631
+ console.log('display_sentences called with:', data); // Debug logging
632
+
633
  // normalise payload
634
  if (!Array.isArray(data)) {
635
  data = (data && Array.isArray(data.sentences)) ? data.sentences : [];
636
  }
637
+ console.log('Normalized data:', data); // Debug logging
638
+
639
  if (!data.length) { // nothing to show ⇒ just hide overlay
640
  $('#workingOverlay').addClass('d-none');
641
  return;
 
646
  $('#sentenceList').empty();
647
 
648
  /* ---------- sentence list construction ---------- */
649
+ data.forEach((item, index) => {
650
+ console.log(`Processing item ${index}:`, item); // Debug logging
651
+
652
+ // Validate required fields
653
+ if (!item.english_original || typeof item.english_original !== 'string') {
654
+ console.warn(`Item ${index} has invalid english_original:`, item.english_original);
655
+ return; // Skip this item
656
+ }
657
+
658
  const li = $(`
659
  <li class="list-group-item sentence-item mb-1"
660
+ data-work="${item.work || 'unknown'}"
661
  data-sentence="${escapeHTML(item.english_original)}">
662
  <div class="d-flex align-items-center">
663
  <span class="flex-grow-1">${escapeHTML(item.english_original)}</span>