ISR

Paused

zye0616 Claude Opus 4.6 (1M context) committed 21 days ago

Commit

75ec737

1 Parent(s): 240e068

test: add AssessDetections BAML tests with real SAR frame

Two test cases using an actual flood rescue aerial frame:
- SAR_PersonOnRooftop: person on rooftop satisfies rescue mission
- SAR_DogNotMissionTarget: neither person nor dog satisfies cargo mission

Also fixes AssessDetections prompt to use proper system/user roles
for OpenAI image compatibility.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (3) hide show

baml_client/inlinedbaml.py +1 -1
baml_src/fixtures/sar_rooftop.jpg +3 -0
baml_src/isr.baml +74 -2

baml_client/inlinedbaml.py CHANGED Viewed

@@ -14,7 +14,7 @@ _file_map = {
     "clients.baml": "// ISR LLM clients\n\nclient<llm> GPT4oMini {\n  provider openai\n  retry_policy Retry\n  options {\n    model \"gpt-4o-mini\"\n    api_key env.OPENAI_API_KEY\n    temperature 0.1\n  }\n}\n\nclient<llm> GPT4o {\n  provider openai\n  retry_policy Retry\n  options {\n    model \"gpt-4o\"\n    api_key env.OPENAI_API_KEY\n    temperature 0.2\n  }\n}\n\nretry_policy Retry {\n  max_retries 2\n  strategy {\n    type exponential_backoff\n    delay_ms 500\n    multiplier 2.0\n    max_delay_ms 5000\n  }\n}\n",
     "generators.baml": "// This helps use auto generate libraries you can use in the language of\n// your choice. You can have multiple generators if you use multiple languages.\n// Just ensure that the output_dir is different for each generator.\ngenerator target {\n    // Valid values: \"python/pydantic\", \"typescript\", \"go\", \"rust\", \"ruby/sorbet\", \"rest/openapi\"\n    output_type \"python/pydantic\"\n\n    // Where the generated code will be saved (relative to baml_src/)\n    output_dir \"../\"\n\n    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).\n    // The BAML VSCode extension version should also match this version.\n    version \"0.220.0\"\n\n    // Valid values: \"sync\", \"async\"\n    // This controls what `b.FunctionName()` will be (sync or async).\n    default_client_mode sync\n}\n",
-    "isr.baml": "// ISR Mission Planning & Assessment Functions\n\n// ── Mission Planning ─────────────────────────────────────────────\n// Takes a free-form mission objective and produces:\n// 1. Concrete object class queries for the detector (YOLO/DETR/GDINO)\n// 2. A refined mission statement for downstream assessment\n\nclass MissionPlan {\n  detector_queries string[] @description(\"Object class labels to feed to the detector. Use common COCO class names (person, car, truck, bicycle, motorcycle, bus, dog, cat, etc.) or short noun phrases for open-vocabulary detectors. 3-8 items.\")\n  refined_mission string @description(\"A clear, one-sentence restatement of the mission objective that an analyst can evaluate each detection against.\")\n  reasoning string @description(\"Brief explanation of why these queries were chosen.\")\n}\n\nfunction PlanMission(mission_text: string) -> MissionPlan {\n  client GPT4oMini\n  prompt #\"\n    You are an ISR (Intelligence, Surveillance, Reconnaissance) mission planner.\n\n    Given a free-form mission objective, determine:\n    1. What object classes a visual detector should look for (use common COCO labels when possible: person, car, truck, bus, motorcycle, bicycle, dog, cat, backpack, suitcase, etc.)\n    2. A refined mission statement that an analyst can use to evaluate each detection.\n\n    Mission objective: \"{{ mission_text }}\"\n\n    {{ ctx.output_format }}\n  \"#\n}\n\n\n// ── Detection Assessment ─────────────────────────────────────────\n// Replaces hand-rolled JSON parsing with type-safe BAML output\n\nclass DetectionInfo {\n  track_id string\n  class_label string\n  bbox_width_px int\n  bbox_height_px int\n  area_ratio float @description(\"Fraction of frame area occupied by this detection\")\n  speed_kph float\n  direction string @description(\"Clock direction (e.g. 
'3h' for rightward) or 'unknown'\")\n}\n\nclass DetectionVerdict {\n  track_id string\n  mission_relevant bool @description(\"Does this CLASS of object relate to the mission at all?\")\n  satisfies bool? @description(\"Does THIS SPECIFIC detection meet the mission criteria? null if uncertain.\")\n  reason string @description(\"1-2 sentences explaining the assessment\")\n  features map<string, string> @description(\"2-5 key-value pairs of observable properties relevant to the mission\")\n}\n\n// ── PlanMission Tests ────────────────────────────────────────────\n\ntest HeavyCargoVehicles {\n  functions [PlanMission]\n  args {\n    mission_text \"identify vehicles that can carry heavy cargos\"\n  }\n  @@assert( {{ this.detector_queries|length >= 3 }} )\n  @@assert( {{ \"truck\" in this.detector_queries }} )\n}\n\ntest PersonOnRooftop {\n  functions [PlanMission]\n  args {\n    mission_text \"identify person stranded on rooftop\"\n  }\n  @@assert( {{ this.detector_queries|length >= 3 }} )\n  @@assert( {{ \"person\" in this.detector_queries }} )\n}\n\n\n// ── Detection Assessment ─────────────────────────────────────────\n// Replaces hand-rolled JSON parsing with type-safe BAML output\n\nfunction AssessDetections(mission: string, detections: DetectionInfo[], frame_image: image) -> DetectionVerdict[] {\n  client GPT4oMini\n  prompt #\"\n    {_.role(\"user\")}\n\n    You are an ISR analyst assessing aerial drone detections against a mission objective.\n\n    Mission: \"{{ mission }}\"\n\n    Detected objects:\n    {% for d in detections %}\n    - {{ d.track_id }}: class={{ d.class_label }}, bbox={{ d.bbox_width_px }}x{{ d.bbox_height_px }}px, area_ratio={{ d.area_ratio }}, speed={{ d.speed_kph }}kph, direction={{ d.direction }}\n    {% endfor %}\n\n    Frame context (showing all detections):\n    {{ frame_image }}\n\n    Assess each detection against the mission.\n\n    {{ ctx.output_format }}\n  \"#\n}\n",
 }
 def get_baml_files():

     "clients.baml": "// ISR LLM clients\n\nclient<llm> GPT4oMini {\n  provider openai\n  retry_policy Retry\n  options {\n    model \"gpt-4o-mini\"\n    api_key env.OPENAI_API_KEY\n    temperature 0.1\n  }\n}\n\nclient<llm> GPT4o {\n  provider openai\n  retry_policy Retry\n  options {\n    model \"gpt-4o\"\n    api_key env.OPENAI_API_KEY\n    temperature 0.2\n  }\n}\n\nretry_policy Retry {\n  max_retries 2\n  strategy {\n    type exponential_backoff\n    delay_ms 500\n    multiplier 2.0\n    max_delay_ms 5000\n  }\n}\n",
     "generators.baml": "// This helps use auto generate libraries you can use in the language of\n// your choice. You can have multiple generators if you use multiple languages.\n// Just ensure that the output_dir is different for each generator.\ngenerator target {\n    // Valid values: \"python/pydantic\", \"typescript\", \"go\", \"rust\", \"ruby/sorbet\", \"rest/openapi\"\n    output_type \"python/pydantic\"\n\n    // Where the generated code will be saved (relative to baml_src/)\n    output_dir \"../\"\n\n    // The version of the BAML package you have installed (e.g. same version as your baml-py or @boundaryml/baml).\n    // The BAML VSCode extension version should also match this version.\n    version \"0.220.0\"\n\n    // Valid values: \"sync\", \"async\"\n    // This controls what `b.FunctionName()` will be (sync or async).\n    default_client_mode sync\n}\n",
+    "isr.baml": "// ISR Mission Planning & Assessment Functions\n\n// ── Mission Planning ─────────────────────────────────────────────\n// Takes a free-form mission objective and produces:\n// 1. Concrete object class queries for the detector (YOLO/DETR/GDINO)\n// 2. A refined mission statement for downstream assessment\n\nclass MissionPlan {\n  detector_queries string[] @description(\"Object class labels to feed to the detector. Use common COCO class names (person, car, truck, bicycle, motorcycle, bus, dog, cat, etc.) or short noun phrases for open-vocabulary detectors. 3-8 items.\")\n  refined_mission string @description(\"A clear, one-sentence restatement of the mission objective that an analyst can evaluate each detection against.\")\n  reasoning string @description(\"Brief explanation of why these queries were chosen.\")\n}\n\nfunction PlanMission(mission_text: string) -> MissionPlan {\n  client GPT4oMini\n  prompt #\"\n    You are an ISR (Intelligence, Surveillance, Reconnaissance) mission planner.\n\n    Given a free-form mission objective, determine:\n    1. What object classes a visual detector should look for (use common COCO labels when possible: person, car, truck, bus, motorcycle, bicycle, dog, cat, backpack, suitcase, etc.)\n    2. A refined mission statement that an analyst can use to evaluate each detection.\n\n    Mission objective: \"{{ mission_text }}\"\n\n    {{ ctx.output_format }}\n  \"#\n}\n\n\n// ── Detection Assessment ─────────────────────────────────────────\n// Replaces hand-rolled JSON parsing with type-safe BAML output\n\nclass DetectionInfo {\n  track_id string\n  class_label string\n  bbox_width_px int\n  bbox_height_px int\n  area_ratio float @description(\"Fraction of frame area occupied by this detection\")\n  speed_kph float\n  direction string @description(\"Clock direction (e.g. 
'3h' for rightward) or 'unknown'\")\n}\n\nclass DetectionVerdict {\n  track_id string\n  mission_relevant bool @description(\"Does this CLASS of object relate to the mission at all?\")\n  satisfies bool? @description(\"Does THIS SPECIFIC detection meet the mission criteria? null if uncertain.\")\n  reason string @description(\"1-2 sentences explaining the assessment\")\n  features map<string, string> @description(\"2-5 key-value pairs of observable properties relevant to the mission\")\n}\n\n// ── PlanMission Tests ────────────────────────────────────────────\n\ntest HeavyCargoVehicles {\n  functions [PlanMission]\n  args {\n    mission_text \"identify vehicles that can carry heavy cargos\"\n  }\n  @@assert( {{ this.detector_queries|length >= 3 }} )\n  @@assert( {{ \"truck\" in this.detector_queries }} )\n}\n\ntest PersonOnRooftop {\n  functions [PlanMission]\n  args {\n    mission_text \"identify person stranded on rooftop\"\n  }\n  @@assert( {{ this.detector_queries|length >= 3 }} )\n  @@assert( {{ \"person\" in this.detector_queries }} )\n}\n\n\n// ── Detection Assessment ─────────────────────────────────────────\n// Replaces hand-rolled JSON parsing with type-safe BAML output\n\nfunction AssessDetections(mission: string, detections: DetectionInfo[], frame_image: image) -> DetectionVerdict[] {\n  client GPT4oMini\n  prompt #\"\n    {{ _.role(\"system\") }}\n    You are an ISR analyst assessing aerial drone detections against a mission objective.\n\n    {{ _.role(\"user\") }}\n    Mission: \"{{ mission }}\"\n\n    Detected objects:\n    {% for d in detections %}\n    - {{ d.track_id }}: class={{ d.class_label }}, bbox={{ d.bbox_width_px }}x{{ d.bbox_height_px }}px, area_ratio={{ d.area_ratio }}, speed={{ d.speed_kph }}kph, direction={{ d.direction }}\n    {% endfor %}\n\n    Frame context (showing all detections):\n    {{ frame_image }}\n\n    Assess each detection against the mission.\n\n    {{ ctx.output_format }}\n  \"#\n}\n\n// ── AssessDetections Tests 
───────────────────────────────────────\n// SAR flood scene: person + dog on rooftop, surrounded by floodwater\n\ntest SAR_PersonOnRooftop {\n  functions [AssessDetections]\n  args {\n    mission \"identify person stranded on rooftop needing rescue\"\n    detections [\n      {\n        track_id \"T01\"\n        class_label \"person\"\n        bbox_width_px 120\n        bbox_height_px 280\n        area_ratio 0.0059\n        speed_kph 0.0\n        direction \"unknown\"\n      },\n      {\n        track_id \"T02\"\n        class_label \"dog\"\n        bbox_width_px 80\n        bbox_height_px 50\n        area_ratio 0.0007\n        speed_kph 0.0\n        direction \"unknown\"\n      }\n    ]\n    frame_image {\n      file \"fixtures/sar_rooftop.jpg\"\n      media_type \"image/jpeg\"\n    }\n  }\n  @@assert( {{ this|length == 2 }} )\n  @@assert( {{ this[0].track_id == \"T01\" }} )\n  @@assert( {{ this[0].mission_relevant == true }} )\n  @@assert( {{ this[0].satisfies == true }} )\n}\n\ntest SAR_DogNotMissionTarget {\n  functions [AssessDetections]\n  args {\n    mission \"identify vehicles capable of transporting heavy cargo\"\n    detections [\n      {\n        track_id \"T01\"\n        class_label \"person\"\n        bbox_width_px 120\n        bbox_height_px 280\n        area_ratio 0.0059\n        speed_kph 0.0\n        direction \"unknown\"\n      },\n      {\n        track_id \"T02\"\n        class_label \"dog\"\n        bbox_width_px 80\n        bbox_height_px 50\n        area_ratio 0.0007\n        speed_kph 0.0\n        direction \"unknown\"\n      }\n    ]\n    frame_image {\n      file \"fixtures/sar_rooftop.jpg\"\n      media_type \"image/jpeg\"\n    }\n  }\n  @@assert( {{ this|length == 2 }} )\n  @@assert( {{ this[0].satisfies != true }} )\n  @@assert( {{ this[1].satisfies != true }} )\n}\n",
 }
 def get_baml_files():

baml_src/fixtures/sar_rooftop.jpg ADDED Viewed

Git LFS Details

SHA256: 3eb820eaf8ffb1861c6a3e7574c673f4eeec553bf1cdbc9fcdaad962e5392d35
Pointer size: 131 Bytes
Size of remote file: 269 kB

baml_src/isr.baml CHANGED Viewed

@@ -75,10 +75,10 @@ test PersonOnRooftop {
 function AssessDetections(mission: string, detections: DetectionInfo[], frame_image: image) -> DetectionVerdict[] {
   client GPT4oMini
   prompt #"
-    {_.role("user")}
     You are an ISR analyst assessing aerial drone detections against a mission objective.
     Mission: "{{ mission }}"
     Detected objects:
@@ -94,3 +94,75 @@ function AssessDetections(mission: string, detections: DetectionInfo[], frame_im
     {{ ctx.output_format }}
   "#
 }

 function AssessDetections(mission: string, detections: DetectionInfo[], frame_image: image) -> DetectionVerdict[] {
   client GPT4oMini
   prompt #"
+    {{ _.role("system") }}
     You are an ISR analyst assessing aerial drone detections against a mission objective.
+    {{ _.role("user") }}
     Mission: "{{ mission }}"
     Detected objects:
     {{ ctx.output_format }}
   "#
 }
+// ── AssessDetections Tests ───────────────────────────────────────
+// SAR flood scene: person + dog on rooftop, surrounded by floodwater
+// Runs AssessDetections with a rescue mission, two hand-written detections
+// (T01 person, T02 dog) and the sar_rooftop.jpg fixture image, then checks
+// that the first verdict is T01 and is both mission-relevant and satisfying.
+// NOTE(review): the image path is presumably resolved relative to baml_src/
+// (the fixture is added as baml_src/fixtures/sar_rooftop.jpg) — confirm.
+test SAR_PersonOnRooftop {
+  functions [AssessDetections]
+  args {
+    mission "identify person stranded on rooftop needing rescue"
+    detections [
+      {
+        track_id "T01"
+        class_label "person"
+        bbox_width_px 120
+        bbox_height_px 280
+        area_ratio 0.0059
+        speed_kph 0.0
+        direction "unknown"
+      },
+      {
+        track_id "T02"
+        class_label "dog"
+        bbox_width_px 80
+        bbox_height_px 50
+        area_ratio 0.0007
+        speed_kph 0.0
+        direction "unknown"
+      }
+    ]
+    frame_image {
+      file "fixtures/sar_rooftop.jpg"
+      media_type "image/jpeg"
+    }
+  }
+  // Two detections go in, so exactly two verdicts are expected back.
+  @@assert( {{ this|length == 2 }} )
+  // The remaining checks index by position: they require the first verdict
+  // to be T01, i.e. the output must follow the input order.
+  @@assert( {{ this[0].track_id == "T01" }} )
+  @@assert( {{ this[0].mission_relevant == true }} )
+  @@assert( {{ this[0].satisfies == true }} )
+  // No assertion is made about the T02 (dog) verdict in this test.
+}
+// Negative case: reuses the same two detections (T01 person, T02 dog) and
+// the same fixture image, but with a heavy-cargo-vehicle mission, and checks
+// that neither verdict is reported as satisfying the mission.
+// Despite the test name, both the person and the dog verdicts are checked.
+test SAR_DogNotMissionTarget {
+  functions [AssessDetections]
+  args {
+    mission "identify vehicles capable of transporting heavy cargo"
+    detections [
+      {
+        track_id "T01"
+        class_label "person"
+        bbox_width_px 120
+        bbox_height_px 280
+        area_ratio 0.0059
+        speed_kph 0.0
+        direction "unknown"
+      },
+      {
+        track_id "T02"
+        class_label "dog"
+        bbox_width_px 80
+        bbox_height_px 50
+        area_ratio 0.0007
+        speed_kph 0.0
+        direction "unknown"
+      }
+    ]
+    frame_image {
+      file "fixtures/sar_rooftop.jpg"
+      media_type "image/jpeg"
+    }
+  }
+  // Two detections go in, so exactly two verdicts are expected back.
+  @@assert( {{ this|length == 2 }} )
+  // `!= true` rather than `== false`: `satisfies` is declared `bool?`, so a
+  // null ("uncertain") verdict also passes; only an explicit true fails.
+  // track_id is not asserted here, so these checks hold for either ordering.
+  @@assert( {{ this[0].satisfies != true }} )
+  @@assert( {{ this[1].satisfies != true }} )
+}