Hola-Gordon committed on
Commit
23ee959
·
1 Parent(s): 22888d4

Still tuning accuracy of detection prompts to reduce false positives

Browse files
Files changed (4) hide show
  1. .DS_Store +0 -0
  2. .gitignore +0 -0
  3. main2.py +552 -4
  4. promt_yaml.md → prompt_yaml.md +53 -15
.DS_Store ADDED
Binary file (8.2 kB). View file
 
.gitignore ADDED
File without changes
main2.py CHANGED
@@ -592,7 +592,8 @@ def setup_directories():
592
  output_dirs = {
593
  "standard": os.path.join("output", "standard"),
594
  "shifted": os.path.join("output", "shifted"),
595
- "crops": os.path.join("output", "crops")
 
596
  }
597
 
598
  # Create the images directory if it doesn't exist
@@ -729,6 +730,387 @@ def process_images(images_dir, output_dir, image_files):
729
  opacity=opacity
730
  )
731
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
732
  def main():
733
  """
734
  Main function to run the grid numbering script.
@@ -736,21 +1118,186 @@ def main():
736
  # Setup directories
737
  images_dir, output_dirs = setup_directories()
738
 
 
 
 
 
 
 
 
739
  # Get image files
740
  image_files = get_image_files(images_dir)
741
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
742
  if image_files:
743
  # Choose which operation to perform
744
- operation = "grid" # Options: "grid", "shift", "crop", "crop_cells", "all"
745
 
746
- if operation == "grid" or operation == "all":
 
747
  print("\nProcessing images with standard grid pattern...")
748
  process_images(images_dir, output_dirs["standard"], image_files)
749
 
750
- if operation == "shift" or operation == "all":
 
751
  print("\nProcessing images with shifted grid pattern...")
752
  process_images_with_shift(images_dir, output_dirs["shifted"], image_files)
753
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
754
  if operation == "crop" or operation == "all":
755
  print("\nCropping images around specific dots...")
756
  # Example: crop around these dot numbers
@@ -772,5 +1319,6 @@ def main():
772
  else:
773
  print("No images to process.")
774
 
 
775
  if __name__ == "__main__":
776
  main()
 
592
  output_dirs = {
593
  "standard": os.path.join("output", "standard"),
594
  "shifted": os.path.join("output", "shifted"),
595
+ "crops": os.path.join("output", "crops"),
596
+ "verification": os.path.join("output", "verification")
597
  }
598
 
599
  # Create the images directory if it doesn't exist
 
730
  opacity=opacity
731
  )
732
 
733
def convert_to_supported_format(input_path, output_format="jpg"):
    """
    Convert an image to a format supported by OpenAI's Vision API.

    The image is decoded with OpenCV first; if OpenCV cannot read it, PIL is
    tried as a fallback. The result is written next to the input file, using
    the same base name and ``output_format`` as the extension.

    Args:
        input_path (str): Path to the input image
        output_format (str): Output format ('jpg', 'png', 'webp', or 'gif')

    Returns:
        str: Path to the converted image, or None if the image could not be
        read or written
    """
    try:
        # Read the image
        img = cv2.imread(input_path)
        if img is None:
            try:
                with Image.open(input_path) as pil_img:
                    # Normalise to 3-channel RGB first: RGBA, palette and
                    # grayscale images would otherwise make COLOR_RGB2BGR fail.
                    rgb_img = pil_img.convert("RGB")
                img = cv2.cvtColor(np.array(rgb_img), cv2.COLOR_RGB2BGR)
            except Exception as e:
                print(f"Error: Could not read image with PIL either: {e}")
                return None

        # Create output path
        file_dir = os.path.dirname(input_path)
        file_name, _ = os.path.splitext(os.path.basename(input_path))
        output_path = os.path.join(file_dir, f"{file_name}.{output_format}")

        # Save in the new format; imwrite reports failure via its return value
        if not cv2.imwrite(output_path, img):
            print(f"Error converting image: could not write {output_path}")
            return None
        print(f"Converted image saved to {output_path}")

        return output_path
    except Exception as e:
        print(f"Error converting image: {e}")
        return None
768
+
769
+
770
def resize_image_if_needed(image_path, max_size=4096):
    """
    Resize an image if either dimension exceeds max_size.

    The longer side is scaled down to ``max_size`` and the shorter side is
    scaled by the same ratio. The resized copy is written next to the
    original with a ``_resized`` suffix.

    Args:
        image_path (str): Path to the image to check
        max_size (int): Maximum allowed width/height in pixels

    Returns:
        str: Path to the resized copy, or the original ``image_path`` when the
        image is unreadable, already small enough, or the copy could not be
        written
    """
    img = cv2.imread(image_path)
    if img is None:
        return image_path

    height, width = img.shape[:2]
    if max(height, width) <= max_size:
        return image_path

    # Calculate new dimensions; clamp the short side to 1 px so that extreme
    # aspect ratios cannot produce a zero-sized target
    if width > height:
        new_width = max_size
        new_height = max(1, int(height * (max_size / width)))
    else:
        new_height = max_size
        new_width = max(1, int(width * (max_size / height)))

    # Resize the image
    img_resized = cv2.resize(img, (new_width, new_height))

    # Save the resized image
    file_dir = os.path.dirname(image_path)
    file_name, file_ext = os.path.splitext(os.path.basename(image_path))
    output_path = os.path.join(file_dir, f"{file_name}_resized{file_ext}")
    if not cv2.imwrite(output_path, img_resized):
        # Do not hand back a path to a file that was never created
        print(f"Warning: could not write resized image to {output_path}")
        return image_path
    print(f"Resized image saved to {output_path}")
    return output_path
798
+
799
+
800
def call_openai_api(standard_grid_path, shifted_grid_path, prompt_text, api_key):
    """
    Call the OpenAI API with both grid images in a single request.

    Each image is first converted to JPEG when it is a TIFF, then resized if
    needed, and finally embedded in the request as a base64 data URL.

    Args:
        standard_grid_path: Path to the standard grid image
        shifted_grid_path: Path to the shifted grid image
        prompt_text: The prompt text to send to the API
        api_key: Your OpenAI API key

    Returns:
        The API response text

    Raises:
        ValueError: If a TIFF image could not be converted to a supported format
    """
    import base64
    import mimetypes
    from openai import OpenAI

    def prepare_data_url(image_path):
        """Convert/resize one image and return it as a base64 data URL."""
        # Convert first, then resize, so the file that is actually sent is
        # the one that gets size-checked.
        if image_path.lower().endswith(('.tiff', '.tif')):
            converted_path = convert_to_supported_format(image_path, "jpg")
            if converted_path is None:
                raise ValueError(f"Could not convert {image_path} to a supported format")
            image_path = converted_path

        image_path = resize_image_if_needed(image_path)

        # Label the payload with the file's real type instead of always JPEG
        mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
        with open(image_path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode('utf-8')
        return f"data:{mime_type};base64,{encoded}"

    standard_grid_url = prepare_data_url(standard_grid_path)
    shifted_grid_url = prepare_data_url(shifted_grid_path)

    # Initialize the client with your API key
    client = OpenAI(api_key=api_key)

    # Prepare the messages payload
    messages = [
        {
            "role": "system",
            "content": "You are a search and rescue assistant analyzing aerial imagery."
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt_text
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": standard_grid_url,
                        "detail": "high"
                    }
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": shifted_grid_url,
                        "detail": "high"
                    }
                }
            ]
        }
    ]

    # Call the API
    response = client.chat.completions.create(
        model="gpt-4o",  # Latest model that supports vision
        messages=messages,
        max_tokens=2000
    )

    return response.choices[0].message.content
878
+
879
def draw_boundary_around_person(image_path, output_path, dot_number, x_offset=0, y_offset=0,
                                width_percent=0.3, height_percent=0.3, grid_rows=5, grid_cols=5):
    """
    Draw a more precise boundary within a grid cell.

    The cell is located from the 1-based ``dot_number`` (row-major order) and
    a green rectangle sized as a fraction of the cell is drawn around the cell
    center, optionally shifted by a pixel offset.

    Args:
        image_path (str): Path to the source image
        output_path (str): Where the annotated image is written
        dot_number (int): 1-based grid cell number
        x_offset (int): Horizontal shift of the box from the cell center, in pixels
        y_offset (int): Vertical shift of the box from the cell center, in pixels
        width_percent (float): Box width as a fraction of the cell width
        height_percent (float): Box height as a fraction of the cell height
        grid_rows (int): Number of grid rows
        grid_cols (int): Number of grid columns

    Returns:
        The annotated image array, or None if the image could not be read
    """
    # Read the image; bail out like draw_focused_boundary does when unreadable
    img = cv2.imread(image_path)
    if img is None:
        return None
    height, width = img.shape[:2]

    # Calculate cell dimensions
    cell_height = height // grid_rows
    cell_width = width // grid_cols

    # Calculate cell position
    row = (dot_number - 1) // grid_cols
    col = (dot_number - 1) % grid_cols

    # Calculate cell center
    center_x = (col * cell_width) + (cell_width // 2)
    center_y = (row * cell_height) + (cell_height // 2)

    # Calculate smaller boundary within the cell
    box_width = int(cell_width * width_percent)
    box_height = int(cell_height * height_percent)

    # Apply offset from center if provided
    x1 = center_x - (box_width // 2) + x_offset
    y1 = center_y - (box_height // 2) + y_offset
    x2 = x1 + box_width
    y2 = y1 + box_height

    # Draw the boundary
    img_with_boundary = img.copy()
    cv2.rectangle(img_with_boundary, (x1, y1), (x2, y2), (0, 255, 0), 2)

    # Save the image
    cv2.imwrite(output_path, img_with_boundary)

    return img_with_boundary
916
+
917
+
918
def draw_focused_boundary(image_path, output_path, dot_number, grid_rows=5, grid_cols=5,
                          boundary_color=(0, 255, 0), focus_factor=0.5):
    """
    Draw a boundary focused on the center portion of a grid cell.

    The full cell is outlined with a thin line, the centered focus area with a
    thicker line, and a "Person detected" label is placed next to the cell.

    Args:
        image_path (str): Path to the source image
        output_path (str): Where the annotated image is written
        dot_number (int): 1-based grid cell number (row-major order)
        grid_rows (int): Number of grid rows
        grid_cols (int): Number of grid columns
        boundary_color (tuple): Color passed to OpenCV for the focus box and label
        focus_factor (float): Focus box size as a fraction of the cell size

    Returns:
        The annotated image array, or None if the image could not be read
    """
    img = cv2.imread(image_path)
    if img is None:
        return None

    height, width = img.shape[:2]
    cell_height = height // grid_rows
    cell_width = width // grid_cols

    # Calculate grid position
    row = (dot_number - 1) // grid_cols
    col = (dot_number - 1) % grid_cols

    # Calculate original cell boundaries
    cell_x1 = col * cell_width
    cell_y1 = row * cell_height
    cell_x2 = cell_x1 + cell_width
    cell_y2 = cell_y1 + cell_height

    # Calculate focused area within cell
    center_x = cell_x1 + (cell_width // 2)
    center_y = cell_y1 + (cell_height // 2)

    focus_width = int(cell_width * focus_factor)
    focus_height = int(cell_height * focus_factor)

    x1 = center_x - (focus_width // 2)
    y1 = center_y - (focus_height // 2)
    x2 = x1 + focus_width
    y2 = y1 + focus_height

    # Draw the boundary
    img_copy = img.copy()

    # Draw full cell with thin line
    cv2.rectangle(img_copy, (cell_x1, cell_y1), (cell_x2, cell_y2), (0, 150, 0), 1)

    # Draw focused area with thicker line
    cv2.rectangle(img_copy, (x1, y1), (x2, y2), boundary_color, 2)

    # Add label. putText takes the bottom-left corner of the text, so a label
    # placed above a top-row cell would land off the image; in that case put
    # it just inside the cell instead. Also keep it from running off the
    # right edge for cells in the last columns.
    label = f"Person detected (Cell {dot_number})"
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.9
    thickness = 2
    (text_width, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)

    label_y = cell_y1 - 10
    if label_y - text_height < 0:
        label_y = cell_y1 + text_height + 10
    label_x = max(0, min(cell_x1, width - text_width))

    cv2.putText(img_copy, label, (label_x, label_y), font, font_scale, boundary_color, thickness)

    cv2.imwrite(output_path, img_copy)
    return img_copy
966
+
967
def parse_api_response(api_response):
    """
    Parse the YAML response from the OpenAI API to extract the recommended zoom area.

    Lookup order:
      1. A number directly after ``recommended_zoom_area:``.
      2. A number in the value of ``best_detection:`` (same line, or the
         indented line immediately below it).
      3. The ``final_determination`` section of a fenced ```yaml block.
      4. A number on any key-like line whose key contains "detection" or "area".

    Matches are confined to the value of the key they belong to, so a response
    that answers "None" is not turned into a detection by an unrelated digit
    elsewhere in the text.

    Args:
        api_response (str): The raw text response from the API

    Returns:
        int: The recommended grid number to zoom in on, or None if not found
    """
    import re

    if not api_response:
        return None

    def first_int(value):
        """Return the first integer found in a parsed YAML value, or None."""
        if isinstance(value, bool):
            return None
        if isinstance(value, int):
            return value
        if isinstance(value, str):
            digits = re.search(r'\d+', value)
            return int(digits.group()) if digits else None
        if isinstance(value, dict):
            value = list(value.values())
        if isinstance(value, (list, tuple)):
            for item in value:
                found = first_int(item)
                if found is not None:
                    return found
        return None

    # Try to extract directly using regex. An optional opening bracket/quote
    # is tolerated because the prompt template shows values as "[number]".
    recommended_area_match = re.search(
        r'recommended_zoom_area:[ \t]*[\[\'"]?(\d+)', api_response)
    if recommended_area_match:
        return int(recommended_area_match.group(1))

    # The number must belong to best_detection itself: either on the same
    # line ("best_detection: standard_grid: 18") or on the indented line
    # right below it. It must not be picked up from some later line.
    best_detection_match = re.search(
        r'best_detection:[ \t]*(?:\r?\n[ \t]+)?[^\n\d]*(\d+)', api_response)
    if best_detection_match:
        return int(best_detection_match.group(1))

    # If regex failed, try YAML parsing with error handling
    yaml_match = re.search(r'```yaml\n(.*?)\n```', api_response, re.DOTALL)
    if yaml_match:
        try:
            # Imported lazily: only this fallback needs PyYAML
            import yaml

            yaml_content = yaml_match.group(1)

            # Clean up potentially problematic YAML
            # Replace "best_detection: standard_grid: 18" with "best_detection: 'standard_grid: 18'"
            yaml_content = re.sub(
                r'best_detection:[ \t]+([^\n"\']*?):[ \t]+(\d+)',
                r'best_detection: "\1: \2"',
                yaml_content)

            try:
                result = yaml.safe_load(yaml_content)
            except yaml.YAMLError as e:
                print(f"YAML parsing error: {e}")
                result = None

            # safe_load may return None, a list or a scalar; only a mapping
            # can hold the final_determination section
            final = result.get('final_determination') if isinstance(result, dict) else None
            if isinstance(final, dict):
                for key in ('recommended_zoom_area', 'best_detection'):
                    number = first_int(final.get(key))
                    if number is not None:
                        return number
        except Exception as e:
            print(f"Error during response parsing: {e}")

    # If all else fails, accept a number on a key-like line whose key mentions
    # "detection" or "area" (e.g. "Recommended zoom area: 12"). Free-running
    # prose is deliberately not scanned for digits.
    any_number = re.search(
        r'^[ \t\-*]*[\w ]*(?:detection|area)\w*[ \t*]*:[^\n\d]*(\d+)',
        api_response,
        re.MULTILINE | re.IGNORECASE)
    if any_number:
        return int(any_number.group(1))

    return None
1032
+
1033
+
1034
def verify_detection(image_path, prompt_text, api_key):
    """
    Second pass verification of a potential person detection.

    The image is converted to JPEG when it is a TIFF, resized if needed, and
    sent together with ``prompt_text`` in a single vision request.

    Args:
        image_path (str): Path to the (typically zoomed-in) image to verify
        prompt_text (str): Verification prompt sent alongside the image
        api_key (str): OpenAI API key

    Returns:
        str: The API response text

    Raises:
        ValueError: If a TIFF image could not be converted to a supported format
    """
    import base64
    import mimetypes
    from openai import OpenAI

    # Convert to supported formats and resize if needed
    if image_path.lower().endswith(('.tiff', '.tif')):
        converted_path = convert_to_supported_format(image_path, "jpg")
        if converted_path is None:
            # Fail with a clear message instead of passing None further down
            raise ValueError(f"Could not convert {image_path} to a supported format")
        image_path = converted_path

    image_path = resize_image_if_needed(image_path)

    # Initialize API client
    client = OpenAI(api_key=api_key)

    # Encode the image, labelling it with the file's actual type
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"
    with open(image_path, "rb") as image_file:
        base64_image = base64.b64encode(image_file.read()).decode('utf-8')

    # Call the API
    messages = [
        {
            "role": "system",
            "content": "You are a search and rescue imagery analyst specializing in detecting humans in aerial photography."
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt_text
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{base64_image}",
                        "detail": "high"
                    }
                }
            ]
        }
    ]

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=messages,
        max_tokens=1000
    )

    return response.choices[0].message.content
1083
+
1084
def check_verification_result(response_text):
    """
    Analyze verification response to determine if it's really a person.

    The response is scanned for fixed affirmative and negative phrases and for
    a stated confidence level. It counts as a confirmed person when either:
      * affirmative phrases outnumber negative ones and the stated confidence
        is not "low", or
      * the word "yes" appears within the first 100 characters and no negative
        phrase is present.

    Args:
        response_text (str): Raw text returned by the verification request

    Returns:
        bool: True if the response confirms a person, False otherwise
    """
    import re

    # Check for confident affirmative language
    positive_indicators = ['definitely a person', 'clearly a human', 'confident this is a person',
                           'human figure is visible', 'can confirm this is a person']

    # Check for negative language
    negative_indicators = ['not a person', 'false positive', 'no human', 'just a', 'likely just',
                           'probably just', 'appears to be a rock', 'vegetation', 'shadow', 'no evidence']

    response_lower = response_text.lower()

    # Count indicators
    positive_count = sum(1 for term in positive_indicators if term in response_lower)
    negative_count = sum(1 for term in negative_indicators if term in response_lower)

    # Extract any confidence statements. Both "confidence: high" (optionally
    # with markdown emphasis around it) and "high confidence" are recognised.
    confidence_match = re.search(
        r'confidence\W{0,6}(high|medium|low)\b|\b(high|medium|low)\W{0,3}confidence',
        response_lower)
    confidence = None
    if confidence_match:
        confidence = confidence_match.group(1) or confidence_match.group(2)

    # Decision logic
    if positive_count > negative_count and confidence != 'low':
        return True

    # Match "yes" as a whole word only, so that words such as "eyes" do not
    # count as an affirmative answer.
    starts_with_yes = re.search(r'\byes\b', response_lower[:100]) is not None
    return starts_with_yes and negative_count == 0
1113
+
1114
  def main():
1115
  """
1116
  Main function to run the grid numbering script.
 
1118
  # Setup directories
1119
  images_dir, output_dirs = setup_directories()
1120
 
1121
+ # Add a new directory for final results
1122
+ results_dir = os.path.join("output", "results")
1123
+ if not os.path.exists(results_dir):
1124
+ os.makedirs(results_dir)
1125
+ print(f"Created results directory: {results_dir}")
1126
+ output_dirs["results"] = results_dir
1127
+
1128
  # Get image files
1129
  image_files = get_image_files(images_dir)
1130
 
1131
+ # Get API key from environment
1132
+ api_key = os.getenv("OPENAI_API_KEY")
1133
+
1134
+ # Fallback to a hardcoded key if environment variable is not set
1135
+ if not api_key:
1136
+ api_key = "HARD CODED API" # This is a fallback
1137
+ print("Warning: Using hardcoded API key. Better to set OPENAI_API_KEY environment variable.")
1138
+
1139
+ # Read the prompt from the markdown file
1140
+ try:
1141
+ with open("prompt_yaml.md", "r") as f:
1142
+ prompt_text = f.read()
1143
+ print("Successfully loaded prompt from prompt_yaml.md")
1144
+ except Exception as e:
1145
+ print(f"Error reading prompt file: {e}")
1146
+ return
1147
+
1148
+ # Read the verification prompt if it exists
1149
+ try:
1150
+ with open("verification_prompt.md", "r") as f:
1151
+ verification_prompt = f.read()
1152
+ print("Successfully loaded verification prompt")
1153
+ except:
1154
+ # Use a default verification prompt if file doesn't exist
1155
+ verification_prompt = """
1156
+ I'm showing you a zoomed-in section of an aerial image where a person might be present.
1157
+
1158
+ Please carefully analyze this image and determine if there is actually a human present.
1159
+
1160
+ Important considerations:
1161
+ 1. Look for definitive human shapes, limbs, or clothing
1162
+ 2. Be skeptical - many natural features can look like people from above
1163
+ 3. Consider whether this might be a false positive (rock, tree stump, shadow, etc.)
1164
+
1165
+ Provide your assessment with high, medium, or low confidence and explain your reasoning.
1166
+ """
1167
+ print("Using default verification prompt")
1168
+
1169
  if image_files:
1170
  # Choose which operation to perform
1171
+ operation = "all" # Options: "grid", "shift", "crop", "crop_cells", "all", "api"
1172
 
1173
+ # Process images with standard grid if required
1174
+ if operation == "grid" or operation == "all" or operation == "api":
1175
  print("\nProcessing images with standard grid pattern...")
1176
  process_images(images_dir, output_dirs["standard"], image_files)
1177
 
1178
+ # Process images with shifted grid if required
1179
+ if operation == "shift" or operation == "all" or operation == "api":
1180
  print("\nProcessing images with shifted grid pattern...")
1181
  process_images_with_shift(images_dir, output_dirs["shifted"], image_files)
1182
 
1183
+ # Perform API analysis if requested
1184
+ if operation == "api" or operation == "all":
1185
+ print("\nPerforming API analysis with both grid patterns...")
1186
+ for image_file in image_files:
1187
+ file_name, file_ext = os.path.splitext(image_file)
1188
+
1189
+ # Get paths to the generated grid images
1190
+ standard_grid_path = os.path.join(output_dirs["standard"], f"{file_name}_grid{file_ext}")
1191
+
1192
+ # Use the correct path for the shifted grid image (without "_shifted" in the filename)
1193
+ shifted_grid_path = os.path.join(output_dirs["shifted"], f"{file_name}_grid{file_ext}")
1194
+
1195
+ # Check if both images exist
1196
+ if not (os.path.exists(standard_grid_path) and os.path.exists(shifted_grid_path)):
1197
+ print(f"Error: Grid images not found for {image_file}. Run grid and shift operations first.")
1198
+ print(f"Looked for: {standard_grid_path} and {shifted_grid_path}")
1199
+ continue
1200
+
1201
+ print(f"\nAnalyzing grid patterns for {image_file}...")
1202
+ try:
1203
+ # Step 1: Initial detection
1204
+ api_response = call_openai_api(
1205
+ standard_grid_path,
1206
+ shifted_grid_path,
1207
+ prompt_text,
1208
+ api_key
1209
+ )
1210
+
1211
+ # Save the API response to a file
1212
+ response_path = os.path.join(output_dirs["standard"], f"{file_name}_analysis.txt")
1213
+ with open(response_path, "w") as f:
1214
+ f.write(api_response)
1215
+
1216
+ print(f"API response saved to {response_path}")
1217
+
1218
+ # Parse the response to extract recommended zoom area
1219
+ recommended_area = parse_api_response(api_response)
1220
+ if recommended_area:
1221
+ print(f"Potential person detected near number {recommended_area}")
1222
+
1223
+ # Step 2: Create a zoomed image for verification
1224
+ verification_path = os.path.join(output_dirs["verification"], f"{file_name}_verify_{recommended_area}.jpg")
1225
+ crop_image_around_dot(
1226
+ os.path.join(images_dir, image_file),
1227
+ verification_path,
1228
+ recommended_area,
1229
+ grid_rows=5,
1230
+ grid_cols=5,
1231
+ crop_factor=1.5 # Tighter crop for verification
1232
+ )
1233
+
1234
+ # Convert to jpg if needed for API
1235
+ if verification_path.lower().endswith(('.tiff', '.tif')):
1236
+ verification_path = convert_to_supported_format(verification_path, "jpg")
1237
+
1238
+ # Step 3: Verify the detection with a second API call
1239
+ print("Verifying potential detection...")
1240
+ verification_response = verify_detection(verification_path, verification_prompt, api_key)
1241
+
1242
+ # Save verification response
1243
+ verify_resp_path = os.path.join(output_dirs["verification"], f"{file_name}_verify_{recommended_area}_response.txt")
1244
+ with open(verify_resp_path, "w") as f:
1245
+ f.write(verification_response)
1246
+
1247
+ # Step 4: Check verification result
1248
+ is_person = check_verification_result(verification_response)
1249
+
1250
+ if is_person:
1251
+ print(f"CONFIRMED: Person detected in cell {recommended_area}")
1252
+
1253
+ # Draw boundary around the detected person with focused area
1254
+ boundary_path = os.path.join(output_dirs["results"], f"{file_name}_person_detected{file_ext}")
1255
+ draw_focused_boundary(
1256
+ os.path.join(images_dir, image_file),
1257
+ boundary_path,
1258
+ recommended_area,
1259
+ grid_rows=5,
1260
+ grid_cols=5,
1261
+ focus_factor=0.6 # Draw boundary around 60% of the cell
1262
+ )
1263
+ print(f"Image with person boundary saved to {boundary_path}")
1264
+
1265
+ # Create the zoomed crop
1266
+ crop_path = os.path.join(output_dirs["crops"], f"{file_name}_zoom_{recommended_area}{file_ext}")
1267
+ crop_image_around_dot(
1268
+ os.path.join(images_dir, image_file),
1269
+ crop_path,
1270
+ recommended_area,
1271
+ grid_rows=5,
1272
+ grid_cols=5,
1273
+ crop_factor=2.0 # Adjust as needed
1274
+ )
1275
+ print(f"Zoomed image saved to {crop_path}")
1276
+ else:
1277
+ print(f"FALSE POSITIVE: Initial detection in cell {recommended_area} appears to be incorrect.")
1278
+ # Save a rejected detection image for reference
1279
+ rejected_path = os.path.join(output_dirs["results"], f"{file_name}_rejected_{recommended_area}{file_ext}")
1280
+ # To this:
1281
+ draw_boundary_around_person(
1282
+ os.path.join(images_dir, image_file),
1283
+ rejected_path,
1284
+ recommended_area,
1285
+ x_offset=0,
1286
+ y_offset=0,
1287
+ width_percent=0.3,
1288
+ height_percent=0.3,
1289
+ grid_rows=5,
1290
+ grid_cols=5
1291
+ )
1292
+ print(f"Rejected detection saved to {rejected_path}")
1293
+ else:
1294
+ print("Could not identify a potential person in the image.")
1295
+
1296
+ except Exception as e:
1297
+ print(f"Error in API processing: {e}")
1298
+ print(f"Exception details: {str(e)}")
1299
+
1300
+ # Perform manual cropping if requested
1301
  if operation == "crop" or operation == "all":
1302
  print("\nCropping images around specific dots...")
1303
  # Example: crop around these dot numbers
 
1319
  else:
1320
  print("No images to process.")
1321
 
1322
+
1323
  if __name__ == "__main__":
1324
  main()
promt_yaml.md → prompt_yaml.md RENAMED
@@ -5,15 +5,32 @@ You are a search-and-rescue assistant deployed in a wilderness environment. Your
5
 
6
  The missing person may be wearing outdoor or winter gear and could be **lying down, standing, or partially obscured** by vegetation or terrain. Visibility may be reduced due to tree cover, rocks, shadows, or snow.
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  #### Objective:
9
- - **Carefully the image**
10
  - Identify the **nearest number to where a human figure or human-like features are most likely present**.
11
  - You may see only **parts of a human body** (like a head, torso, arm, or leg), or clothing that stands out from the natural environment.
12
  - The individual may appear **small or camouflaged**, so analyze closely.
 
 
13
 
14
- #### Input:
15
- - A single image that has been **given numbers** with proper intervals.
16
- - The numbers are given such that the area can be identified using nearest number.
 
 
17
 
18
  #### What to look for:
19
  - **Skin tones**, **shoes**, **backpacks**, or **bright clothing**.
@@ -26,17 +43,38 @@ The missing person may be wearing outdoor or winter gear and could be **lying do
26
  1. Nearest **integer number(s)** to where a human or human-like feature is most likely detected.
27
  2. **Short justification** for your choice (e.g., “visible figure in red jacket lying near a rock” or “unusual shape with color contrast suggesting a backpack”).
28
  3. If unsure, list **top 2-3 most suspicious numbers** near the human in descending order of confidence.
 
29
 
30
  #### 📝 Format:
31
  ```yaml
32
- likely_human_near_number: 40
33
- confidence: High
34
- reason: "There is a human-shaped figure wearing dark clothing near a cleared area with fallen logs near the number 19"
35
-
36
- alternative_candidates:
37
- - near_number: 57
38
- confidence: Medium
39
- reason: "Bright object that may be clothing or gear, partially hidden by trees."
40
- - near_number: 43
41
- confidence: Low
42
- reason: "Dark form resembling a crouching figure, but could be a shadow or rock."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  The missing person may be wearing outdoor or winter gear and could be **lying down, standing, or partially obscured** by vegetation or terrain. Visibility may be reduced due to tree cover, rocks, shadows, or snow.
7
 
8
+ ## Important Aerial Imagery Considerations:
9
+ - People viewed from above appear very different than from ground level
10
+ - Look for these specific indicators of human presence:
11
+ - **Body shape**: Oval or elongated shapes that contrast with surroundings
12
+ - **Limbs**: Linear extensions from a central mass (arms/legs)
13
+ - **Clothing**: Artificial colors like bright blues, reds, yellows that contrast with nature
14
+ - **Shadow patterns**: Human-shaped shadows distinct from vegetation
15
+ - Common false positives to avoid:
16
+ - Fallen logs or tree branches (often mistaken for bodies)
17
+ - Animal trails or small clearings
18
+ - Rock formations or terrain features
19
+ - Shadows from trees and other vegetation
20
+
21
  #### Objective:
22
+ - **Carefully examine the image**
23
  - Identify the **nearest number to where a human figure or human-like features are most likely present**.
24
  - You may see only **parts of a human body** (like a head, torso, arm, or leg), or clothing that stands out from the natural environment.
25
  - The individual may appear **small or camouflaged**, so analyze closely.
26
+ - Compare both images to account for potential obstruction by the numbered circles
27
+ - If a human figure is visible in one image but not the other, indicate which image and number
28
 
29
+ #### Input:
30
+ - Two versions of the same aerial image, both with numbered grids:
31
+ - Image 1 (standard_grid): Standard grid pattern with numbered circles
32
+ - Image 2 (shifted_grid): Same grid pattern but shifted slightly to ensure no person is hidden behind circles
33
+ - Please analyze both images and determine if a human is present in either or both images
34
 
35
  #### What to look for:
36
  - **Skin tones**, **shoes**, **backpacks**, or **bright clothing**.
 
43
  1. Nearest **integer number(s)** to where a human or human-like feature is most likely detected.
44
  2. **Short justification** for your choice (e.g., “visible figure in red jacket lying near a rock” or “unusual shape with color contrast suggesting a backpack”).
45
  3. If unsure, list **top 2-3 most suspicious numbers** near the human in descending order of confidence.
46
+ 4. If no human is detected in either image, please state so clearly while maintaining the YAML format with "None" values where appropriate.
47
 
48
  #### 📝 Format:
49
  ```yaml
50
+ # Analysis of both grid patterns
51
+ standard_grid:
52
+ likely_human_near_number: [number]
53
+ confidence: [High/Medium/Low]
54
+ reason: "[Detailed description of what you see and why it appears human]"
55
+ alternative_candidates:
56
+ - near_number: [number]
57
+ confidence: [Medium/Low]
58
+ reason: "[Description of what makes this suspicious]"
59
+ - near_number: [number]
60
+ confidence: [Medium/Low]
61
+ reason: "[Description of what makes this suspicious]"
62
+
63
+ shifted_grid:
64
+ likely_human_near_number: [number]
65
+ confidence: [High/Medium/Low]
66
+ reason: "[Detailed description of what you see and why it appears human]"
67
+ alternative_candidates:
68
+ - near_number: [number]
69
+ confidence: [Medium/Low]
70
+ reason: "[Description of what makes this suspicious]"
71
+ - near_number: [number]
72
+ confidence: [Medium/Low]
73
+ reason: "[Description of what makes this suspicious]"
74
+
75
+ # Combined assessment
76
+ final_determination:
77
+ best_detection: [standard_grid: number] OR [shifted_grid: number]
78
+ confidence: [High/Medium/Low]
79
+ reason: "[Explanation of why this is the most reliable detection]"
80
+ recommended_zoom_area: [number]