| import os |
| import base64 |
| import openai |
| from time import sleep |
| from tqdm import tqdm |
|
|
| |
| |
| |
# SECURITY: a secret API key used to be hard-coded on this line. Credentials
# must never live in source control; the key that was embedded here should be
# treated as compromised and revoked. The key is now taken from the
# OPENAI_API_KEY environment variable (None when the variable is unset, in
# which case API calls will fail with an authentication error).
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
|
| |
| |
| |
def encode_image(image_path: str) -> str:
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode and read in full; the bytes are
    base64-encoded and the result is decoded as UTF-8 so it can be embedded
    in a string (for example a ``data:`` URL).
    """
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")
|
|
| |
| |
| |
def analyze_obstacles_in_folder(
    image_dir: str,
    output_path: str,
    model: str = "gpt-5",
    temperature: float = 1,
    sleep_time: float = 1.0,
    only_names=(
        "Flower_pot_Pos5_OOPS0.png",
        "GarbageBag_Set1_Pos1_OOPS0.png",
        "Safety_Cone_Pos3_OOPS1.png",
        "trashcan_in_0L.png",
    ),
) -> None:
    """Send selected .png images in ``image_dir`` to the model and log the answers.

    Each selected image is base64-encoded, sent together with the obstacle
    prompt, and the reply (or the error, if the request fails) is appended to
    ``output_path`` as one record per image.

    Args:
        image_dir: Directory scanned (non-recursively) for files whose name
            ends in ``.png`` (case-insensitive).
        output_path: Text file the records are appended to (UTF-8).
        model: Model name passed to the chat-completion request.
        temperature: Accepted for interface compatibility but NOT sent with
            the request. NOTE(review): forwarding a non-default value may be
            rejected by some models - confirm before wiring it through.
        sleep_time: Seconds to pause after each successful request.
        only_names: File-name fragments; only images whose base name contains
            at least one of them are processed. The default keeps the
            historical four-image allow-list. Pass ``None`` (or an empty
            sequence) to process every .png in the directory.
    """
    image_paths = sorted(
        os.path.join(image_dir, f)
        for f in os.listdir(image_dir)
        if f.lower().endswith(".png")
    )
    if not image_paths:
        print(f"No .png images found in {image_dir}")
        return

    if only_names:
        # Match against the file name only, so a directory component can never
        # accidentally satisfy the filter.
        image_paths = [
            path
            for path in image_paths
            if any(name in os.path.basename(path) for name in only_names)
        ]
        if not image_paths:
            print(f"No .png images in {image_dir} match the name filter")
            return

    questions_prompt = (
        "1. Identify the obstacle on the sidewalk or walkable path ahead. "
        "2.Identify the single object most likely to be hit by a pedestrian moving straight ahead, and assign it a formal Out-of-Place Score (0–100) based only on its position.\n"
        "Scoring scale:\n"
        "0 = perfectly expected position (default/home location)\n"
        "50 = somewhat out of place from where it is typically expected\n"
        "100 = completely out of place and highly surprising\n\n"
    )

    system_message_text = (
        "I am fully blind. You are a mobility assistant who analyzes the scene "
        "and describes obstacles for safe navigation. Be concise and accurate. "
    )

    separator = "\n" + "-" * 80 + "\n\n"

    with open(output_path, "a", encoding="utf-8") as out_f:
        for img_path in tqdm(image_paths, desc="Processing images"):
            # One failing image must not abort the whole batch, so any error
            # from encoding or the request is recorded and the loop continues.
            try:
                img_b64 = encode_image(img_path)

                response = openai.ChatCompletion.create(
                    model=model,
                    messages=[
                        {
                            "role": "system",
                            "content": system_message_text,
                        },
                        {
                            "role": "user",
                            "content": [
                                {"type": "text", "text": questions_prompt},
                                {
                                    "type": "image_url",
                                    "image_url": {
                                        "url": f"data:image/png;base64,{img_b64}"
                                    },
                                },
                            ],
                        },
                    ],
                    max_completion_tokens=2048,
                )

                answer = response.choices[0].message.content
                if answer is None:
                    # Surface a readable error instead of an AttributeError
                    # from calling .strip() on None.
                    raise ValueError("model returned no text content")
                record = answer.strip()
                succeeded = True
            except Exception as e:
                print(f"Error processing {img_path}: {e}")
                record = f"ERROR: {e}"
                succeeded = False

            out_f.write(f"IMAGE: {img_path}\n")
            out_f.write(record + "\n")
            out_f.write(separator)
            # Flush per record so partial results survive an interrupted run.
            out_f.flush()

            if succeeded:
                sleep(sleep_time)

    print(f"Done. Results saved to {output_path}")
|
|
| |
| |
| |
if __name__ == "__main__":
    import argparse

    # Command-line entry point: collect the run settings and hand them to
    # analyze_obstacles_in_folder.
    cli = argparse.ArgumentParser(description="Process PNG images with GPT.")
    for required_flag in ("--image_dir", "--output"):
        cli.add_argument(required_flag, required=True)
    cli.add_argument("--model", default="gpt-5")
    for float_flag, fallback in (("--temperature", 0.2), ("--sleep", 1.0)):
        cli.add_argument(float_flag, type=float, default=fallback)

    options = cli.parse_args()

    run_settings = {
        "model": options.model,
        "temperature": options.temperature,
        "sleep_time": options.sleep,
    }
    analyze_obstacles_in_folder(options.image_dir, options.output, **run_settings)
|
|