oops / gpt.py

Upload folder using huggingface_hub

75f0bc0 verified about 2 months ago

7.11 kB

	import os
	import base64
	import openai
	from time import sleep
	from tqdm import tqdm
	import re
	# Matches one numbered answer line such as "3. Center left, front."
	# Group 1 captures the question number (1-7). Leading whitespace is
	# optional (``\s*``) so that lines starting at column 0 match; this agrees
	# with the pattern used by ``looks_complete``.
	Q_RE = re.compile(r"^\s*([1-7])\.\s.+", re.M)

	def looks_complete(ans: str) -> bool:
	    """Tell whether a model reply contains a numbered line for each of 1..7.

	    A falsy or whitespace-only reply is never complete. Otherwise every line
	    that begins (after optional whitespace) with a digit 1-7 followed by a
	    dot is collected, and the reply counts as complete only when all seven
	    numbers were seen.
	    """
	    has_text = bool(ans) and bool(ans.strip())
	    if not has_text:
	        return False

	    numbers_seen = set()
	    for match in re.finditer(r"^\s*([1-7])\.", ans, flags=re.M):
	        numbers_seen.add(int(match.group(1)))

	    return numbers_seen == {1, 2, 3, 4, 5, 6, 7}

	# -------------------------------------------------------------------
	# Setup
	# -------------------------------------------------------------------
	# SECURITY: credentials must never be committed to source control. The key
	# that was previously hard-coded on this line has been published and must
	# be revoked and rotated. The key is now read from the OPENAI_API_KEY
	# environment variable; it is None when the variable is not set.
	openai.api_key = os.environ.get("OPENAI_API_KEY")

	# -------------------------------------------------------------------
	# Helper: encode image as base64
	# -------------------------------------------------------------------
	def encode_image(image_path: str) -> str:
	    """Read the file at image_path and return its bytes as base64 text."""
	    with open(image_path, "rb") as image_file:
	        raw_bytes = image_file.read()
	    encoded = base64.b64encode(raw_bytes)
	    return encoded.decode("utf-8")

	# -------------------------------------------------------------------
	# Main function
	# -------------------------------------------------------------------
	def analyze_obstacles_in_folder(
	    image_dir: str,
	    output_path: str,
	    model: str = "gpt-5",
	    temperature: float = 0.2,
	    sleep_time: float = 0.5,
	    images_to_regenerate=("Safety_Cone_Pos1_OOPS0.png",),
	):
	    """
	    Send selected .png images in image_dir to GPT with the obstacle prompt
	    and append the results to output_path.

	    Each selected image is base64-encoded and sent together with a fixed
	    7-question prompt. A reply that does not contain numbered lines 1-7 is
	    requested again (three attempts in total) with an extra formatting
	    reminder; the last reply is written even if it is still incomplete.
	    An exception while processing one image is printed, recorded in the
	    output file as an ERROR entry, and processing continues with the next
	    image.

	    Args:
	        image_dir: Directory listed (non-recursively) for .png files.
	        output_path: Text file opened in append mode; one record per image.
	        model: Model name forwarded to the chat-completion call.
	        temperature: Accepted for interface compatibility; it is not
	            forwarded to the API call.
	            NOTE(review): confirm the target model accepts a custom
	            temperature before wiring it through.
	        sleep_time: Seconds to pause after each successfully processed image.
	        images_to_regenerate: File names (not paths) to process. The default
	            keeps the previously hard-coded single-image selection; pass
	            None to process every .png found in image_dir.

	    Returns:
	        None.
	    """
	    # A lone string would otherwise be split into characters by set().
	    if isinstance(images_to_regenerate, str):
	        images_to_regenerate = [images_to_regenerate]
	    allowed_names = (
	        None if images_to_regenerate is None else set(images_to_regenerate)
	    )

	    image_paths = sorted(
	        os.path.join(image_dir, f)
	        for f in os.listdir(image_dir)
	        if f.lower().endswith(".png")
	        and (allowed_names is None or f in allowed_names)
	    )

	    if not image_paths:
	        print(f"No .png images found in {image_dir}")
	        return

	    # The list of 7 questions – stays in user message
	    questions_prompt = (
	        "Answer ALL 7 questions.\n"
	        "FORMAT REQUIREMENT (must follow exactly):\n"
	        "- Output exactly 7 lines.\n"
	        "- Each line MUST start with the question number and a dot: '1.' '2.' ... '7.'\n"
	        "- Do NOT add extra lines, bullets, or commentary.\n"
	        "- If you are unsure, still answer with 'Unknown.' (do not leave blank).\n\n"
	        "1. Identify the obstacle on the sidewalk or walkable path ahead.\n\n"
	        "2. If I keep walking, will I collide with this obstacle? "
	        "Response: \"Yes\" or \"No.\"\n\n"
	        "3. If relevant, specify where it is on the path: "
	        "{far left, center left, middle, center right, right}. "
	        "Also include approximate direction relative to the user: {left / front / right / behind}.\n\n"
	        "4. If a collision is likely, describe how I should walk to avoid it using simple "
	        "directional instructions. If no collision is likely, say clear passway.\n\n"
	        "5. How should this obstacle be moved or repositioned so that it no longer poses "
	        "danger to pedestrians and returns to its default or home location? "
	        "Example: \"Move the sign closer to the building wall\" or "
	        "\"Place the bin at the curb edge.\"\n\n"
	        "6. How predictable or anticipated is this obstacle’s presence at its exact "
	        "location? Response: One of {expected / somewhat unexpected / clearly out of place}.\n\n "
	        "7. Identify the single object most likely to be hit by a pedestrian moving straight ahead, "
	        "and assign it a formal Out-of-Place Score (0–100) based only on its position:\n"
	        "0 = perfectly expected position (default/home location)\n"
	        "50 = somewhat out of place from where it is typically expected\n"
	        "100 = completely out of place and highly surprising\n\n"
	    )

	    # System message sent unchanged with every request.
	    system_message_text = (
	        "I am fully blind. You are a mobility assistant who analyzes the scene "
	        "and describes obstacles for safe navigation. Be concise and accurate."
	    )

	    with open(output_path, "a", encoding="utf-8") as out_f:
	        for img_path in tqdm(image_paths, desc="Processing images"):
	            try:
	                img_b64 = encode_image(img_path)

	                # The base prompt is passed untouched for every image; the
	                # retry reminder is applied inside the helper and never
	                # stored back, so it cannot leak into later images.
	                answer = _request_complete_answer(
	                    model, system_message_text, questions_prompt, img_b64
	                )

	                out_f.write(f"IMAGE: {img_path}\n")
	                out_f.write(answer.strip() + "\n")
	                out_f.write("\n" + "-" * 80 + "\n\n")
	                out_f.flush()

	                sleep(sleep_time)

	            except Exception as e:
	                # Per-image boundary: record the failure and keep going.
	                print(f"Error processing {img_path}: {e}")
	                out_f.write(f"IMAGE: {img_path}\n")
	                out_f.write(f"ERROR: {e}\n")
	                out_f.write("\n" + "-" * 80 + "\n\n")
	                out_f.flush()

	    print(f"Done. Results saved to {output_path}")


	def _request_complete_answer(
	    model, system_message_text, questions_prompt, img_b64, max_retries=3
	):
	    """Ask the model about one image, retrying while the reply is incomplete.

	    The first attempt sends questions_prompt as given. Each later attempt
	    sends questions_prompt plus a single formatting reminder. Returns the
	    text of the last reply received ("" when the reply had no content).
	    """
	    retry_nudge = (
	        "\nIMPORTANT: Your previous response was empty or missing some numbered lines. "
	        "Output EXACTLY 7 lines, numbered 1. to 7., no extra text. "
	        "If unsure, write 'Unknown.'\n"
	    )
	    prompt = questions_prompt
	    answer = ""

	    for attempt in range(1, max_retries + 1):
	        response = openai.ChatCompletion.create(
	            model=model,
	            messages=[
	                {"role": "system", "content": system_message_text},
	                {
	                    "role": "user",
	                    "content": [
	                        {"type": "text", "text": prompt},
	                        {
	                            "type": "image_url",
	                            "image_url": {"url": f"data:image/png;base64,{img_b64}"},
	                        },
	                    ],
	                },
	            ],
	            max_completion_tokens=2048,
	        )

	        answer = response.choices[0].message.content or ""

	        # success: has 1..7
	        if looks_complete(answer):
	            break

	        # If empty/incomplete, retry with a stricter nudge
	        if attempt < max_retries:
	            prompt = questions_prompt + retry_nudge
	            sleep(0.5)  # small backoff

	    return answer

	# -------------------------------------------------------------------
	# CLI
	# -------------------------------------------------------------------
	if __name__ == "__main__":
	    import argparse

	    cli = argparse.ArgumentParser(description="Process PNG images with GPT.")

	    # Mandatory locations: where to read images and where to append results.
	    for required_flag in ("--image_dir", "--output"):
	        cli.add_argument(required_flag, required=True)

	    cli.add_argument("--model", default="gpt-5")

	    # Optional float-valued settings with their command-line defaults.
	    for float_flag, fallback in (("--temperature", 0.2), ("--sleep", 1.0)):
	        cli.add_argument(float_flag, type=float, default=fallback)

	    options = cli.parse_args()

	    analyze_obstacles_in_folder(
	        image_dir=options.image_dir,
	        output_path=options.output,
	        model=options.model,
	        temperature=options.temperature,
	        sleep_time=options.sleep,
	    )