Upload folder using huggingface_hub

b27cd24 verified about 2 months ago

6.9 kB

	import json
	import os
	import base64
	from time import sleep
	from tqdm import tqdm
	import openai

	# Read the API key from the environment so that no secret is stored in the
	# source file. Set OPENAI_API_KEY before running this script.
	# NOTE(review): a literal key was previously committed on this line; treat it
	# as leaked and revoke/rotate it.
	openai.api_key = os.environ.get("OPENAI_API_KEY")

	def encode_image(image_path):
	    """Return the contents of the file at ``image_path`` as base64 text.

	    Args:
	        image_path: Path of the file to read; it is opened in binary mode.

	    Returns:
	        str: The base64 encoding of the file's bytes, decoded as UTF-8.

	    Raises:
	        OSError: If the file cannot be opened or read.
	    """
	    with open(image_path, "rb") as image_file:
	        raw_bytes = image_file.read()
	    return base64.b64encode(raw_bytes).decode("utf-8")

	def _build_annotation_prompt():
	"""
	Returns a compact, deterministic prompt with the exact questions and options
	used by the GUI tool, and asks for strict JSON output.
	"""
	# Questions and options mirrored from gui1.py (keep these in lockstep). :contentReference[oaicite:2]{index=2}
	return (
	"You are an expert at analyzing a single image of a line of people. "
	"Answer the following 17 questions STRICTLY as a single JSON object. "
	"Use the exact keys provided, and for multiple-choice fields choose ONE "
	"of the listed options verbatim. If something is not visible, pick the most appropriate option (e.g., 'N/A').\n\n"
	"Return ONLY JSON. No prose.\n\n"
	"Definitions:\n"
	"- Start of the line (front): the person closest to the counter or service point. "
	"This is usually the direction the line is facing towards.\n"
	"- End of the line (back): the person farthest from the counter or service point. "
	"This is usually the last person to join the line.\n\n"
	"Fields:\n"
	'1) "number_of_turns": integer\n'
	'2) "line_shape": one of ["Straight","Curved","S-shaped","Angled","other"]\n'
	'3) "line_facing_direction": one of ["Facing towards","Facing away","Facing sideways","other"]\n'
	'4) "number_of_people_in_line": integer\n'
	'5) "line_purpose": string (short phrase)\n'
	'6) "start_person_description": string (brief)\n'
	'7) "end_person_description": string (brief)\n'
	'8) "counter_person_description": string (brief)\n'
	'9) "boundary_present": one of ["yes","no"]\n'
	'10) "boundary_types": one of ["none","cones","rope dividers","stanchions","other"]\n'
	'11) "end_of_line_visible": one of ["yes","no"]\n'
	'12) "end_of_line_location_if_visible": one of ["far left","center left","center","center right","far right","N/A"]\n'
	'13) "direction_to_turn_to_see_end_if_not_visible": one of ["left","right","back","N/A"]\n'
	'14) "start_of_line_visible": one of ["yes","no"]\n'
	'15) "start_of_line_location_if_visible": one of ["far left","center left","center","center right","far right","N/A"]\n'
	'16) "direction_to_turn_to_see_start_if_not_visible": one of ["left","right","back","N/A"]\n'
	'17) "line_completeness": one of ["full","partial"]\n\n'
	"JSON schema example (values are placeholders):\n"
	"{\n"
	' "number_of_turns": 0,\n'
	' "line_shape": "Straight",\n'
	' "line_facing_direction": "Facing towards",\n'
	' "number_of_people_in_line": 16,\n'
	' "line_purpose": "airport",\n'
	' "start_person_description": "man wearing hat and blue shirt",\n'
	' "end_person_description": "person wearing black t-shirt",\n'
	' "counter_person_description": "unknown",\n'
	' "boundary_present": "true",\n'
	' "boundary_types": "rope dividers",\n'
	' "end_of_line_visible": "yes",\n'
	' "end_of_line_location_if_visible": "far left",\n'
	' "direction_to_turn_to_see_end_if_not_visible": "N/A",\n'
	' "start_of_line_visible": "no",\n'
	' "start_of_line_location_if_visible": "N/A",\n'
	' "direction_to_turn_to_see_start_if_not_visible": "right",\n'
	' "line_completeness": "partial"\n'
	"}"
	)

	def generate_reranking(image_paths, res_file_name, temperature=0.2):
	    """Annotate each image with the ``gpt-4o`` chat model and log the replies.

	    For every path in ``image_paths`` the file is base64-encoded, sent to the
	    model together with the prompt from ``_build_annotation_prompt()``, and the
	    reply is appended to ``res_file_name``. Images are processed one at a time
	    with a 0.5 second pause after each request.

	    Args:
	        image_paths (List[str]): absolute or relative image paths.
	        res_file_name (str): output text file, opened in append mode. One
	            record is written per image: the image's basename on one line,
	            the reply on the next line, then a blank line.
	        temperature (float): sampling temperature passed to the API.

	    Returns:
	        None. Results are only written to ``res_file_name``.

	    Notes:
	        If the reply parses as JSON it is re-serialized compactly on a single
	        line; otherwise the raw reply text is written unchanged so that it can
	        be inspected later. Errors from reading an image or from the API call
	        are not caught here and stop the run.
	    """
	    # Local import: only needed here to label the data URL with the file type.
	    import mimetypes

	    prompt = _build_annotation_prompt()
	    system_text = (
	        "You analyze a SINGLE image and return ONLY valid JSON "
	        "with the specified keys and enumerated options. Do not include any extra text."
	    )

	    with open(res_file_name, "a", encoding="utf-8") as f:
	        for img_path in tqdm(image_paths):
	            basename = os.path.basename(img_path)

	            # Declare the image's actual type in the data URL. Fall back to
	            # PNG (the label that was previously used for every file) when the
	            # extension is unknown or does not map to an image type.
	            mime_type, _ = mimetypes.guess_type(img_path)
	            if not mime_type or not mime_type.startswith("image/"):
	                mime_type = "image/png"

	            img_b64 = encode_image(img_path)
	            print(img_path)
	            response = openai.ChatCompletion.create(
	                model="gpt-4o",
	                messages=[
	                    {
	                        "role": "system",
	                        "content": [{"type": "text", "text": system_text}],
	                    },
	                    {
	                        "role": "user",
	                        "content": [
	                            {"type": "text", "text": prompt},
	                            {
	                                "type": "image_url",
	                                "image_url": {"url": f"data:{mime_type};base64,{img_b64}"},
	                            },
	                        ],
	                    },
	                ],
	                max_tokens=1000,
	                temperature=temperature,
	            )

	            content = response.choices[0].message.content.strip()
	            # Normalize valid JSON to one compact line; a reply that is not
	            # valid JSON is deliberately kept as-is for troubleshooting.
	            try:
	                content = json.dumps(json.loads(content), ensure_ascii=False)
	            except ValueError:  # includes json.JSONDecodeError
	                pass

	            f.write(basename + "\n" + content + "\n\n")
	            # Flush after every record so finished work survives a later crash.
	            f.flush()
	            sleep(0.5)

	# Directory that contains the image subfolders listed below.
	root = "/vast/ds5725/linefinder/LineFinder/Images"

	# Subfolders of `root` that are walked recursively.
	subfolders = ["QueuesInAirports","QueuesInSupermarkets", "QueuesInBanks", "ImagesOnline"]

	# Collect the absolute path of every file found under each subfolder.
	# NOTE(review): no filtering by extension is done, so any non-image file in
	# these folders is also collected and later sent for annotation.
	all_files = []
	for sub in subfolders:
	    folder_path = os.path.join(root, sub)
	    for dirpath, _, filenames in os.walk(folder_path):
	        for f in filenames:
	            all_files.append(os.path.abspath(os.path.join(dirpath, f)))

	# Sort the collected paths so the slice below always refers to the same
	# position in the list for an unchanged set of files.
	all_files.sort()

	# Annotate everything from index 63 onward, appending to the results file.
	# NOTE(review): the first 63 entries are skipped - presumably they were
	# handled by an earlier run; confirm before changing the offset.
	generate_reranking(all_files[63:], "gpt_line_test.jsonl")