import json import os from time import sleep from tqdm import tqdm from PIL import Image from google import genai from google.genai import types from pydantic import BaseModel # ========================= # Gemini setup # ========================= GEMINI_API_KEY = "AIzaSyAfnBWMguUci9GyzW-gBxrxCfOmMExiDnA" client = genai.Client(api_key=GEMINI_API_KEY) MODEL_ID = "gemini-3-flash-preview" # or newer if available # ========================= # JSON schema (STRICT) # ========================= class QueueAnswer(BaseModel): number_of_people: int line_direction: str end_visible: str end_location: str end_camera_direction: str end_person_description: str start_visible: str start_location: str start_camera_direction: str start_person_description: str # ========================= # Prompt (same as GPT version) # ========================= def build_prompt(): return ( "You are an expert at analyzing a single image of a line of people.\n" "Return STRICT JSON only.\n\n" "Fields:\n" 'number_of_people: integer\n' 'line_direction: ["towards", "away", "sideways-left", "sideways-right"]\n' 'end_visible: ["yes","no"]\n' 'end_location: ["far left","center left","center","center right","far right","N/A"]\n' 'end_camera_direction: ["left","right","back","N/A"]\n' 'end_person_description: string\n' 'start_visible: ["yes","no"]\n' 'start_location: ["far left","center left","center","center right","far right","N/A"]\n' 'start_camera_direction: ["left","right","back","N/A"]\n' 'start_person_description: string\n' ) # ========================= # Single image inference # ========================= def analyze_image(img_path): image = Image.open(img_path) image.thumbnail([512, 512]) # Gemini requirement response = client.models.generate_content( model=MODEL_ID, contents=[ "Return ONLY JSON.", build_prompt(), image ], config=types.GenerateContentConfig( response_mime_type="application/json", response_schema=QueueAnswer, temperature=0.2, ), ) return response.text # ========================= # Batch processing # ========================= def generate_reranking(image_paths, output_file): with open(output_file, "a", encoding="utf-8") as f: for img_path in tqdm(image_paths): basename = os.path.basename(img_path) try: result = analyze_image(img_path) try: parsed = json.loads(result) except: parsed = {"error": "invalid_json", "raw": result} f.write(basename + "\n") f.write(json.dumps(parsed) + "\n\n") f.flush() except Exception as e: print(f"Error: {img_path} -> {e}") f.write(basename + "\n") f.write(json.dumps({"error": str(e)}) + "\n\n") sleep(0.2) # ========================= # Load images (same as before) # ========================= root = "/scratch/ds5725/linefinder/LineFinder/Images" subfolders = ["QueuesOutdoors","QueuesInSupermarketNew","QueuesInThemeParks"] all_files = [] for sub in subfolders: folder_path = os.path.join(root, sub) for dirpath, _, filenames in os.walk(folder_path): for f in filenames: all_files.append(os.path.abspath(os.path.join(dirpath, f))) all_files.sort() generate_reranking(all_files, "gemini_line_luna_olivia.jsonl")