# File size: 5,107 Bytes
#
# 75f0bc0

import os
from PIL import Image
from google import genai
from google.genai import types
from pydantic import BaseModel
import json
from tqdm import tqdm
# ----------------------------------------------------------------------
# Optional: a structured schema for obstacle analysis.
# Only used when structured=True; pass structured=False for free-text output.
# ----------------------------------------------------------------------
class ObstacleAnswer(BaseModel):
    # Schema passed as `response_schema` to Gemini when structured (JSON)
    # output is requested.
    # NOTE(review): documented with `#` comments on purpose — pydantic may copy
    # a class docstring into the generated schema description; confirm before
    # adding one.

    # Name of the analyzed image; process_folder overwrites whatever the model
    # returns here with the actual file name.
    image_name: str
    # Presumably the answer to question 1 of QUESTIONS — TODO confirm.
    q1: str
    # q2: str


# ----------------------------------------------------------------------
# Prompt sent to Gemini 3 Pro (one active question; a second is kept commented out below)
# ----------------------------------------------------------------------
# Question text appended after the image in every request. The leading and
# trailing newlines are part of the prompt and must be kept.
QUESTIONS = (
    "\n"
    "Answer the following question about obstacles in the image:\n"
    "\n"
    "1. Identify the obstacle on the sidewalk or walkable path ahead.\n"
)
# 2. Identify the single object most likely to be hit by a pedestrian moving straight ahead, and assign it a formal Out-of-Place Score (0–100) based only on its position.
#     Scoring scale:
#     0 = perfectly expected position (default/home location)
#     50 = somewhat out of place from where it is typically expected
#     100 = completely out of place and highly surprising

# Persona/instruction text placed first in the contents of every request.
# Built sentence by sentence; the sentences are joined with single spaces.
SYSTEM_MESSAGE = " ".join(
    [
        "I am fully blind.",
        "You are a mobility assistant who analyzes the scene and describes"
        " obstacles for safe navigation.",
        "Be concise and accurate.",
    ]
)


# ----------------------------------------------------------------------
# Gemini 3 Pro client setup
# ----------------------------------------------------------------------
# The API key is read from the environment so that no credential is stored in
# the source file.
# NOTE(review): a key was previously hard-coded at this spot; treat it as
# leaked and revoke it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not GEMINI_API_KEY:
    # Fail early with an actionable message instead of an opaque auth error
    # on the first request.
    raise RuntimeError(
        "Set the GEMINI_API_KEY (or GOOGLE_API_KEY) environment variable "
        "before running this script."
    )
client = genai.Client(api_key=GEMINI_API_KEY)
MODEL_ID = "gemini-3-pro-preview"


# ----------------------------------------------------------------------
# Function to analyze one image
# ----------------------------------------------------------------------
def analyze_image_with_gemini(
    img_path: str,
    structured: bool = False,
    *,
    max_side: int = 512,
) -> "types.GenerateContentResponse":
    """Send one image plus the obstacle prompt to Gemini and return the reply.

    The request contents are, in order: ``SYSTEM_MESSAGE``, the downsized
    image, and ``QUESTIONS``.

    Args:
        img_path: Path of the image file to analyze.
        structured: When True, request a JSON reply constrained to the
            ``ObstacleAnswer`` schema; otherwise no extra config is sent.
        max_side: Upper bound, in pixels, for both width and height of the
            image that is sent (passed to ``Image.thumbnail``).

    Returns:
        The response object returned by ``client.models.generate_content``.

    Raises:
        OSError: If Pillow cannot open the file (for example, it is missing
            or is not a recognizable image).
        Exception: Errors raised by the Gemini client are propagated unchanged.
    """
    # Build the optional generation config once so that a single request call
    # serves both the structured and the free-text mode.
    config = None
    if structured:
        config = types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=ObstacleAnswer,
        )

    # The context manager releases the image's file handle even if the request
    # fails. The request is issued inside the block because the image must
    # still be readable while it is being serialized for upload.
    with Image.open(img_path) as image:
        image.thumbnail((max_side, max_side))
        return client.models.generate_content(
            model=MODEL_ID,
            contents=[SYSTEM_MESSAGE, image, QUESTIONS],
            config=config,
        )


# ----------------------------------------------------------------------
# Example: process a folder of images
# ----------------------------------------------------------------------
def process_folder(image_dir, output_txt, structured=False):
    """Analyze every image in *image_dir* and append the results to *output_txt*.

    Files are processed in sorted name order; only names ending in ``.png``,
    ``.jpg``, ``.jpeg`` or ``.heic`` (case-insensitive) are considered.
    NOTE(review): whether ``.heic`` files can actually be opened depends on the
    Pillow installation — confirm.

    The output file is opened in append mode, so repeated runs add to it.

    Args:
        image_dir: Directory containing the images.
        output_txt: Path of the output file.
        structured: When True, one JSON object is written per image (JSONL),
            including for failures, so the file stays parseable line by line.
            When False, a plain-text ``IMAGE:`` block is written per image.

    Returns:
        None. A failure on one image is printed, recorded in the output file,
        and does not stop the remaining images from being processed.
    """
    image_suffixes = (".png", ".jpg", ".jpeg", ".heic")
    # Filter before handing the list to tqdm so the progress bar counts
    # images only, not every directory entry.
    image_names = [
        name
        for name in sorted(os.listdir(image_dir))
        if name.lower().endswith(image_suffixes)
    ]

    with open(output_txt, "a", encoding="utf-8") as f_out:
        for fname in tqdm(image_names):
            img_path = os.path.join(image_dir, fname)
            print(f"Processing: {img_path}")

            try:
                response = analyze_image_with_gemini(img_path, structured=structured)
                if structured:
                    record = _structured_record(fname, response.text)
                else:
                    record = f"IMAGE: {img_path}\n" + response.text.strip() + "\n\n"
            except Exception as e:
                # Per-image boundary: one bad image or failed request must not
                # abort the whole batch.
                print(f"Error processing {img_path}: {e}")
                if structured:
                    # Keep the JSONL file valid: errors are JSON records too.
                    record = json.dumps({"image_name": fname, "error": str(e)}) + "\n"
                else:
                    record = f"IMAGE: {img_path}\nERROR: {e}\n\n"

            f_out.write(record)
            # Flush after every image so partial results survive an interruption.
            f_out.flush()


def _structured_record(fname, raw_text):
    """Return one JSONL line for a structured reply, tagged with *fname*."""
    try:
        data = json.loads(raw_text)
        # The model's own image_name is not trusted; use the real file name.
        data["image_name"] = fname
        return json.dumps(data) + "\n"
    except (TypeError, ValueError) as parse_err:
        # ValueError covers malformed JSON; TypeError covers a missing (None)
        # reply text and replies whose top-level value is not an object.
        print(f"JSON parse error for {fname}: {parse_err}")
        return json.dumps({
            "image_name": fname,
            "error": "JSON parse error",
            "raw_response": raw_text,
        }) + "\n"


# ----------------------------------------------------------------------
# Example usage
# ----------------------------------------------------------------------
if __name__ == "__main__":
    # Imported here because argparse is only needed when run as a script.
    import argparse

    # Command-line options default to the values that were previously
    # hard-coded, so running the script without arguments behaves as before.
    parser = argparse.ArgumentParser(
        description="Run the Gemini obstacle prompt over a folder of images."
    )
    parser.add_argument(
        "--image-dir",
        default="/scratch/ds5725/OOPS/images",
        help="directory containing the images to analyze",
    )
    parser.add_argument(
        "--output",
        default="gemini_results_fix_q1.jsonl",
        help="file that results are appended to",
    )
    parser.add_argument(
        "--raw-text",
        dest="structured",
        action="store_false",
        help="write free-text answers instead of JSON following ObstacleAnswer",
    )
    args = parser.parse_args()

    process_folder(
        image_dir=args.image_dir,
        output_txt=args.output,
        structured=args.structured,
    )