oops / gemini_fix.py
deansmile123's picture
Upload folder using huggingface_hub
75f0bc0 verified
Raw
History Blame Contribute Delete
5.11 kB
import os
from PIL import Image
from google import genai
from google.genai import types
from pydantic import BaseModel
import json
from tqdm import tqdm
# ----------------------------------------------------------------------
# Optional: A structured schema for obstacle analysis
# It is only used when structured=True; call with structured=False for free-text output.
# ----------------------------------------------------------------------
class ObstacleAnswer(BaseModel):
    # Schema handed to the model as ``response_schema`` when structured JSON
    # output is requested. Documented with comments instead of a docstring so
    # that no extra description text ends up in the generated schema.

    # Name of the analyzed image.
    image_name: str
    # Answer to question 1 of the prompt.
    q1: str
# q2: str
# ----------------------------------------------------------------------
# Prompt used for Gemini 3 Pro (currently one active question; question 2 is commented out below)
# ----------------------------------------------------------------------
# Question text sent with every image. The leading and trailing newlines are
# part of the prompt and are kept exactly.
QUESTIONS = (
    "\n"
    "Answer the following question about obstacles in the image:\n"
    "1. Identify the obstacle on the sidewalk or walkable path ahead.\n"
)
# 2. Identify the single object most likely to be hit by a pedestrian moving straight ahead, and assign it a formal Out-of-Place Score (0–100) based only on its position.
# Scoring scale:
# 0 = perfectly expected position (default/home location)
# 50 = somewhat out of place from where it is typically expected
# 100 = completely out of place and highly surprising
# Persona and instruction text placed ahead of the image in each request.
SYSTEM_MESSAGE = (
    "I am fully blind. "
    "You are a mobility assistant who analyzes the scene and describes "
    "obstacles for safe navigation. "
    "Be concise and accurate."
)
# ----------------------------------------------------------------------
# Gemini 3 Pro client setup
# ----------------------------------------------------------------------
# The API key is read from the environment so that no secret is stored in
# source control. GEMINI_API_KEY is checked first, then GOOGLE_API_KEY.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not GEMINI_API_KEY:
    # Fail early with an actionable message instead of a later API error.
    raise RuntimeError(
        "No Gemini API key found: set the GEMINI_API_KEY (or GOOGLE_API_KEY) "
        "environment variable before running this script."
    )

# Shared client used for every request made by this script.
client = genai.Client(api_key=GEMINI_API_KEY)

# Model identifier passed to generate_content.
MODEL_ID = "gemini-3-pro-preview"
# ----------------------------------------------------------------------
# Function to analyze one image
# ----------------------------------------------------------------------
def analyze_image_with_gemini(img_path: str, structured: bool = False):
    """Send one image together with the prompt to Gemini and return the reply.

    Args:
        img_path: Path of the image file to analyze.
        structured: When True, request a JSON reply constrained to the
            ``ObstacleAnswer`` schema; otherwise request a free-text reply.

    Returns:
        The response object returned by ``client.models.generate_content``.

    Raises:
        Nothing is caught here: errors from opening the image or from the
        API call propagate to the caller.
    """
    # Only the structured mode needs a generation config; ``None`` leaves the
    # request at its defaults, exactly as if the argument were omitted.
    config = None
    if structured:
        config = types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=ObstacleAnswer,
        )

    # The context manager releases the image file handle even when resizing
    # or the API call raises. The request is made inside the ``with`` block
    # because a closed image can no longer be read.
    with Image.open(img_path) as image:
        # Shrink in place (aspect ratio preserved) to at most 512x512 pixels.
        image.thumbnail((512, 512))
        return client.models.generate_content(
            model=MODEL_ID,
            contents=[SYSTEM_MESSAGE, image, QUESTIONS],
            config=config,
        )
# ----------------------------------------------------------------------
# Example: process a folder of images
# ----------------------------------------------------------------------
def process_folder(image_dir, output_txt, structured=False):
    """Analyze every image in a folder and append the results to a file.

    Entries of ``image_dir`` are visited in sorted name order; only names
    ending in .png, .jpg, .jpeg or .heic (case-insensitive) are processed.
    The output file is opened in append mode, so existing content is kept
    and new records are added after it.

    A failure on one image is printed and recorded in the output, and
    processing continues with the next image. Errors while writing the
    output file itself are not caught.

    Args:
        image_dir: Directory containing the images.
        output_txt: Path of the output file.
        structured: When True, write exactly one JSON object per line
            (JSONL), including for failures; otherwise write plain-text
            ``IMAGE: ...`` blocks.
    """
    image_suffixes = (".png", ".jpg", ".jpeg", ".heic")

    with open(output_txt, "a", encoding="utf-8") as f_out:
        for fname in tqdm(sorted(os.listdir(image_dir))):
            if not fname.lower().endswith(image_suffixes):
                continue

            img_path = os.path.join(image_dir, fname)
            print(f"Processing: {img_path}")

            # Build the complete record first and write it once, so a failure
            # halfway through never leaves a partial entry in the file.
            try:
                response = analyze_image_with_gemini(img_path, structured=structured)
                if structured:
                    try:
                        data = json.loads(response.text)
                        # Overwrite whatever name the model produced with the
                        # actual filename.
                        data["image_name"] = fname
                        record = json.dumps(data) + "\n"
                    except (ValueError, TypeError) as parse_err:
                        # Invalid JSON, a missing reply, or a reply that is
                        # not a JSON object: keep the raw text for inspection.
                        print(f"JSON parse error for {fname}: {parse_err}")
                        record = json.dumps({
                            "image_name": fname,
                            "error": "JSON parse error",
                            "raw_response": response.text,
                        }) + "\n"
                else:
                    record = f"IMAGE: {img_path}\n" + response.text.strip() + "\n\n"
            except Exception as e:
                # Per-image boundary: one bad image must not stop the batch.
                print(f"Error processing {img_path}: {e}")
                if structured:
                    # Keep the output valid JSONL even for failures.
                    record = json.dumps({"image_name": fname, "error": str(e)}) + "\n"
                else:
                    record = f"IMAGE: {img_path}\nERROR: {e}\n\n"

            f_out.write(record)
            # Flush after every image so partial progress survives a crash.
            f_out.flush()
# ----------------------------------------------------------------------
# Example usage
# ----------------------------------------------------------------------
if __name__ == "__main__":
    import argparse

    # Command-line options; every default reproduces the previously
    # hard-coded value, so running without arguments behaves as before.
    parser = argparse.ArgumentParser(
        description="Run the obstacle prompt on every image in a folder."
    )
    parser.add_argument(
        "--image-dir",
        default="/scratch/ds5725/OOPS/images",
        help="directory containing the images to analyze",
    )
    parser.add_argument(
        "--output",
        default="gemini_results_fix_q1.jsonl",
        help="file that results are appended to",
    )
    parser.add_argument(
        "--plain-text",
        dest="structured",
        action="store_false",
        help="write free-text answers instead of JSON following ObstacleAnswer",
    )
    args = parser.parse_args()

    process_folder(
        image_dir=args.image_dir,
        output_txt=args.output,
        structured=args.structured,
    )