| import os |
| from PIL import Image |
| from google import genai |
| from google.genai import types |
| from pydantic import BaseModel |
| import json |
| from tqdm import tqdm |
| |
| |
| |
| |
class ObstacleAnswer(BaseModel):
    """Schema of the JSON object requested from the model in structured mode.

    It is handed to the API as ``response_schema`` so the reply is
    constrained to exactly these two string fields.
    """

    # Name of the image the answer belongs to. The caller overwrites this
    # with the real file name after parsing the reply.
    image_name: str
    # Free-text answer; presumably pairs with question "1." of the prompt.
    q1: str
| |
|
|
|
|
| |
| |
| |
# Prompt text sent alongside every image. The leading and trailing newlines
# are part of the prompt and are kept as written.
QUESTIONS = """
Answer the following question about obstacles in the image:

1. Identify the obstacle on the sidewalk or walkable path ahead.
"""
| |
| |
| |
| |
| |
|
|
# Role/context preamble placed first in the request contents, ahead of the
# image and the question text.
SYSTEM_MESSAGE = (
    "I am fully blind. You are a mobility assistant who analyzes the scene "
    "and describes obstacles for safe navigation. Be concise and accurate."
)
|
|
|
|
| |
| |
| |
# The API key is read from the environment so that no credential is stored
# in the source tree.
# NOTE(review): a literal key used to be committed on this line; treat it as
# compromised and rotate it.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise RuntimeError(
        "GEMINI_API_KEY environment variable is not set; "
        "export it before running this script."
    )

# Shared client used for every request made by this module.
client = genai.Client(api_key=GEMINI_API_KEY)
# Identifier of the model that receives the image + prompt.
MODEL_ID = "gemini-3-pro-preview"
|
|
|
|
| |
| |
| |
def analyze_image_with_gemini(img_path: str, structured: bool = False):
    """Send one image together with the obstacle prompt to the model.

    Args:
        img_path: Path of the image file to analyze.
        structured: When true, ask for a JSON reply constrained to the
            ``ObstacleAnswer`` schema; otherwise send the request without
            any generation config.

    Returns:
        The response object returned by ``client.models.generate_content``.

    Raises:
        Whatever ``Image.open`` or the API call raises; nothing is caught
        here so the caller can decide how to record the failure.
    """
    # The generation config is the only difference between the two modes.
    config = None
    if structured:
        config = types.GenerateContentConfig(
            response_mime_type="application/json",
            response_schema=ObstacleAnswer,
        )

    # The context manager releases the file handle even when the request
    # fails. The request is issued inside it so the image data is still
    # readable at the moment it is serialized for upload.
    with Image.open(img_path) as image:
        # Shrink in place so the image fits within 512x512 before upload.
        image.thumbnail((512, 512))
        return client.models.generate_content(
            model=MODEL_ID,
            contents=[SYSTEM_MESSAGE, image, QUESTIONS],
            config=config,
        )
|
|
|
|
| |
| |
| |
def _format_success(fname, img_path, text, structured):
    """Render one model reply as the text to append to the output file."""
    if not structured:
        return f"IMAGE: {img_path}\n{text.strip()}\n\n"

    try:
        data = json.loads(text)
        # The file name on disk is authoritative; replace whatever value the
        # model produced for this field.
        data["image_name"] = fname
    except (TypeError, ValueError) as parse_err:
        # ValueError covers malformed JSON; TypeError covers a missing reply
        # (None) and valid JSON that is not an object.
        print(f"JSON parse error for {fname}: {parse_err}")
        data = {
            "image_name": fname,
            "error": "JSON parse error",
            "raw_response": text,
        }
    return json.dumps(data) + "\n"


def _format_failure(fname, img_path, error, structured):
    """Render a failed request as the text to append to the output file."""
    if structured:
        # Keep the output valid JSON Lines: one JSON object per line, even
        # for failures.
        return json.dumps({"image_name": fname, "error": str(error)}) + "\n"
    return f"IMAGE: {img_path}\nERROR: {error}\n\n"


def process_folder(image_dir, output_txt, structured=False):
    """Analyze every image in a directory and append the results to a file.

    Files are visited in sorted name order. Entries whose name does not end
    in a recognized image extension (case-insensitive) are skipped. A failure
    on one image is recorded in the output and does not stop the run.

    Args:
        image_dir: Directory whose entries are scanned for images.
        output_txt: Output file path; opened in append mode, so results from
            earlier runs are kept.
        structured: When true, one JSON object is written per image (JSON
            Lines), including for failures. When false, a plain-text
            ``IMAGE:`` block is written per image.
    """
    # NOTE(review): Pillow needs a HEIF plugin to open .heic files; without
    # one, those files end up in the error path below. Confirm the setup.
    image_suffixes = (".png", ".jpg", ".jpeg", ".heic")

    with open(output_txt, "a", encoding="utf-8") as f_out:
        for fname in tqdm(sorted(os.listdir(image_dir))):
            if not fname.lower().endswith(image_suffixes):
                continue

            img_path = os.path.join(image_dir, fname)
            print(f"Processing: {img_path}")

            # Build the complete record first so a failure halfway through
            # never leaves a partial entry in the output file.
            try:
                response = analyze_image_with_gemini(img_path, structured=structured)
                record = _format_success(fname, img_path, response.text, structured)
            except Exception as e:
                # Per-image boundary: record the failure and keep going.
                print(f"Error processing {img_path}: {e}")
                record = _format_failure(fname, img_path, e, structured)

            f_out.write(record)
            # Flush per image so progress survives an interrupted run.
            f_out.flush()
| |
|
|
|
|
| |
| |
| |
if __name__ == "__main__":
    # Imported here because it is only needed when run as a script.
    import argparse

    # Every option defaults to the value that used to be hard-coded, so
    # running the script with no arguments behaves as before.
    parser = argparse.ArgumentParser(
        description="Describe sidewalk obstacles in a folder of images."
    )
    parser.add_argument(
        "--image-dir",
        default="/scratch/ds5725/OOPS/images",
        help="directory containing the images to analyze",
    )
    parser.add_argument(
        "--output",
        default="gemini_results_fix_q1.jsonl",
        help="file that results are appended to",
    )
    parser.add_argument(
        "--unstructured",
        action="store_true",
        help="write plain-text answers instead of JSON Lines",
    )
    args = parser.parse_args()

    process_folder(
        image_dir=args.image_dir,
        output_txt=args.output,
        structured=not args.unstructured,
    )
|
|