# gemini_nearest_obstacle_name.py
import argparse
import os
import sys

from google import genai
from google.genai import types
from PIL import Image
# Role/tone preamble; it is sent as the first element of the request contents.
SYSTEM_MESSAGE = " ".join(
    [
        "You are a mobility assistant who analyzes the scene for safe navigation.",
        "Be concise and accurate.",
    ]
)

# Task prompt sent after the image. The trailing space after the last
# sentence is part of the prompt text as originally written and is preserved.
QUESTION = (
    "Identify the nearest obstacle on the sidewalk or walkable path ahead. Output ONLY "
    "the object name. No punctuation, no explanation, no full sentences. "
)

# Model id used when --model is not supplied on the command line
# (the same id an earlier version of this script hard-coded as MODEL_ID).
DEFAULT_MODEL = "gemini-3-pro-preview"
# Characters trimmed from both ends of the model's reply: whitespace plus the
# quotes/punctuation the prompt explicitly asks the model not to emit.
_LABEL_STRIP_CHARS = " \t\r\n\"'`.,;:!?*"


def _clean_label(raw: str) -> str:
    """Reduce a raw model reply to a bare object label.

    Returns the first line that is non-empty after trimming surrounding
    whitespace, quotes, and punctuation. Interior characters are untouched.
    Returns "" when no such line exists.
    """
    for line in raw.splitlines():
        label = line.strip(_LABEL_STRIP_CHARS)
        if label:
            return label
    return ""


def ask_gemini_object_name(image_path: str, model_id: str) -> str:
    """Ask a Gemini model to name the nearest obstacle shown in an image.

    The image is converted to RGB and shrunk in place to fit within 768x768
    before being sent, together with SYSTEM_MESSAGE and QUESTION, in a single
    generate_content request.

    Args:
        image_path: Path to the input image file.
        model_id: Gemini model id to query.

    Returns:
        The cleaned object name (first non-empty line of the reply with
        surrounding quotes/punctuation removed), or "" if the reply is empty.

    Raises:
        RuntimeError: If the GEMINI_API_KEY environment variable is not set.
        Errors from opening the image or from the API call are not caught
        here and propagate to the caller.
    """
    # Read the credential from the environment instead of embedding it in
    # source. NOTE(review): a key was previously committed in this function;
    # it should be treated as compromised and revoked.
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise RuntimeError("Missing GEMINI_API_KEY env var. Do: export GEMINI_API_KEY='...'\n")

    client = genai.Client(api_key=api_key)

    # Image.open keeps the file handle open; convert() yields a separate
    # in-memory image, so the file can be closed as soon as it returns.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    image.thumbnail((768, 768))

    # No generation config is passed, so the model's default sampling
    # settings apply.
    resp = client.models.generate_content(
        model=model_id,
        contents=[SYSTEM_MESSAGE, image, QUESTION],
    )

    name = _clean_label(resp.text or "")
    if not name:
        # stdout carries the result when run as a script; keep diagnostics
        # on stderr so they cannot be mistaken for an object name.
        print("Warning: Gemini response is empty.", file=sys.stderr)
    return name
def main() -> None:
    """Command-line entry point.

    Parses --image (required) and --model (defaults to DEFAULT_MODEL), queries
    the model through ask_gemini_object_name, and prints the returned name to
    stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--image", required=True, help="Path to input image")
    parser.add_argument("--model", default=DEFAULT_MODEL, help="Gemini model id")
    args = parser.parse_args()

    name = ask_gemini_object_name(args.image, args.model)
    print(name)


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
|