File size: 1,704 Bytes
75f0bc0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# gemini_nearest_obstacle_name.py
import os
import argparse
from PIL import Image
from google import genai
from google.genai import types

# Role/tone preamble sent as the first content part of every request.
SYSTEM_MESSAGE = (
    "You are a mobility assistant who analyzes the scene "
    "for safe navigation. "
    "Be concise and accurate."
)

# The actual task prompt; it asks for a bare object name and nothing else.
QUESTION = (
    "Identify the nearest obstacle on the sidewalk "
    "or walkable path ahead. "
    "Output ONLY the object name. "
    "No punctuation, no explanation, "
    "no full sentences. "
)

# Model queried when --model is not supplied on the command line.
DEFAULT_MODEL = "gemini-3-pro-preview"

def _clean_label(text):
    """Return the first non-blank line of *text* without surrounding quotes or punctuation."""
    # Whitespace, quote characters, and sentence punctuation are removed from
    # both ends only; characters inside the label are left alone.
    strip_chars = " \t\r\n\"'`.,;:!?"
    for line in text.splitlines():
        label = line.strip(strip_chars)
        if label:
            return label
    return ""


def ask_gemini_object_name(image_path: str, model_id: str):
    """Ask a Gemini model to name the nearest obstacle shown in an image.

    The image is opened, converted to RGB, and shrunk in place to fit within
    768x768 pixels. It is then sent, between SYSTEM_MESSAGE and QUESTION, in
    a single ``generate_content`` request.

    Args:
        image_path: Path of the image file to analyze.
        model_id: Identifier of the Gemini model to query.

    Returns:
        The object name reduced to the first non-blank line of the reply with
        surrounding quotes/punctuation removed, or ``""`` when the reply has
        no usable text (a warning is printed in that case).

    Raises:
        RuntimeError: If the GEMINI_API_KEY environment variable is unset or
            empty.
    """
    # Read the credential from the environment. A key embedded in source is
    # exposed to everyone who can read the file or its history, and it also
    # made the "missing key" check below unreachable.
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        raise RuntimeError("Missing GEMINI_API_KEY env var. Do: export GEMINI_API_KEY='...'\n")

    client = genai.Client(api_key=api_key)

    # convert() returns an independent in-memory image, so the file handle can
    # be released as soon as the conversion is done.
    with Image.open(image_path) as source:
        image = source.convert("RGB")
    image.thumbnail((768, 768))

    contents = [
        SYSTEM_MESSAGE,
        image,
        QUESTION,
    ]

    # No generation config is passed, so the model's default sampling
    # settings apply.
    resp = client.models.generate_content(
        model=model_id,
        contents=contents,
    )

    # Models do not always obey "object name only"; normalize the reply so
    # callers get a bare label.
    label = _clean_label(resp.text or "")
    if not label:
        print("Warning: Gemini response is empty.")
        return ""
    return label

def main():
    """Parse the command line, query Gemini for the obstacle name, and print it.

    Options:
        --image: required path of the image to analyze.
        --model: Gemini model id; falls back to DEFAULT_MODEL.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--image", required=True, help="Path to input image")
    cli.add_argument("--model", default=DEFAULT_MODEL, help="Gemini model id")
    options = cli.parse_args()

    print(ask_gemini_object_name(options.image, options.model))

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()