| import base64
|
| import os
|
| import sys
|
| from openai import OpenAI
|
| import google.generativeai as genai
|
| from PIL import Image
|
|
|
|
|
# NVIDIA-hosted vision models, tried in order (first = preferred).
NVIDIA_MODELS = [
    "meta/llama-3.2-90b-vision-instruct",
    "meta/llama-3.2-11b-vision-instruct",
    "microsoft/phi-3-vision-128k-instruct",
    "nvidia/neva-22b",
]

# Google Gemini vision models used as the fallback tier, tried in order.
GEMINI_MODELS = [
    "gemini-2.0-flash-exp",
    "gemini-2.5-flash",
    "gemini-2.5-flash-8b",
    "gemini-3.0-flash",
]

# SECURITY: never hard-code API credentials in source. Both keys are read
# from the environment; an empty default lets the script start and report
# a clean per-provider failure instead of crashing at import time.
NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY", "")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "")

# Default image to summarize; may be overridden by a CLI argument in __main__.
IMAGE_PATH = "image.png"
|
|
|
def encode_image(image_path):
    """Read the file at *image_path* and return its bytes as a base64 text string."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode("utf-8")
|
|
|
def try_nvidia_model(model_name, base64_image, prompt):
    """Attempt to summarize an image with one NVIDIA-hosted vision model.

    Streams the model's output to stdout while accumulating it.

    Args:
        model_name: NVIDIA model identifier, e.g. "nvidia/neva-22b".
        base64_image: base64-encoded PNG image data as a str.
        prompt: text instruction sent alongside the image.

    Returns:
        (True, full_response_text) on success, (False, None) on any failure.
    """
    try:
        print(f"🔄 Trying NVIDIA model: {model_name}")

        # NVIDIA exposes an OpenAI-compatible endpoint, so the standard
        # OpenAI client is pointed at their base URL.
        client = OpenAI(
            base_url="https://integrate.api.nvidia.com/v1",
            api_key=NVIDIA_API_KEY,
        )

        # The image travels inline as a data: URL inside the standard
        # OpenAI multimodal chat message format.
        completion = client.chat.completions.create(
            model=model_name,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=500,
            temperature=0.2,
            stream=True,
        )

        print(f"✅ Success with NVIDIA model: {model_name}\n")
        print("Image Summary:\n" + "-" * 50)

        # Stream tokens to the console as they arrive while building the
        # complete response text for the caller.
        full_response = ""
        for chunk in completion:
            content = chunk.choices[0].delta.content
            if content is not None:
                print(content, end="", flush=True)
                full_response += content

        print("\n" + "-" * 50)
        return True, full_response

    except Exception as e:
        # Broad catch is deliberate: any failure (auth, quota, network,
        # bad model name) should fall through to the caller's next model.
        print(f"❌ NVIDIA model {model_name} failed: {str(e)}\n")
        return False, None
|
|
|
def try_gemini_model(model_name, image_path, prompt):
    """Attempt to summarize an image with one Google Gemini vision model.

    Streams the model's output to stdout while accumulating it.

    Args:
        model_name: Gemini model identifier, e.g. "gemini-2.5-flash".
        image_path: filesystem path to the image (opened via PIL).
        prompt: text instruction sent alongside the image.

    Returns:
        (True, full_response_text) on success, (False, None) on any failure.
    """
    try:
        print(f"🔄 Trying Gemini model: {model_name}")

        # Bail out early with a clean failure if no key is configured,
        # so the caller can report that all fallbacks were exhausted.
        if not GEMINI_API_KEY:
            print("❌ Gemini API key not set\n")
            return False, None

        genai.configure(api_key=GEMINI_API_KEY)
        model = genai.GenerativeModel(model_name)

        # Gemini accepts a PIL Image object directly in the content list.
        img = Image.open(image_path)

        response = model.generate_content([prompt, img], stream=True)

        print(f"✅ Success with Gemini model: {model_name}\n")
        print("Image Summary:\n" + "-" * 50)

        # Stream text chunks to the console while building the full reply.
        full_response = ""
        for chunk in response:
            if chunk.text:
                print(chunk.text, end="", flush=True)
                full_response += chunk.text

        print("\n" + "-" * 50)
        return True, full_response

    except Exception as e:
        # Broad catch is deliberate: any failure should fall through to
        # the next model in the caller's fallback list.
        print(f"❌ Gemini model {model_name} failed: {str(e)}\n")
        return False, None
|
|
|
def summarize_image_with_fallback():
    """Summarize IMAGE_PATH, trying NVIDIA models first, then Gemini.

    Walks NVIDIA_MODELS in order, then GEMINI_MODELS, stopping at the
    first model that succeeds.

    Returns:
        The summary text from the first successful model, or None if the
        image is missing or every model fails.
    """
    if not os.path.exists(IMAGE_PATH):
        print(f"❌ Error: {IMAGE_PATH} not found.")
        return

    print(f"🖼️ Processing {IMAGE_PATH}...\n")
    print("=" * 50)

    prompt = "Please summarize what you see in this image."

    # Encode once up front; the same base64 payload is reused for every
    # NVIDIA attempt. (Gemini reads the file itself via PIL.)
    base64_image = encode_image(IMAGE_PATH)

    print("\n🚀 PHASE 1: Trying NVIDIA Models")
    print("=" * 50 + "\n")

    for model in NVIDIA_MODELS:
        success, response = try_nvidia_model(model, base64_image, prompt)
        if success:
            print(f"\n✅ Successfully completed with NVIDIA model: {model}")
            return response

    print("\n🚀 PHASE 2: Trying Gemini Models (Fallback)")
    print("=" * 50 + "\n")

    for model in GEMINI_MODELS:
        success, response = try_gemini_model(model, IMAGE_PATH, prompt)
        if success:
            print(f"\n✅ Successfully completed with Gemini model: {model}")
            return response

    print("\n❌ All models failed. Please check:")
    print("   1. API keys are valid")
    print("   2. Image file is accessible and valid")
    print("   3. Network connection is stable")
    print("   4. API quotas are not exceeded")
    return None
|
|
|
if __name__ == "__main__":

    # Optional CLI override of the module-level default image path.
    if len(sys.argv) > 1:
        IMAGE_PATH = sys.argv[1]

    # Validate the path here so the user gets usage help instead of a
    # bare "not found" message from the summarizer.
    if not os.path.exists(IMAGE_PATH):
        print(f"\n❌ Error: Image file '{IMAGE_PATH}' not found!")
        print("\n📖 Usage:")
        print(f"   python {os.path.basename(__file__)} [image_path]")
        print("\n   Examples:")
        print(f"   python {os.path.basename(__file__)} image.png")
        print(f"   python {os.path.basename(__file__)} test_image.jpg")
        print(f"   python {os.path.basename(__file__)} C:\\path\\to\\your\\image.png")
        sys.exit(1)

    result = summarize_image_with_fallback()

    if result:
        print("\n" + "=" * 50)
        print("✅ Image summarization completed successfully!")
    else:
        print("\n" + "=" * 50)
        print("❌ Image summarization failed with all available models.")
|
|