# Residue of the web page this file was copied from (not program text):
# Spaces: Sleeping | File size: 4,830 Bytes | 3f4fb13
import base64
import os
from openai import OpenAI
# Shared API client: the OpenAI SDK pointed at NVIDIA's endpoint.
# NOTE(review): the key is read from an environment variable named
# GEMINI_API_KEY_1 although the endpoint is NVIDIA's - confirm the name is
# intentional for this deployment.
client = OpenAI(
    api_key=os.environ.get("GEMINI_API_KEY_1"),
    base_url="https://integrate.api.nvidia.com/v1",
)

# Model identifiers and the input image location
PRIMARY_MODEL = "meta/llama-3.2-90b-vision-instruct"  # receives the image
FALLBACK_MODEL = "meta/llama-3.1-70b-instruct"  # Text-only fallback model
IMAGE_PATH = "image.png"
def encode_image(image_path):
    """Read the file at image_path and return its bytes as base64 text."""
    with open(image_path, mode="rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
def summarize_with_vision_model(base64_image):
    """
    Attempt to summarize an image using the vision model.

    Streams the completion, echoing each text fragment to stdout as it
    arrives, and returns the fragments joined into one string.

    Args:
        base64_image: Base64 encoded image string. It is embedded in a
            ``data:image/png;base64,...`` URL, so the payload is always
            declared as PNG.

    Returns:
        str: Summary text, or None if the request or the stream raised.
    """
    try:
        print(f"🔄 Attempting with primary vision model: {PRIMARY_MODEL}...")
        completion = client.chat.completions.create(
            model=PRIMARY_MODEL,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Please summarize what you see in this image."},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=500,
            temperature=0.2,
            stream=True,
        )
        print("\n✅ Image Summary (Vision Model):\n" + "-" * 50)
        pieces = []
        for chunk in completion:
            # A streamed chunk may arrive with an empty `choices` list (the
            # OpenAI streaming schema allows it). Skip it instead of raising
            # IndexError, which would discard the text received so far.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                print(content, end="", flush=True)
                pieces.append(content)
        print("\n" + "-" * 50)
        return "".join(pieces)
    except Exception as e:
        # Best-effort boundary: any failure is reported and signalled with
        # None so the caller can switch to the fallback.
        print(f"\n⚠️ Vision model failed: {e}")
        return None
def summarize_with_text_fallback():
    """
    Fallback method using the text-only LLM.

    The text model is never given the image. It is asked for general
    guidance on image analysis, so the returned text is a generic response
    rather than a description of the image. Each streamed fragment is
    echoed to stdout as it arrives.

    Returns:
        str: Fallback response text, or None if the request or the stream
        raised.
    """
    try:
        print(f"\n🔄 Falling back to text model: {FALLBACK_MODEL}...")
        # Create a prompt that acknowledges the limitation
        prompt = (
            "I attempted to analyze an image but the vision model is unavailable.\n"
            "Please provide a helpful response about what types of information can typically be extracted from images,\n"
            "and suggest alternative approaches for image analysis."
        )
        completion = client.chat.completions.create(
            model=FALLBACK_MODEL,
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            max_tokens=500,
            temperature=0.2,
            stream=True,
        )
        print("\n💡 Fallback Response (Text Model):\n" + "-" * 50)
        pieces = []
        for chunk in completion:
            # A streamed chunk may arrive with an empty `choices` list (the
            # OpenAI streaming schema allows it). Skip it instead of raising
            # IndexError, which would discard the text received so far.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                print(content, end="", flush=True)
                pieces.append(content)
        print("\n" + "-" * 50)
        return "".join(pieces)
    except Exception as e:
        # Best-effort boundary: report the failure and signal it with None.
        print(f"\n❌ Fallback model also failed: {e}")
        return None
def summarize_image():
    """
    Main function to summarize an image with fallback support.

    Checks that IMAGE_PATH exists, base64-encodes it, and asks the vision
    model for a summary. If that returns None, the text-only fallback is
    tried. Progress and the final status are printed to stdout; the
    function itself returns None in every case.
    """
    # Check if image exists; list the working directory to help diagnose
    # a wrong path.
    if not os.path.exists(IMAGE_PATH):
        print(f"❌ Error: {IMAGE_PATH} not found.")
        print(f"📁 Current directory: {os.getcwd()}")
        print(f"📂 Files in current directory: {os.listdir('.')}")
        return

    print(f"📸 Processing {IMAGE_PATH}...")
    print(f"📊 File size: {os.path.getsize(IMAGE_PATH)} bytes\n")

    # Encode the image
    try:
        base64_image = encode_image(IMAGE_PATH)
    except Exception as e:
        print(f"❌ Error encoding image: {e}")
        return

    # Try vision model first
    result = summarize_with_vision_model(base64_image)

    # If vision model failed (signalled by None), use fallback
    if result is None:
        print("\n🔄 Primary model failed, attempting fallback...")
        result = summarize_with_text_fallback()

    # Final status
    if result is None:
        print("\n❌ All methods failed. Please check:")
        print(" 1. API key validity")
        print(" 2. Network connection")
        print(" 3. NVIDIA API service status")
    else:
        print("\n✅ Image processing completed successfully!")
# Run the summarization only when this file is executed as a script.
if __name__ == "__main__":
    summarize_image()