# Pest_Outbreak_Prediction / image_summarizer.py
# krushimitravit's picture
# Update image_summarizer.py
# 3f4fb13 verified
import base64
import os
from openai import OpenAI
# Endpoint of the NVIDIA-hosted API that the OpenAI-compatible client talks to.
_NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1"

# Shared API client used by every request in this module.
# NOTE(review): the key is read from an environment variable named for Gemini
# even though the endpoint is NVIDIA's - confirm this is the intended secret.
client = OpenAI(
    api_key=os.getenv('GEMINI_API_KEY_1'),
    base_url=_NVIDIA_BASE_URL,
)

# Model identifiers: the vision-capable model is tried first; the second one
# accepts text only and is used when the vision request fails.
PRIMARY_MODEL = "meta/llama-3.2-90b-vision-instruct"
FALLBACK_MODEL = "meta/llama-3.1-70b-instruct"

# Image processed when the script is run directly (relative to the CWD).
IMAGE_PATH = "image.png"
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
def summarize_with_vision_model(base64_image):
    """
    Ask the primary vision model for a summary of an image.

    The request is streamed: every text fragment is echoed to stdout as it
    arrives and the fragments are joined into the returned summary.

    Args:
        base64_image: Base64 encoded image string. It is embedded in a
            ``data:image/png`` URL, so the payload is declared as PNG.

    Returns:
        str: The concatenated summary text (empty if the stream carried no
            text), or None if the request or the stream raised an exception.
    """
    # NOTE(review): some emoji prefixes in the messages below look
    # mis-encoded; they are reproduced unchanged - confirm the file encoding.
    try:
        print(f"πŸ” Attempting with primary vision model: {PRIMARY_MODEL}...")
        completion = client.chat.completions.create(
            model=PRIMARY_MODEL,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Please summarize what you see in this image."},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{base64_image}"
                            },
                        },
                    ],
                }
            ],
            max_tokens=500,
            temperature=0.2,
            stream=True,
        )
        print("\nβœ… Image Summary (Vision Model):\n" + "-" * 50)
        fragments = []
        for chunk in completion:
            # A streamed chunk may carry an empty ``choices`` list (for
            # example a usage-only chunk). Indexing it would raise IndexError
            # and throw away the text already received, so skip it instead.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                print(content, end="", flush=True)
                fragments.append(content)
        print("\n" + "-" * 50)
        return "".join(fragments)
    except Exception as e:
        # Best-effort by design: any failure is reported and signalled with
        # None so the caller can try the fallback model.
        print(f"\n⚠️ Vision model failed: {e}")
        return None
def summarize_with_text_fallback():
    """
    Fallback path that queries the text-only model.

    The image is never sent to this model: the prompt only states that the
    vision model is unavailable and asks for general guidance, so the reply
    is generic advice rather than a description of the image.

    The request is streamed: every text fragment is echoed to stdout as it
    arrives and the fragments are joined into the returned response.

    Returns:
        str: The concatenated response text (empty if the stream carried no
            text), or None if the request or the stream raised an exception.
    """
    # NOTE(review): some emoji prefixes in the messages below look
    # mis-encoded; they are reproduced unchanged - confirm the file encoding.
    try:
        print(f"\nπŸ”„ Falling back to text model: {FALLBACK_MODEL}...")
        # Prompt that acknowledges the limitation instead of pretending the
        # image was analysed.
        prompt = (
            "I attempted to analyze an image but the vision model is unavailable.\n"
            "Please provide a helpful response about what types of information can typically be extracted from images,\n"
            "and suggest alternative approaches for image analysis."
        )
        completion = client.chat.completions.create(
            model=FALLBACK_MODEL,
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            max_tokens=500,
            temperature=0.2,
            stream=True,
        )
        print("\nπŸ’‘ Fallback Response (Text Model):\n" + "-" * 50)
        fragments = []
        for chunk in completion:
            # A streamed chunk may carry an empty ``choices`` list (for
            # example a usage-only chunk). Indexing it would raise IndexError
            # and throw away the text already received, so skip it instead.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                print(content, end="", flush=True)
                fragments.append(content)
        print("\n" + "-" * 50)
        return "".join(fragments)
    except Exception as e:
        # Best-effort by design: report the failure and signal it with None.
        print(f"\n❌ Fallback model also failed: {e}")
        return None
def summarize_image(image_path=None):
    """
    Summarize an image, falling back to the text-only model on failure.

    The vision model is tried first; if it returns None the text fallback is
    used. Progress and a final status are printed to stdout.

    Args:
        image_path: Path of the image to process. When omitted or None the
            module-level ``IMAGE_PATH`` is used, which matches the previous
            zero-argument behaviour.

    Returns:
        str: The text produced by the vision model or, failing that, by the
            fallback model. None if the image does not exist, cannot be
            encoded, or both models fail.
    """
    # NOTE(review): some emoji prefixes in the messages below look
    # mis-encoded; they are reproduced unchanged - confirm the file encoding.
    if image_path is None:
        image_path = IMAGE_PATH

    # Bail out early with some diagnostics when the image is missing.
    if not os.path.exists(image_path):
        print(f"❌ Error: {image_path} not found.")
        print(f"πŸ“ Current directory: {os.getcwd()}")
        print(f"πŸ“‹ Files in current directory: {os.listdir('.')}")
        return None

    print(f"πŸ“Έ Processing {image_path}...")
    print(f"πŸ“ File size: {os.path.getsize(image_path)} bytes\n")

    # Encode the image; a read failure ends the run without calling any model.
    try:
        base64_image = encode_image(image_path)
    except Exception as e:
        print(f"❌ Error encoding image: {e}")
        return None

    # Try the vision model first, then the text-only fallback.
    result = summarize_with_vision_model(base64_image)
    if result is None:
        print("\nπŸ”„ Primary model failed, attempting fallback...")
        result = summarize_with_text_fallback()

    # Final status
    if result is None:
        print("\n❌ All methods failed. Please check:")
        print(" 1. API key validity")
        print(" 2. Network connection")
        print(" 3. NVIDIA API service status")
    else:
        print("\nβœ… Image processing completed successfully!")

    # Hand the text back so importers can use it, not just read stdout.
    return result
# Script entry point: run the summarizer only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    summarize_image()