File size: 4,830 Bytes
3f4fb13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import base64
import os
from openai import OpenAI

# Initialize NVIDIA Client (OpenAI-compatible endpoint).
# NOTE(review): the env var is named GEMINI_API_KEY_1 but its value is sent to
# the NVIDIA endpoint — presumably a repurposed variable; confirm the stored
# key is actually an NVIDIA API key. os.getenv returns None when unset, which
# surfaces later as an authentication failure rather than at startup.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.getenv('GEMINI_API_KEY_1')
)

# Model configurations
PRIMARY_MODEL = "meta/llama-3.2-90b-vision-instruct"  # multimodal: accepts image + text
FALLBACK_MODEL = "meta/llama-3.1-70b-instruct"  # Text-only fallback model
IMAGE_PATH = "image.png"  # resolved relative to the current working directory


def encode_image(image_path):
    """Read the file at *image_path* and return its bytes as a base64 string."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode('utf-8')


def summarize_with_vision_model(base64_image):
    """
    Attempt to summarize image using vision model.

    Streams the completion from PRIMARY_MODEL, echoing tokens to stdout as
    they arrive and accumulating them into the returned summary.

    Args:
        base64_image: Base64 encoded image string (embedded as a PNG data URL)

    Returns:
        str: Summary text or None if failed
    """
    try:
        print(f"🔍 Attempting with primary vision model: {PRIMARY_MODEL}...")

        completion = client.chat.completions.create(
            model=PRIMARY_MODEL,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Please summarize what you see in this image."},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            max_tokens=500,
            temperature=0.2,
            stream=True
        )

        print("\n✅ Image Summary (Vision Model):\n" + "-" * 50)
        summary = ""
        for chunk in completion:
            # Some stream chunks (e.g. usage or keep-alive frames) carry an
            # empty choices list; guard to avoid an IndexError mid-stream,
            # which would discard the partial summary and trigger the fallback.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                print(content, end="", flush=True)
                summary += content
        print("\n" + "-" * 50)

        return summary

    except Exception as e:
        # Broad catch is deliberate: any failure (network, auth, model error)
        # routes the caller to the text-only fallback instead of crashing.
        print(f"\n⚠️ Vision model failed: {e}")
        return None


def summarize_with_text_fallback():
    """
    Fallback method using text-only LLM.
    Provides a generic response when vision model fails.

    Streams the completion from FALLBACK_MODEL, echoing tokens to stdout as
    they arrive and accumulating them into the returned response.

    Returns:
        str: Fallback response, or None if this model also failed
    """
    try:
        print(f"\n🔄 Falling back to text model: {FALLBACK_MODEL}...")

        # Create a prompt that acknowledges the limitation
        prompt = """I attempted to analyze an image but the vision model is unavailable. 
Please provide a helpful response about what types of information can typically be extracted from images, 
and suggest alternative approaches for image analysis."""

        completion = client.chat.completions.create(
            model=FALLBACK_MODEL,
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            max_tokens=500,
            temperature=0.2,
            stream=True
        )

        print("\n💡 Fallback Response (Text Model):\n" + "-" * 50)
        response = ""
        for chunk in completion:
            # Some stream chunks (e.g. usage or keep-alive frames) carry an
            # empty choices list; guard to avoid an IndexError mid-stream.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                print(content, end="", flush=True)
                response += content
        print("\n" + "-" * 50)

        return response

    except Exception as e:
        # Last line of defense — caller reports overall failure when this
        # returns None.
        print(f"\n❌ Fallback model also failed: {e}")
        return None


def summarize_image():
    """
    Main function to summarize an image with fallback support.

    Validates that IMAGE_PATH exists, base64-encodes it, tries the vision
    model first, and degrades to the text-only fallback if that fails.
    Prints a final success or failure report; returns nothing.
    """
    # Guard clause: nothing to do if the image is missing.
    if not os.path.exists(IMAGE_PATH):
        print(f"❌ Error: {IMAGE_PATH} not found.")
        print(f"📁 Current directory: {os.getcwd()}")
        print(f"📋 Files in current directory: {os.listdir('.')}")
        return

    print(f"📸 Processing {IMAGE_PATH}...")
    print(f"📏 File size: {os.path.getsize(IMAGE_PATH)} bytes\n")

    # Guard clause: bail out if the file cannot be read/encoded.
    try:
        encoded = encode_image(IMAGE_PATH)
    except Exception as err:
        print(f"❌ Error encoding image: {err}")
        return

    # Primary attempt, then text-only fallback on failure.
    summary = summarize_with_vision_model(encoded)
    if summary is None:
        print("\n🔄 Primary model failed, attempting fallback...")
        summary = summarize_with_text_fallback()

    # Final status report.
    if summary is not None:
        print("\n✅ Image processing completed successfully!")
        return
    print("\n❌ All methods failed. Please check:")
    print("   1. API key validity")
    print("   2. Network connection")
    print("   3. NVIDIA API service status")


# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    summarize_image()