Spaces:
Sleeping
Sleeping
import base64
import os

from openai import OpenAI

# Initialize the NVIDIA client via its OpenAI-compatible endpoint.
# NOTE(review): the env var is named GEMINI_API_KEY_1 but is passed as the
# NVIDIA API key — presumably a repurposed secret slot; confirm and consider
# renaming to something like NVIDIA_API_KEY.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.getenv('GEMINI_API_KEY_1')
)

# Model configurations
PRIMARY_MODEL = "meta/llama-3.2-90b-vision-instruct"  # vision-capable primary
FALLBACK_MODEL = "meta/llama-3.1-70b-instruct"  # Text-only fallback model
IMAGE_PATH = "image.png"  # hard-coded input path, resolved relative to CWD
def encode_image(image_path):
    """Read a file from disk and return its contents as a base64 string.

    Args:
        image_path: Path to the image file to encode.

    Returns:
        str: Base64 encoding of the raw file bytes, decoded as ASCII-safe
        UTF-8 text (suitable for embedding in a data URI).

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
def summarize_with_vision_model(base64_image):
    """Stream an image summary from the primary vision model.

    Sends the image (as a data URI) plus a summarization prompt to
    PRIMARY_MODEL and echoes the streamed tokens to stdout while
    accumulating them.

    Args:
        base64_image: Base64-encoded PNG bytes (no ``data:`` prefix).

    Returns:
        str: The accumulated summary text, or None if the request failed
        for any reason (the exception is printed, not re-raised, so the
        caller can fall back to the text-only model).
    """
    try:
        print(f"π Attempting with primary vision model: {PRIMARY_MODEL}...")
        completion = client.chat.completions.create(
            model=PRIMARY_MODEL,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Please summarize what you see in this image."},
                        {
                            "type": "image_url",
                            "image_url": {
                                # Image travels inline as a data URI.
                                "url": f"data:image/png;base64,{base64_image}"
                            }
                        }
                    ]
                }
            ],
            max_tokens=500,
            temperature=0.2,
            stream=True
        )
        print("\nβ Image Summary (Vision Model):\n" + "-" * 50)
        summary = ""
        for chunk in completion:
            # Some stream events carry no choices (e.g. usage-only chunks);
            # guard before indexing to avoid an IndexError mid-stream.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                print(content, end="", flush=True)
                summary += content
        print("\n" + "-" * 50)
        return summary
    except Exception as e:
        print(f"\nβ οΈ Vision model failed: {e}")
        return None
def summarize_with_text_fallback():
    """Fallback path using the text-only model when the vision model fails.

    Cannot see the image, so it asks FALLBACK_MODEL for generic guidance on
    image analysis instead, streaming the reply to stdout.

    Returns:
        str: The accumulated fallback response text, or None if this
        request also failed (the exception is printed, not re-raised).
    """
    try:
        print(f"\nπ Falling back to text model: {FALLBACK_MODEL}...")
        # Create a prompt that acknowledges the limitation
        prompt = """I attempted to analyze an image but the vision model is unavailable.
Please provide a helpful response about what types of information can typically be extracted from images,
and suggest alternative approaches for image analysis."""
        completion = client.chat.completions.create(
            model=FALLBACK_MODEL,
            messages=[
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            max_tokens=500,
            temperature=0.2,
            stream=True
        )
        print("\nπ‘ Fallback Response (Text Model):\n" + "-" * 50)
        response = ""
        for chunk in completion:
            # Guard against stream events with no choices (e.g. usage-only
            # chunks) — indexing them would raise IndexError mid-stream.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is not None:
                print(content, end="", flush=True)
                response += content
        print("\n" + "-" * 50)
        return response
    except Exception as e:
        print(f"\nβ Fallback model also failed: {e}")
        return None
def summarize_image():
    """Summarize IMAGE_PATH with the vision model, falling back to text.

    Flow: validate the file exists, base64-encode it, try the vision model,
    and on failure try the text-only fallback. All outcomes are reported on
    stdout; nothing is returned and no exception escapes.
    """
    # Check if image exists; print diagnostics to help locate the file.
    if not os.path.exists(IMAGE_PATH):
        print(f"β Error: {IMAGE_PATH} not found.")
        print(f"π Current directory: {os.getcwd()}")
        print(f"π Files in current directory: {os.listdir('.')}")
        return
    print(f"πΈ Processing {IMAGE_PATH}...")
    print(f"π File size: {os.path.getsize(IMAGE_PATH)} bytes\n")
    # Encode the image; bail out on read errors (permissions, races).
    try:
        base64_image = encode_image(IMAGE_PATH)
    except Exception as e:
        print(f"β Error encoding image: {e}")
        return
    # Try vision model first
    result = summarize_with_vision_model(base64_image)
    # If vision model failed, use fallback
    if result is None:
        print("\nπ Primary model failed, attempting fallback...")
        result = summarize_with_text_fallback()
    # Final status
    if result is None:
        print("\nβ All methods failed. Please check:")
        print("  1. API key validity")
        print("  2. Network connection")
        print("  3. NVIDIA API service status")
    else:
        print("\nβ Image processing completed successfully!")


if __name__ == "__main__":
    summarize_image()