# NVIDIA Vision Model Configuration
# This file manages API keys and model fallback preferences

# =============================================================================
# API CONFIGURATION
# =============================================================================
# For production, use environment variables instead of hardcoding keys
# Set these in your .env file or system environment

NVIDIA_API_KEY=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
GEMINI_API_KEY=AIzaSyDkiYr-eSkqIXpZ1fHlik_YFsFtfQoFi0w

# =============================================================================
# MODEL CONFIGURATION
# =============================================================================
# Models are tried in order. If one fails, the next is attempted.
# Format: comma-separated model names, in priority order

# NVIDIA Vision Models (in order of preference)
# These models support image analysis with vision capabilities
NVIDIA_VISION_MODELS=meta/llama-3.2-90b-vision-instruct,microsoft/phi-3-vision-128k-instruct,meta/llama-3.2-11b-vision-instruct,nvidia/neva-22b

# NVIDIA Text Models (for text-only analysis, used as an additional fallback)
# These are faster and work for non-image tasks
NVIDIA_TEXT_MODELS=meta/llama-3.1-405b-instruct,meta/llama-3.1-70b-instruct,meta/llama-3.1-8b-instruct,mistralai/mixtral-8x7b-instruct-v0.1,mistralai/mistral-7b-instruct-v0.3

# Combined NVIDIA Models (vision + text for maximum coverage)
NVIDIA_MODELS=meta/llama-3.2-90b-vision-instruct,microsoft/phi-3-vision-128k-instruct,meta/llama-3.2-11b-vision-instruct,nvidia/neva-22b,meta/llama-3.1-405b-instruct,meta/llama-3.1-70b-instruct,meta/llama-3.1-8b-instruct,mistralai/mixtral-8x7b-instruct-v0.1

# Gemini Models (for text analysis fallback)
GEMINI_MODELS=gemini-2.0-flash-exp,gemini-2.0-flash-thinking-exp-1219,gemini-exp-1206,gemini-2.0-flash,gemini-1.5-flash,gemini-1.5-flash-8b,gemini-1.5-pro

# =============================================================================
# REQUEST CONFIGURATION
# =============================================================================
# Maximum tokens for model responses
MAX_TOKENS=500

# Temperature, controlling response randomness (0.0 = deterministic, 1.0 = most creative)
TEMPERATURE=0.2

# Request timeout in seconds
REQUEST_TIMEOUT=30

# Maximum retry attempts per model
MAX_RETRIES=2

# =============================================================================
# FEATURE FLAGS
# =============================================================================
# Enable/disable streaming responses
ENABLE_STREAMING=true

# Enable verbose logging
VERBOSE_LOGGING=true

# Enable fallback to Gemini if all NVIDIA models fail
ENABLE_GEMINI_FALLBACK=true
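
# =============================================================================
# USAGE SKETCH (illustrative comments only; not read by any tool)
# =============================================================================
# A minimal sketch of how an application could consume these variables,
# assuming a Python client using python-dotenv and NVIDIA's OpenAI-compatible
# endpoint (https://integrate.api.nvidia.com/v1). The function name analyze()
# and the endpoint wiring here are assumptions for illustration, not a
# description of any specific loader shipped with this project.
#
#   import os
#   from dotenv import load_dotenv
#   from openai import OpenAI
#
#   load_dotenv()  # reads this file into the process environment
#   client = OpenAI(
#       base_url="https://integrate.api.nvidia.com/v1",
#       api_key=os.environ["NVIDIA_API_KEY"],
#   )
#
#   def analyze(prompt: str) -> str:
#       # Try each NVIDIA model in priority order; return the first success.
#       for model in os.environ["NVIDIA_MODELS"].split(","):
#           try:
#               resp = client.chat.completions.create(
#                   model=model,
#                   messages=[{"role": "user", "content": prompt}],
#                   max_tokens=int(os.getenv("MAX_TOKENS", "500")),
#                   temperature=float(os.getenv("TEMPERATURE", "0.2")),
#                   timeout=float(os.getenv("REQUEST_TIMEOUT", "30")),
#               )
#               return resp.choices[0].message.content
#           except Exception:
#               continue  # this model failed; fall through to the next one
#       # If ENABLE_GEMINI_FALLBACK=true, a real loader would try GEMINI_MODELS
#       # here before giving up.
#       raise RuntimeError("All NVIDIA models failed")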