# NVIDIA Vision Model Configuration
# This file manages API keys and model fallback preferences
# =============================================================================
# API CONFIGURATION
# =============================================================================
# For production, use environment variables instead of hardcoding keys
# Set these in your .env file or system environment
# SECURITY: placeholders only — never commit real API keys to version control.
# If real keys were previously committed here, revoke and rotate them.
NVIDIA_API_KEY=your-nvidia-api-key-here
GEMINI_API_KEY=your-gemini-api-key-here
# =============================================================================
# MODEL CONFIGURATION
# =============================================================================
# Models are tried in order. If one fails, the next is attempted.
# Format: model_name (comma-separated, in priority order)
# NVIDIA Vision Models (in order of preference)
# These models support image analysis with vision capabilities
NVIDIA_VISION_MODELS=meta/llama-3.2-90b-vision-instruct,microsoft/phi-3-vision-128k-instruct,meta/llama-3.2-11b-vision-instruct,nvidia/neva-22b
# NVIDIA Text Models (for text-only analysis as additional fallback)
# These are faster and work for non-image tasks
NVIDIA_TEXT_MODELS=meta/llama-3.1-405b-instruct,meta/llama-3.1-70b-instruct,meta/llama-3.1-8b-instruct,mistralai/mixtral-8x7b-instruct-v0.1,mistralai/mistral-7b-instruct-v0.3
# Combined NVIDIA Models (vision + text for maximum coverage)
NVIDIA_MODELS=meta/llama-3.2-90b-vision-instruct,microsoft/phi-3-vision-128k-instruct,meta/llama-3.2-11b-vision-instruct,nvidia/neva-22b,meta/llama-3.1-405b-instruct,meta/llama-3.1-70b-instruct,meta/llama-3.1-8b-instruct,mistralai/mixtral-8x7b-instruct-v0.1
# Gemini Models (for text analysis fallback)
GEMINI_MODELS=gemini-2.0-flash-exp,gemini-2.0-flash-thinking-exp-1219,gemini-exp-1206,gemini-2.0-flash,gemini-1.5-flash,gemini-1.5-flash-8b,gemini-1.5-pro
# =============================================================================
# REQUEST CONFIGURATION
# =============================================================================
# Maximum tokens for model responses
MAX_TOKENS=500
# Temperature for model creativity (0.0 = deterministic, 1.0 = creative)
TEMPERATURE=0.2
# Request timeout in seconds
REQUEST_TIMEOUT=30
# Maximum retry attempts per model
MAX_RETRIES=2
# =============================================================================
# FEATURE FLAGS
# =============================================================================
# Enable/disable streaming responses
ENABLE_STREAMING=true
# Enable verbose logging
VERBOSE_LOGGING=true
# Enable fallback to Gemini if all NVIDIA models fail
ENABLE_GEMINI_FALLBACK=true