File size: 6,561 Bytes
aca8ab4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
#!/usr/bin/env python3
"""
Diagnostic script to validate Azure OpenAI embeddings deployment.
This script helps diagnose 404 errors related to embedding deployments.
Run this before deploying to HuggingFace Spaces to ensure configuration is correct.
Usage:
python scripts/validate_azure_embeddings.py
"""
import os
import sys
from pathlib import Path
from openai import AzureOpenAI
from dotenv import load_dotenv
# Load settings from a .env file (if one is present) before any of the
# os.getenv() lookups below run.
load_dotenv()
# Status glyphs are spelled as escapes so this file stays pure ASCII and the
# symbols cannot be corrupted by a mis-decoded copy of the source.
_OK_MARK = "\u2705"    # check mark
_FAIL_MARK = "\u274c"  # cross mark
_ARROW = "\u2192"      # rightwards arrow

_RULE_WIDTH = 80


def _mask_secret(value: str) -> str:
    """Return an abbreviated form of *value* for display instead of the full secret."""
    if len(value) > 14:
        return f"{value[:10]}...{value[-4:]}"
    return "***"


def _report_env_vars(env_vars: dict) -> list:
    """Print one status line per variable and return the names that are unset."""
    missing = []
    for name, value in env_vars.items():
        if not value:
            print(f"{_FAIL_MARK} {name}: NOT SET")
            missing.append(name)
            continue
        # Only variables whose name contains "KEY" are treated as secrets.
        shown = _mask_secret(value) if "KEY" in name else value
        print(f"{_OK_MARK} {name}: {shown}")
    return missing


def _classify_failure(error: Exception):
    """Map *error* to 404, 401 or 400, or None when it is not recognised.

    The exception's integer ``status_code`` attribute is used when it has
    one. The message text is only searched as a fallback, because digits
    such as "404" can occur anywhere in an error message.
    """
    status = getattr(error, "status_code", None)
    if isinstance(status, int):
        return status if status in (400, 401, 404) else None

    text = str(error)
    if "404" in text or "Resource not found" in text:
        return 404
    if "401" in text or "Unauthorized" in text:
        return 401
    if "InvalidRequestError" in text:
        return 400
    return None


def _print_failure_diagnosis(error: Exception, deployment_name: str) -> None:
    """Print cause/fix hints for *error*; print nothing if it is not recognised."""
    kind = _classify_failure(error)
    if kind == 404:
        lines = (
            "DIAGNOSIS: Deployment not found (404 error)",
            "",
            "Possible causes:",
            " 1. Deployment name is incorrect",
            " 2. Deployment doesn't exist in your Azure OpenAI resource",
            " 3. Deployment is in a different Azure region/resource",
            "",
            "How to fix:",
            " Option A: Create the deployment in Azure Portal",
            " 1. Go to https://portal.azure.com",
            " 2. Navigate to your Azure OpenAI resource",
            f" 3. Go to 'Model deployments' {_ARROW} 'Manage Deployments'",
            " 4. Create a new deployment:",
            " - Model: text-embedding-3-small (or text-embedding-ada-002)",
            f" - Deployment name: {deployment_name}",
            "",
            " Option B: Use existing deployment",
            " 1. Check what embedding deployments you already have in Azure Portal",
            " 2. Update AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME to match existing deployment",
            " 3. Common deployment names:",
            " - text-embedding-3-small",
            " - text-embedding-ada-002",
            " - embedding",
            "",
        )
    elif kind == 401:
        lines = (
            "DIAGNOSIS: Authentication failed (401 error)",
            "",
            "How to fix:",
            " 1. Verify AZURE_OPENAI_API_KEY is correct",
            " 2. Check that the key hasn't expired",
            " 3. Ensure the key matches the Azure OpenAI resource",
            "",
        )
    elif kind == 400:
        lines = (
            "DIAGNOSIS: Invalid request to Azure OpenAI API",
            "",
            "How to fix:",
            " 1. Check AZURE_OPENAI_API_VERSION (try '2024-02-01' or '2024-05-01-preview')",
            " 2. Verify AZURE_OPENAI_ENDPOINT format (should end with '/')",
            "",
        )
    else:
        lines = ()

    for line in lines:
        print(line)


def validate_azure_config() -> bool:
    """Validate the Azure OpenAI embeddings configuration.

    Reads the endpoint, API key, embedding deployment name and API version
    (default "2024-02-01") from the environment, reports which are set, and
    then requests one test embedding from the configured deployment. All
    results are printed to stdout.

    Returns:
        True if the test embedding was generated. False if a required
        variable is missing or the request failed for any reason; failures
        are reported on stdout, not raised.
    """
    rule = "=" * _RULE_WIDTH
    thin_rule = "-" * _RULE_WIDTH

    print(rule)
    print("Azure OpenAI Embeddings Deployment Validator")
    print(rule)
    print()

    # Check required environment variables
    required_vars = {
        "AZURE_OPENAI_ENDPOINT": os.getenv("AZURE_OPENAI_ENDPOINT"),
        "AZURE_OPENAI_API_KEY": os.getenv("AZURE_OPENAI_API_KEY"),
        "AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME": os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"),
        "AZURE_OPENAI_API_VERSION": os.getenv("AZURE_OPENAI_API_VERSION", "2024-02-01"),
    }

    print("1. Checking environment variables...")
    print(thin_rule)
    missing_vars = _report_env_vars(required_vars)
    print()

    if missing_vars:
        print(f"ERROR: Missing required environment variables: {', '.join(missing_vars)}")
        print()
        print("Fix: Add these variables to your .env file or HuggingFace Spaces secrets")
        return False

    print("2. Testing embeddings deployment...")
    print(thin_rule)

    # Bound before the try block so the failure diagnostics can always name
    # the deployment, even when constructing the client is what fails.
    deployment_name = required_vars["AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME"]

    try:
        # Initialize Azure OpenAI client
        client = AzureOpenAI(
            api_key=required_vars["AZURE_OPENAI_API_KEY"],
            api_version=required_vars["AZURE_OPENAI_API_VERSION"],
            azure_endpoint=required_vars["AZURE_OPENAI_ENDPOINT"],
        )

        print(f"Testing deployment: {deployment_name}")
        print()

        # Try to generate a test embedding
        response = client.embeddings.create(
            input="This is a test embedding.",
            model=deployment_name,
        )
        embedding_dim = len(response.data[0].embedding)
    except Exception as e:
        # Top-level boundary of a diagnostic tool: every failure is reported
        # to the user and turned into a False result instead of a traceback.
        print(f"{_FAIL_MARK} ERROR: Failed to generate embedding")
        print()
        print(f"Error message: {e}")
        print()

        # Provide helpful diagnostics
        _print_failure_diagnosis(e, deployment_name)

        print(rule)
        print(f"{_FAIL_MARK} Configuration validation FAILED")
        print(rule)
        return False

    print(f"{_OK_MARK} SUCCESS: Embedding generated successfully!")
    print(f" Embedding dimension: {embedding_dim}")
    print(f" Model used: {deployment_name}")
    print()
    print(rule)
    print(f"{_OK_MARK} All checks passed! Your Azure OpenAI embeddings configuration is correct.")
    print(rule)
    return True
def list_common_deployment_names():
    """Print a short list of embedding deployment names worth trying."""
    suggestions = (
        "text-embedding-3-small (recommended, most cost-effective)",
        "text-embedding-3-large (higher quality, more expensive)",
        "text-embedding-ada-002 (legacy, widely supported)",
        "embedding (generic name, check your Azure portal)",
    )

    # A blank line before and after sets the list apart from surrounding output.
    print()
    print("Common embedding deployment names to try:")
    for entry in suggestions:
        print(f" - {entry}")
    print()
if __name__ == "__main__":
    print()

    if not validate_azure_config():
        # Validation failed: suggest deployment names and exit with status 1.
        list_common_deployment_names()
        sys.exit(1)

    # Validation passed: print the follow-up checklist and exit with status 0.
    next_steps = (
        "",
        "Next steps:",
        " 1. If deploying to HuggingFace Spaces:",
        " - Add all Azure OpenAI secrets to HuggingFace Spaces settings",
        " - Ensure AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME matches your Azure deployment",
        " 2. Run the application:",
        " python app.py",
        "",
    )
    for line in next_steps:
        print(line)
    sys.exit(0)
|