#!/bin/bash
#
# Smoke test for the inference API: checks the health endpoint, then posts a
# randomly generated embedding to the inference endpoint.
#
# Optional environment overrides: API_HOST, API_PORT, API_VERSION.

set -euo pipefail

# Connection settings. Each value falls back to the local default when the
# corresponding environment variable is unset or empty.
API_HOST="${API_HOST:-localhost}"
API_PORT="${API_PORT:-8000}"
API_VERSION="${API_VERSION:-v1}"
BASE_URL="http://${API_HOST}:${API_PORT}/api/${API_VERSION}"

#######################################
# Print a random, unit-length embedding vector as a JSON array.
# Arguments: $1 - number of dimensions (optional, default 4096)
# Outputs:   JSON array of floats on stdout, without a trailing newline
# Returns:   0 on success, 2 for an invalid dimension, otherwise the exit
#            status of python3 (which must have numpy installed)
#######################################
generate_test_embedding() {
  local dim="${1:-4096}"

  if [[ ! "${dim}" =~ ^[1-9][0-9]*$ ]]; then
    echo "generate_test_embedding: dimension must be a positive integer, got '${dim}'" >&2
    return 2
  fi

  # The quoted delimiter passes the Python source through verbatim, so the
  # shell never expands anything inside it; the dimension travels via argv.
  python3 - "${dim}" <<'EOF'
import json
import sys

import numpy as np

dim = int(sys.argv[1])
# Draw a random float32 vector, then scale it to unit L2 norm.
embedding = np.random.randn(dim).astype(np.float32)
embedding = embedding / np.linalg.norm(embedding)
print(json.dumps(embedding.tolist()), end="")
EOF
}

#######################################
# Probe the API health endpoint and print its response.
# Globals:   BASE_URL (read)
# Arguments: none
# Outputs:   a progress line and the response body on stdout; failure
#            diagnostics on stderr
# Returns:   0 on success; exits the script with status 1 on failure
#######################################
test_health() {
  echo "Testing health endpoint..."
  # --fail turns HTTP 4xx/5xx responses into a non-zero exit status; without
  # it only connection-level errors would be detected. --show-error keeps
  # curl's own diagnostic visible despite --silent.
  if ! curl --fail --silent --show-error "${BASE_URL}/health"; then
    echo "Health check failed" >&2
    exit 1
  fi
}

#######################################
# POST a freshly generated embedding to the inference endpoint and report how
# long the request took.
# Globals:   BASE_URL (read)
# Arguments: none
# Outputs:   progress text, the server response and the elapsed whole seconds
#            on stdout; failure diagnostics on stderr
# Returns:   0 on success; exits the script with status 1 if the embedding
#            cannot be generated or the request fails
#######################################
test_inference() {
  local embedding_data payload start_time end_time duration

  echo
  echo "Testing inference endpoint..."

  # Assigned separately from the `local` declaration so that a failing
  # generator is detected instead of being masked by `local`'s own status.
  if ! embedding_data="$(generate_test_embedding)" || [[ -z "${embedding_data}" ]]; then
    echo "Failed to generate test embedding" >&2
    exit 1
  fi
  payload="{\"embedding\": ${embedding_data}}"

  # Time only the HTTP round trip, not the embedding generation.
  start_time=$(date +%s)
  # The body is fed on stdin rather than argv: the JSON for a large vector
  # can approach the kernel's per-argument size limit. --fail makes HTTP
  # 4xx/5xx responses count as failures.
  curl --fail --silent --show-error -X POST "${BASE_URL}/inference" \
    -H "Content-Type: application/json" \
    --data-binary @- <<<"${payload}" || {
    echo "Inference request failed" >&2
    exit 1
  }
  end_time=$(date +%s)
  duration=$((end_time - start_time))
  echo "Inference request completed in ${duration} seconds"
}

#######################################
# Run the API smoke tests in order: health check first, then inference.
# Arguments: none are used
# Returns:   0 when both tests succeed; a failing test exits the script
#######################################
main() {
  test_health
  test_inference
}

# Script entry point: forward the command-line arguments to main.
main "$@"