Spaces:
Running
Running
Anirudh Esthuri
committed on
Commit
·
4cd3a4a
1
Parent(s):
9b52469
Fix: Use inference profile ARN as modelId parameter (not inferenceProfileIdentifier)
Browse files
llm.py
CHANGED
|
@@ -137,8 +137,9 @@ def chat(messages, persona):
|
|
| 137 |
}
|
| 138 |
|
| 139 |
# Check if this model has an inference profile ARN (provisioned throughput)
|
|
|
|
| 140 |
if MODEL_STRING in MODEL_TO_INFERENCE_PROFILE_ARN:
|
| 141 |
-
invoke_kwargs["inferenceProfileIdentifier"] = MODEL_TO_INFERENCE_PROFILE_ARN[MODEL_STRING]  [truncated in extraction; reconstructed from commit message — verify against original diff]
|
| 142 |
else:
|
| 143 |
invoke_kwargs["modelId"] = MODEL_STRING
|
| 144 |
|
|
@@ -396,9 +397,9 @@ def check_credentials():
|
|
| 396 |
})
|
| 397 |
}
|
| 398 |
|
| 399 |
-
# Use inference profile ARN if available
|
| 400 |
if test_model in MODEL_TO_INFERENCE_PROFILE_ARN:
|
| 401 |
-
test_kwargs["inferenceProfileIdentifier"] = MODEL_TO_INFERENCE_PROFILE_ARN[test_model]  [truncated in extraction; reconstructed from commit message — verify against original diff]
|
| 402 |
else:
|
| 403 |
test_kwargs["modelId"] = test_model
|
| 404 |
|
|
|
|
| 137 |
}
|
| 138 |
|
| 139 |
# Check if this model has an inference profile ARN (provisioned throughput)
|
| 140 |
+
# For provisioned throughput, use the ARN as the modelId
|
| 141 |
if MODEL_STRING in MODEL_TO_INFERENCE_PROFILE_ARN:
|
| 142 |
+
invoke_kwargs["modelId"] = MODEL_TO_INFERENCE_PROFILE_ARN[MODEL_STRING]
|
| 143 |
else:
|
| 144 |
invoke_kwargs["modelId"] = MODEL_STRING
|
| 145 |
|
|
|
|
| 397 |
})
|
| 398 |
}
|
| 399 |
|
| 400 |
+
# Use inference profile ARN if available (use ARN as modelId for provisioned throughput)
|
| 401 |
if test_model in MODEL_TO_INFERENCE_PROFILE_ARN:
|
| 402 |
+
test_kwargs["modelId"] = MODEL_TO_INFERENCE_PROFILE_ARN[test_model]
|
| 403 |
else:
|
| 404 |
test_kwargs["modelId"] = test_model
|
| 405 |
|