Anirudh Esthuri committed on
Commit
4cd3a4a
·
1 Parent(s): 9b52469

Fix: Use inference profile ARN as modelId parameter (not inferenceProfileIdentifier)

Browse files
Files changed (1) hide show
  1. llm.py +4 -3
llm.py CHANGED
@@ -137,8 +137,9 @@ def chat(messages, persona):
137
  }
138
 
139
  # Check if this model has an inference profile ARN (provisioned throughput)
 
140
  if MODEL_STRING in MODEL_TO_INFERENCE_PROFILE_ARN:
141
- invoke_kwargs["inferenceProfileIdentifier"] = MODEL_TO_INFERENCE_PROFILE_ARN[MODEL_STRING]
142
  else:
143
  invoke_kwargs["modelId"] = MODEL_STRING
144
 
@@ -396,9 +397,9 @@ def check_credentials():
396
  })
397
  }
398
 
399
- # Use inference profile ARN if available
400
  if test_model in MODEL_TO_INFERENCE_PROFILE_ARN:
401
- test_kwargs["inferenceProfileIdentifier"] = MODEL_TO_INFERENCE_PROFILE_ARN[test_model]
402
  else:
403
  test_kwargs["modelId"] = test_model
404
 
 
137
  }
138
 
139
  # Check if this model has an inference profile ARN (provisioned throughput)
140
+ # For provisioned throughput, use the ARN as the modelId
141
  if MODEL_STRING in MODEL_TO_INFERENCE_PROFILE_ARN:
142
+ invoke_kwargs["modelId"] = MODEL_TO_INFERENCE_PROFILE_ARN[MODEL_STRING]
143
  else:
144
  invoke_kwargs["modelId"] = MODEL_STRING
145
 
 
397
  })
398
  }
399
 
400
+ # Use inference profile ARN if available (use ARN as modelId for provisioned throughput)
401
  if test_model in MODEL_TO_INFERENCE_PROFILE_ARN:
402
+ test_kwargs["modelId"] = MODEL_TO_INFERENCE_PROFILE_ARN[test_model]
403
  else:
404
  test_kwargs["modelId"] = test_model
405