Spaces:
Running
Running
Anirudh Esthuri
committed on
Commit
·
4cd3a4a
1
Parent(s):
9b52469
Fix: Use inference profile ARN as modelId parameter (not inferenceProfileIdentifier)
Browse files
llm.py
CHANGED
|
@@ -137,8 +137,9 @@ def chat(messages, persona):
|
|
| 137 |
}
|
| 138 |
|
| 139 |
# Check if this model has an inference profile ARN (provisioned throughput)
|
|
|
|
| 140 |
if MODEL_STRING in MODEL_TO_INFERENCE_PROFILE_ARN:
|
| 141 |
-
invoke_kwargs["inferenceProfileIdentifier"] = MODEL_TO_INFERENCE_PROFILE_ARN[MODEL_STRING]  [truncated in extraction; reconstructed from commit message — verify against original diff]
|
| 142 |
else:
|
| 143 |
invoke_kwargs["modelId"] = MODEL_STRING
|
| 144 |
|
|
@@ -396,9 +397,9 @@ def check_credentials():
|
|
| 396 |
})
|
| 397 |
}
|
| 398 |
|
| 399 |
-
# Use inference profile ARN if available
|
| 400 |
if test_model in MODEL_TO_INFERENCE_PROFILE_ARN:
|
| 401 |
-
test_kwargs["inferenceProfileIdentifier"] = MODEL_TO_INFERENCE_PROFILE_ARN[test_model]  [truncated in extraction; reconstructed from commit message — verify against original diff]
|
| 402 |
else:
|
| 403 |
test_kwargs["modelId"] = test_model
|
| 404 |
|
|
|
|
| 137 |
}
|
| 138 |
|
| 139 |
# Check if this model has an inference profile ARN (provisioned throughput)
|
| 140 |
+
# For provisioned throughput, use the ARN as the modelId
|
| 141 |
if MODEL_STRING in MODEL_TO_INFERENCE_PROFILE_ARN:
|
| 142 |
+
invoke_kwargs["modelId"] = MODEL_TO_INFERENCE_PROFILE_ARN[MODEL_STRING]
|
| 143 |
else:
|
| 144 |
invoke_kwargs["modelId"] = MODEL_STRING
|
| 145 |
|
|
|
|
| 397 |
})
|
| 398 |
}
|
| 399 |
|
| 400 |
+
# Use inference profile ARN if available (use ARN as modelId for provisioned throughput)
|
| 401 |
if test_model in MODEL_TO_INFERENCE_PROFILE_ARN:
|
| 402 |
+
test_kwargs["modelId"] = MODEL_TO_INFERENCE_PROFILE_ARN[test_model]
|
| 403 |
else:
|
| 404 |
test_kwargs["modelId"] = test_model
|
| 405 |
|