Commit 3a73f5d · Anirudh Esthuri committed · Parent: 4cd3a4a

Add Gemini 3.0 Pro and Gemini 2.5 Flash model support via AWS Bedrock

Files changed:
- llm.py (+101, -2)
- model_config.py (+16, -0)
llm.py
CHANGED
@@ -172,6 +172,105 @@ def chat(messages, persona):
         ).strip()
         total_tok = len(text.split())
 
+        return text, dt, total_tok, (total_tok / dt if dt else total_tok)
+    elif provider == "google":
+        print("Using google (Gemini): ", MODEL_STRING)
+        t0 = time.time()
+
+        # Add system prompt for better behavior
+        system_prompt = ""
+
+        # Convert messages to Gemini format
+        # Gemini uses "user" and "model" roles, and content is an array
+        gemini_messages = []
+        for msg in messages:
+            role = msg.get("role", "user")
+            # Gemini uses "model" instead of "assistant"
+            if role == "assistant":
+                role = "model"
+            gemini_messages.append({
+                "role": role,
+                "parts": [{"text": msg["content"]}]
+            })
+
+        try:
+            bedrock_runtime = get_bedrock_client()
+
+            # Use inference profile ARN if available (for provisioned throughput models)
+            # Otherwise use modelId (for on-demand models)
+            invoke_kwargs = {
+                "contentType": "application/json",
+                "accept": "application/json",
+                "body": json.dumps(
+                    {
+                        "contents": gemini_messages,
+                        "generationConfig": {
+                            "maxOutputTokens": 4000,
+                            "temperature": 0.3,
+                        }
+                    }
+                ),
+            }
+
+            # Add system instruction if provided
+            if system_prompt:
+                invoke_kwargs["body"] = json.dumps(
+                    {
+                        "contents": gemini_messages,
+                        "systemInstruction": {
+                            "parts": [{"text": system_prompt}]
+                        },
+                        "generationConfig": {
+                            "maxOutputTokens": 4000,
+                            "temperature": 0.3,
+                        }
+                    }
+                )
+
+            # Check if this model has an inference profile ARN (provisioned throughput)
+            # For provisioned throughput, use the ARN as the modelId
+            if MODEL_STRING in MODEL_TO_INFERENCE_PROFILE_ARN:
+                invoke_kwargs["modelId"] = MODEL_TO_INFERENCE_PROFILE_ARN[MODEL_STRING]
+            else:
+                invoke_kwargs["modelId"] = MODEL_STRING
+
+            response = bedrock_runtime.invoke_model(**invoke_kwargs)
+
+            dt = time.time() - t0
+            body = json.loads(response["body"].read())
+        except ValueError as e:
+            # Re-raise ValueError (credential errors) as-is
+            raise
+        except Exception as e:
+            error_msg = str(e)
+            if "ValidationException" in error_msg and "model identifier is invalid" in error_msg:
+                raise ValueError(
+                    f"Invalid Bedrock model ID: '{MODEL_STRING}'. "
+                    f"Error: {error_msg}. "
+                    "Please verify the model ID is correct and the model is available in your AWS region. "
+                    "Common Gemini model IDs: 'google.gemini-pro-v1' or 'google.gemini-2.0-flash-exp'"
+                ) from e
+            elif "UnrecognizedClientException" in error_msg or "invalid" in error_msg.lower():
+                raise ValueError(
+                    f"AWS Bedrock authentication failed: {error_msg}. "
+                    "Please verify your AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY secrets "
+                    "are correct and have Bedrock access permissions."
+                ) from e
+            raise
+
+        # Extract text from Gemini response
+        # Gemini response format: {"candidates": [{"content": {"parts": [{"text": "..."}]}}]}
+        text = ""
+        if "candidates" in body and len(body["candidates"]) > 0:
+            candidate = body["candidates"][0]
+            if "content" in candidate and "parts" in candidate["content"]:
+                for part in candidate["content"]["parts"]:
+                    if "text" in part:
+                        text += part["text"]
+
+        text = text.strip()
+        total_tok = len(text.split())
+
         return text, dt, total_tok, (total_tok / dt if dt else total_tok)
     elif provider == "deepseek":
         print("Using deepseek: ", MODEL_STRING)

@@ -378,8 +477,8 @@ def check_credentials():
         # print(f"Ollama connection failed: {e}")
         # return False
 
-    # Check if using Bedrock providers (anthropic, meta, mistral, deepseek)
-    bedrock_providers = ["anthropic"]
+    # Check if using Bedrock providers (anthropic, google, meta, mistral, deepseek)
+    bedrock_providers = ["anthropic", "google"]
     if MODEL_TO_PROVIDER.get(MODEL_STRING) in bedrock_providers:
         # Test AWS Bedrock connection by trying to invoke a simple model
         try:
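For reviewers, here is a minimal offline sketch of the two conversions the new "google" branch performs: mapping OpenAI-style message dicts to Gemini's "contents"/"parts" request format, and extracting text from a "candidates" response. The sample messages and the mocked response body are invented for illustration only; nothing below calls Bedrock, so get_bedrock_client is not needed.

# Offline sketch (invented sample data; no AWS call is made).
import json

messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi there."},
    {"role": "user", "content": "Summarize AWS Bedrock in one line."},
]

# (1) Request: Gemini uses "model" instead of "assistant" and wraps
# each message's content in a "parts" array.
gemini_messages = []
for msg in messages:
    role = msg.get("role", "user")
    if role == "assistant":
        role = "model"
    gemini_messages.append({"role": role, "parts": [{"text": msg["content"]}]})

request_body = json.dumps({
    "contents": gemini_messages,
    "generationConfig": {"maxOutputTokens": 4000, "temperature": 0.3},
})

# (2) Response: a hand-written body matching the format noted in the diff,
# {"candidates": [{"content": {"parts": [{"text": "..."}]}}]}
body = {
    "candidates": [
        {"content": {"parts": [{"text": "Bedrock is AWS's managed "},
                               {"text": "foundation-model service."}]}}
    ]
}
text = ""
if "candidates" in body and len(body["candidates"]) > 0:
    candidate = body["candidates"][0]
    if "content" in candidate and "parts" in candidate["content"]:
        for part in candidate["content"]["parts"]:
            if "text" in part:
                text += part["text"]
print(text.strip())  # -> Bedrock is AWS's managed foundation-model service.

Note the diff marks the first return as added and the last as context; that is just how the differ aligned the insertion, since the new google branch ends with the same return statement the anthropic branch already had.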
model_config.py
CHANGED
@@ -10,6 +10,10 @@ PROVIDER_MODEL_MAP = {
         "anthropic.claude-sonnet-4-5-20250929-v1:0",
         "anthropic.claude-opus-4-20250514-v1:0",
     ],
+    "google": [
+        "google.gemini-3.0-pro-v1:0",
+        "google.gemini-2.5-flash-v1:0",
+    ],
 }
 
 
@@ -28,6 +32,8 @@ MODEL_DISPLAY_NAMES = {
     "anthropic.claude-haiku-4-5-20251001-v1:0": "AWS Bedrock - Anthropic - Claude Haiku 4.5",
     "anthropic.claude-sonnet-4-5-20250929-v1:0": "AWS Bedrock - Anthropic - Claude Sonnet 4.5",
     "anthropic.claude-opus-4-20250514-v1:0": "AWS Bedrock - Anthropic - Claude Opus 4",
+    "google.gemini-3.0-pro-v1:0": "AWS Bedrock - Google - Gemini 3.0 Pro",
+    "google.gemini-2.5-flash-v1:0": "AWS Bedrock - Google - Gemini 2.5 Flash",
 }
 
 MODEL_CHOICES = [model for models in PROVIDER_MODEL_MAP.values() for model in models]
@@ -51,3 +57,13 @@ if sonnet_arn:
 opus_arn = os.getenv("BEDROCK_OPUS_4_ARN", "").strip()
 if opus_arn:
     MODEL_TO_INFERENCE_PROFILE_ARN["anthropic.claude-opus-4-20250514-v1:0"] = opus_arn
+
+# Gemini 3.0 Pro
+gemini_3_arn = os.getenv("BEDROCK_GEMINI_3_ARN", "").strip()
+if gemini_3_arn:
+    MODEL_TO_INFERENCE_PROFILE_ARN["google.gemini-3.0-pro-v1:0"] = gemini_3_arn
+
+# Gemini 2.5 Flash
+gemini_2_5_arn = os.getenv("BEDROCK_GEMINI_2_5_ARN", "").strip()
+if gemini_2_5_arn:
+    MODEL_TO_INFERENCE_PROFILE_ARN["google.gemini-2.5-flash-v1:0"] = gemini_2_5_arn
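Usage note: the new MODEL_TO_INFERENCE_PROFILE_ARN entries only take effect when the BEDROCK_GEMINI_3_ARN / BEDROCK_GEMINI_2_5_ARN environment variables (Space secrets) are set; otherwise the bare model ID is used for on-demand invocation. A minimal sketch of the resulting modelId resolution follows; the ARN value is a placeholder, not a real inference profile.

# Sketch of how llm.py resolves modelId after this change.
# BEDROCK_GEMINI_3_ARN is set to a placeholder purely for demonstration.
import os

os.environ["BEDROCK_GEMINI_3_ARN"] = (
    "arn:aws:bedrock:us-east-1:123456789012:inference-profile/placeholder"
)

# Same pattern model_config.py uses to populate the mapping:
MODEL_TO_INFERENCE_PROFILE_ARN = {}
gemini_3_arn = os.getenv("BEDROCK_GEMINI_3_ARN", "").strip()
if gemini_3_arn:
    MODEL_TO_INFERENCE_PROFILE_ARN["google.gemini-3.0-pro-v1:0"] = gemini_3_arn

for model in ("google.gemini-3.0-pro-v1:0", "google.gemini-2.5-flash-v1:0"):
    # Same check the new "google" branch in llm.py performs:
    if model in MODEL_TO_INFERENCE_PROFILE_ARN:
        model_id = MODEL_TO_INFERENCE_PROFILE_ARN[model]  # provisioned throughput
    else:
        model_id = model  # on-demand fallback (no secret set)
    print(model, "->", model_id)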