AnirudhEsthuri-MV committed on
Commit
94aee85
·
1 Parent(s): f2a2584

Update llm.py

Files changed (1)
  1. llm.py +118 -122
llm.py CHANGED
@@ -21,15 +21,11 @@ api_key = os.getenv("MODEL_API_KEY")
 client = openai.OpenAI(api_key=api_key)
 bedrock_runtime = boto3.client(
     "bedrock-runtime",
-    region_name="us-west-2",
-    aws_access_key_id="AWS_ACCESS_KEY_ID",
-    aws_secret_access_key="AWS_SECRET_ACCESS_KEY"
 )

-# Ollama configuration
-OLLAMA_BASE_URL = os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")
-
-
 # ──────────────────────────────────────────────────────────────
 # Model switcher
 # ──────────────────────────────────────────────────────────────
@@ -170,140 +166,140 @@ def chat(messages, persona):
         total_tok = len(text.split())

         return text, dt, total_tok, (total_tok / dt if dt else total_tok)
-    elif provider == "meta":
-        print("Using meta (LLaMA): ", MODEL_STRING)
-        t0 = time.time()

-        # Add system prompt for better behavior
-        system_prompt = ""

-        # Format conversation properly for Llama3
-        formatted_prompt = "<|begin_of_text|>"

-        # Add system prompt
-        formatted_prompt += "<|start_header_id|>system<|end_header_id|>\n" + system_prompt + "<|eot_id|>\n"

-        # Add conversation history
-        for msg in messages:
-            if msg["role"] == "user":
-                formatted_prompt += "<|start_header_id|>user<|end_header_id|>\n" + msg["content"] + "<|eot_id|>\n"
-            elif msg["role"] == "assistant":
-                formatted_prompt += "<|start_header_id|>assistant<|end_header_id|>\n" + msg["content"] + "<|eot_id|>\n"

-        # Add final assistant prompt
-        formatted_prompt += "<|start_header_id|>assistant<|end_header_id|>\n"
-
-        response = bedrock_runtime.invoke_model(
-            modelId=MODEL_STRING,
-            contentType="application/json",
-            accept="application/json",
-            body=json.dumps(
-                {
-                    "prompt": formatted_prompt,
-                    "max_gen_len": 512,  # Shorter responses
-                    "temperature": 0.3,  # Lower temperature for more focused responses
-                }
-            ),
-        )
-
-        dt = time.time() - t0
-        body = json.loads(response["body"].read())
-        text = body.get("generation", "").strip()
-        total_tok = len(text.split())
-
-        return text, dt, total_tok, (total_tok / dt if dt else total_tok)
-    elif provider == "mistral":
-        print("Using mistral: ", MODEL_STRING)
-        t0 = time.time()
-
-        prompt = messages[-1]["content"]
-        formatted_prompt = f"<s>[INST] {prompt} [/INST]"
-
-        response = bedrock_runtime.invoke_model(
-            modelId=MODEL_STRING,
-            contentType="application/json",
-            accept="application/json",
-            body=json.dumps(
-                {"prompt": formatted_prompt, "max_tokens": 512, "temperature": 0.5}
-            ),
-        )
-
-        dt = time.time() - t0
-        body = json.loads(response["body"].read())
-
-        text = body["outputs"][0]["text"].strip()
-        total_tok = len(text.split())
-
-        return text, dt, total_tok, (total_tok / dt if dt else total_tok)
-    elif provider == "ollama":
-        print("Using ollama: ", MODEL_STRING)
-        t0 = time.time()

-        # Format messages for Ollama API with system prompt
-        ollama_messages = []

-        # Add system prompt for better behavior
-        system_prompt = ""
-        ollama_messages.append({
-            "role": "system",
-            "content": system_prompt
-        })

-        for msg in messages:
-            ollama_messages.append({
-                "role": msg["role"],
-                "content": msg["content"]
-            })

-        # Make request to Ollama API
-        response = requests.post(
-            f"{OLLAMA_BASE_URL}/api/chat",
-            json={
-                "model": MODEL_STRING,
-                "messages": ollama_messages,
-                "stream": False,
-                "options": {
-                    "temperature": 0.3,  # Lower temperature for more focused responses
-                    # "num_predict": 4000,  # Much higher limit for longer responses
-                    "top_p": 0.9,
-                    "repeat_penalty": 1.1
-                }
-            },
-            timeout=60
-        )

-        dt = time.time() - t0

-        if response.status_code == 200:
-            result = response.json()
-            text = result["message"]["content"].strip()
-            total_tok = len(text.split())
-            return text, dt, total_tok, (total_tok / dt if dt else total_tok)
-        else:
-            raise Exception(f"Ollama API error: {response.status_code} - {response.text}")


 # ──────────────────────────────────────────────────────────────
 # Diagnostics / CLI test
 # ──────────────────────────────────────────────────────────────
 def check_credentials():
-    # Check if using Ollama (no API key required)
-    if MODEL_TO_PROVIDER.get(MODEL_STRING) == "ollama":
-        # Test Ollama connection
-        try:
-            response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
-            if response.status_code == 200:
-                print("Ollama connection successful")
-                return True
-            else:
-                print(f"Ollama connection failed: {response.status_code}")
-                return False
-        except Exception as e:
-            print(f"Ollama connection failed: {e}")
-            return False

     # Check if using Bedrock providers (anthropic, meta, mistral, deepseek)
-    bedrock_providers = ["anthropic", "meta", "mistral", "deepseek"]
     if MODEL_TO_PROVIDER.get(MODEL_STRING) in bedrock_providers:
         # Test AWS Bedrock connection by trying to invoke a simple model
         try:
 
 client = openai.OpenAI(api_key=api_key)
 bedrock_runtime = boto3.client(
     "bedrock-runtime",
+    region_name="us-east-1",
+    aws_access_key_id=os.getenv("AWS_ACCESS_ID"),
+    aws_secret_access_key=os.getenv("AWS_SECRET_ACCESS_KEY"),
 )

 # ──────────────────────────────────────────────────────────────
 # Model switcher
 # ──────────────────────────────────────────────────────────────
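With the keys now read from environment variables, the explicit keyword arguments are actually optional: boto3 resolves credentials through its default chain (AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables, ~/.aws/credentials, or an attached IAM role). A minimal sketch of that alternative, assuming the standard AWS variables are exported (this is an aside, not part of the commit):

    import os
    import boto3

    # With no keys passed, boto3 falls back to its default credential chain.
    region = os.getenv("AWS_REGION", "us-east-1")  # default mirrors the commit's region
    bedrock_runtime = boto3.client("bedrock-runtime", region_name=region)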
 
         total_tok = len(text.split())

         return text, dt, total_tok, (total_tok / dt if dt else total_tok)
+    # elif provider == "meta":
+    #     print("Using meta (LLaMA): ", MODEL_STRING)
+    #     t0 = time.time()

+    #     # Add system prompt for better behavior
+    #     system_prompt = ""

+    #     # Format conversation properly for Llama3
+    #     formatted_prompt = "<|begin_of_text|>"

+    #     # Add system prompt
+    #     formatted_prompt += "<|start_header_id|>system<|end_header_id|>\n" + system_prompt + "<|eot_id|>\n"

+    #     # Add conversation history
+    #     for msg in messages:
+    #         if msg["role"] == "user":
+    #             formatted_prompt += "<|start_header_id|>user<|end_header_id|>\n" + msg["content"] + "<|eot_id|>\n"
+    #         elif msg["role"] == "assistant":
+    #             formatted_prompt += "<|start_header_id|>assistant<|end_header_id|>\n" + msg["content"] + "<|eot_id|>\n"

+    #     # Add final assistant prompt
+    #     formatted_prompt += "<|start_header_id|>assistant<|end_header_id|>\n"
+
+    #     response = bedrock_runtime.invoke_model(
+    #         modelId=MODEL_STRING,
+    #         contentType="application/json",
+    #         accept="application/json",
+    #         body=json.dumps(
+    #             {
+    #                 "prompt": formatted_prompt,
+    #                 "max_gen_len": 512,  # Shorter responses
+    #                 "temperature": 0.3,  # Lower temperature for more focused responses
+    #             }
+    #         ),
+    #     )
+
+    #     dt = time.time() - t0
+    #     body = json.loads(response["body"].read())
+    #     text = body.get("generation", "").strip()
+    #     total_tok = len(text.split())
+
+    #     return text, dt, total_tok, (total_tok / dt if dt else total_tok)
+    # elif provider == "mistral":
+    #     print("Using mistral: ", MODEL_STRING)
+    #     t0 = time.time()
+
+    #     prompt = messages[-1]["content"]
+    #     formatted_prompt = f"<s>[INST] {prompt} [/INST]"
+
+    #     response = bedrock_runtime.invoke_model(
+    #         modelId=MODEL_STRING,
+    #         contentType="application/json",
+    #         accept="application/json",
+    #         body=json.dumps(
+    #             {"prompt": formatted_prompt, "max_tokens": 512, "temperature": 0.5}
+    #         ),
+    #     )
+
+    #     dt = time.time() - t0
+    #     body = json.loads(response["body"].read())
+
+    #     text = body["outputs"][0]["text"].strip()
+    #     total_tok = len(text.split())
+
+    #     return text, dt, total_tok, (total_tok / dt if dt else total_tok)
+    # elif provider == "ollama":
+    #     print("Using ollama: ", MODEL_STRING)
+    #     t0 = time.time()

+    #     # Format messages for Ollama API with system prompt
+    #     ollama_messages = []

+    #     # Add system prompt for better behavior
+    #     system_prompt = ""
+    #     ollama_messages.append({
+    #         "role": "system",
+    #         "content": system_prompt
+    #     })

+    #     for msg in messages:
+    #         ollama_messages.append({
+    #             "role": msg["role"],
+    #             "content": msg["content"]
+    #         })

+    #     # Make request to Ollama API
+    #     response = requests.post(
+    #         f"{OLLAMA_BASE_URL}/api/chat",
+    #         json={
+    #             "model": MODEL_STRING,
+    #             "messages": ollama_messages,
+    #             "stream": False,
+    #             "options": {
+    #                 "temperature": 0.3,  # Lower temperature for more focused responses
+    #                 # "num_predict": 4000,  # Much higher limit for longer responses
+    #                 "top_p": 0.9,
+    #                 "repeat_penalty": 1.1
+    #             }
+    #         },
+    #         timeout=60
+    #     )

+    #     dt = time.time() - t0

+    #     if response.status_code == 200:
+    #         result = response.json()
+    #         text = result["message"]["content"].strip()
+    #         total_tok = len(text.split())
+    #         return text, dt, total_tok, (total_tok / dt if dt else total_tok)
+    #     else:
+    #         raise Exception(f"Ollama API error: {response.status_code} - {response.text}")

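With the meta, mistral, and ollama branches commented out, the Bedrock-hosted Anthropic path is what remains of chat()'s dispatch; its branch sits above this hunk and is not shown. For orientation only, a hedged sketch of what a Claude call through Bedrock's Messages API generally looks like (the model ID and payload values below are assumptions, not code from this file):

    import json

    body = json.dumps({
        "anthropic_version": "bedrock-2023-05-31",  # required field for Bedrock's Anthropic Messages API
        "max_tokens": 512,
        "messages": [{"role": "user", "content": "Hello"}],
    })
    response = bedrock_runtime.invoke_model(
        modelId="anthropic.claude-3-haiku-20240307-v1:0",  # assumed model ID
        contentType="application/json",
        accept="application/json",
        body=body,
    )
    text = json.loads(response["body"].read())["content"][0]["text"]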
 # ──────────────────────────────────────────────────────────────
 # Diagnostics / CLI test
 # ──────────────────────────────────────────────────────────────
 def check_credentials():
+    # # Check if using Ollama (no API key required)
+    # if MODEL_TO_PROVIDER.get(MODEL_STRING) == "ollama":
+    #     # Test Ollama connection
+    #     try:
+    #         response = requests.get(f"{OLLAMA_BASE_URL}/api/tags", timeout=5)
+    #         if response.status_code == 200:
+    #             print("Ollama connection successful")
+    #             return True
+    #         else:
+    #             print(f"Ollama connection failed: {response.status_code}")
+    #             return False
+    #     except Exception as e:
+    #         print(f"Ollama connection failed: {e}")
+    #         return False

     # Check if using Bedrock providers (anthropic, meta, mistral, deepseek)
+    bedrock_providers = ["anthropic"]
     if MODEL_TO_PROVIDER.get(MODEL_STRING) in bedrock_providers:
         # Test AWS Bedrock connection by trying to invoke a simple model
         try:
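The body of this try block falls outside the hunk, so the actual check is not visible here. One common lightweight way to verify Bedrock connectivity is a cheap control-plane call; a hedged sketch under that assumption, not the code in this file:

    import boto3

    def bedrock_reachable(region: str = "us-east-1") -> bool:
        try:
            # The "bedrock" control-plane client (distinct from "bedrock-runtime")
            # exposes list_foundation_models, an authenticated call that fails
            # fast on bad credentials without paying for a model invocation.
            boto3.client("bedrock", region_name=region).list_foundation_models()
            return True
        except Exception as e:  # broad except mirrors the file's own style
            print(f"Bedrock connection failed: {e}")
            return False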