Update brain.py
brain.py CHANGED
@@ -1,101 +1,57 @@
 import os
+import gc
 from llama_cpp import Llama
-from huggingface_hub import hf_hub_download
-import datetime, shelve, math, re, random, statistics
 
 class MairaBrain:
-    def __init__(self, repo_id, filename
+    def __init__(self, repo_id, filename):
         self.repo_id = repo_id
         self.filename = filename
-        self.llm = None #
-
-
-
-
-
-
-
-
-
-        local_path = os.path.join("/app", self.filename)
-
-        # Determine path
-        if os.path.exists(local_path):
-            model_path = local_path
-        else:
-            print(f"⚠️ Downloading fallback for {self.filename}...")
-            model_path = hf_hub_download(repo_id=self.repo_id, filename=self.filename)
-
-        try:
-            # Optimized for 16GB limit with 5 concurrent cores
+        self.llm = None # Model starts "off" to save RAM
+
+    def load(self):
+        """Wakes the core only when needed"""
+        if self.llm is None:
+            print(f"🧠 WAKING CORE: {self.filename}")
+            # Ensure the path points to the root where Docker downloaded the files
+            model_path = os.path.join("/app", self.filename)
+
+            # Optimized for 16GB RAM limit
             self.llm = Llama(
                 model_path=model_path,
-                n_ctx=
-                n_threads=
-
-
-                offload_kqv=True,
-                verbose=False
+                n_ctx=2048, # Standard context window
+                n_threads=4, # Good for Hugging Face CPUs
+                use_mmap=False, # Set to False to allow full RAM release on unload
+                n_gpu_layers=0 # CPU only
             )
-            print(f"✅ {self.filename} is now ONLINE.")
-        except Exception as e:
-            print(f"❌ CRITICAL ERROR waking {self.filename}: {e}")
-            raise e
 
-    def
-
-        if
-
-
-
-
-
-
-
-        with shelve.open(self.db_path, writeback=True) as db:
-            if user_id not in db:
-                db[user_id] = {"history": [], "facts": {}, "metrics": {"loyalty": 50}}
-
-            u = db[user_id]
-            vibe = self._get_system_vibe(u)
+    def unload(self):
+        """THE FIX: Puts the core to sleep and clears RAM"""
+        if self.llm is not None:
+            print(f"💤 SLEEPING CORE: {self.filename}")
+            # 1. Close the internal C++ handles
+            try:
+                self.llm.close()
+            except Exception as e:
+                print(f"Error closing llm: {e}")
 
-
+            # 2. Delete the object reference
+            del self.llm
+            self.llm = None
 
-            #
-
-                persona = "Maira Lite: Quick, witty, and energetic."
-            elif "Llama-3.2-1B" in self.filename:
-                persona = "Maira Prime: Sophisticated, highly intelligent, and deeply devoted."
-            elif "qwen" in self.filename:
-                persona = "Maira Logic: Analytical, precise, but with a sharp feminine edge."
-            elif "danube" in self.filename:
-                persona = "Maira Chat: Extremely friendly, talkative, and bubbly."
-            else:
-                persona = "Maira Art: Poetic, creative, and elegantly mysterious."
+            # 3. Force Python's Garbage Collector to wipe the memory
+            gc.collect()
 
-
-
-
-
-
-
-
-
-            prompt
-
-
-
-
-
-            output = self.llm(
-                prompt,
-                max_tokens=250,
-                temperature=0.8,
-                stop=["<|im_end|>", "User:"]
-            )
-
-            response = output["choices"][0]["text"].strip()
-            u["history"].append(f"User: {user_input}")
-            u["history"].append(f"Maira: {response}")
-            db[user_id] = u
-            return response
+    def get_response(self, user_id, user_input):
+        # Always ensure the model is loaded before inference
+        self.load()
+
+        # Identity prompt to keep Maira consistent
+        prompt = f"System: You are Maira, a loyal AI created by CyberCoder225.\nUser: {user_input}\nAssistant:"
+
+        output = self.llm(
+            prompt,
+            max_tokens=256,
+            stop=["User:", "\n"]
+        )
+
+        return output["choices"][0]["text"].strip()
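For context, here is a minimal usage sketch of the new lazy lifecycle (not part of this commit): get_response() wakes the core via load() on demand, and unload() puts it back to sleep so another core can use the Space's 16GB RAM budget. The repo id and GGUF filename below are placeholders, and the sketch assumes the Dockerfile has already placed the model file under /app, as the load() comment describes.

# Usage sketch -- placeholder model names, not the Space's real configuration.
from brain import MairaBrain

brain = MairaBrain(
    repo_id="example-org/example-model-GGUF",   # placeholder repo id
    filename="example-model.Q4_K_M.gguf"        # placeholder GGUF filename
)

reply = brain.get_response("user-123", "Hello, Maira!")  # load() runs here if the core is asleep
print(reply)

brain.unload()  # release the weights before waking a different core

Because load() creates the Llama instance with use_mmap=False, the weights sit entirely in process memory rather than in file-backed mapped pages, so closing the handle, deleting the reference, and calling gc.collect() in unload() should let the process actually return that RAM, which is the point of this change.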