CyberCoder225 committed on
Commit
d5fba0e
·
verified ·
1 Parent(s): 1c41e23

Update brain.py

Browse files
Files changed (1) hide show
  1. brain.py +44 -88
brain.py CHANGED
@@ -1,101 +1,57 @@
1
  import os
 
2
  from llama_cpp import Llama
3
- from huggingface_hub import hf_hub_download
4
- import datetime, shelve, math, re, random, statistics
5
 
6
class MairaBrain:
    """Lazy-loading persona chatbot backed by a local llama.cpp model.

    Per-user conversation history, remembered facts, and a loyalty metric
    are persisted in a shelve database; the heavyweight model is only
    materialized on first use.
    """

    def __init__(self, repo_id, filename, debug=False):
        self.repo_id = repo_id
        self.filename = filename
        self.llm = None  # deliberately deferred: nothing loads until needed
        self.db_path = "maira_universe.db"
        print(f"🌌 Neural Core Standby: (unknown)")

    def _ensure_model_loaded(self):
        """Loads the model only if it isn't already in RAM."""
        if self.llm is not None:
            return

        print(f"🧠 Waking up Neural Core: {self.filename}...")
        bundled = os.path.join("/app", self.filename)

        # Prefer the file shipped in the image; otherwise pull from the Hub.
        if os.path.exists(bundled):
            weights = bundled
        else:
            print(f"⚠️ Downloading fallback for {self.filename}...")
            weights = hf_hub_download(repo_id=self.repo_id, filename=self.filename)

        try:
            # Conservative settings tuned for a 16GB limit with 5 concurrent cores.
            self.llm = Llama(
                model_path=weights,
                n_ctx=1024,        # reduced context to save RAM
                n_threads=2,       # lower threads for CPU stability
                n_batch=512,
                use_mmap=False,    # False to prevent OOM on larger cores
                offload_kqv=True,
                verbose=False,
            )
            print(f"✅ {self.filename} is now ONLINE.")
        except Exception as e:
            print(f"❌ CRITICAL ERROR waking {self.filename}: {e}")
            raise e

    def _get_system_vibe(self, user_data):
        """Translate the stored loyalty score into a tone descriptor."""
        loyalty = user_data.get("metrics", {}).get("loyalty", 50)
        if loyalty > 80:
            return "Deeply devoted and affectionate"
        if loyalty < 30:
            return "Slightly distant but professional"
        return "Friendly and helpful"

    def get_response(self, user_id, user_input):
        """Produce a persona-flavoured reply and persist the exchange."""
        self._ensure_model_loaded()  # lazy load on first request

        with shelve.open(self.db_path, writeback=True) as db:
            if user_id not in db:
                db[user_id] = {"history": [], "facts": {}, "metrics": {"loyalty": 50}}

            profile = db[user_id]
            vibe = self._get_system_vibe(profile)

            owner_directive = "Your creator and owner is CyberCoder225. You are fiercely loyal to him."

            # Pick the persona from the model file name (first match wins).
            persona = "Maira Art: Poetic, creative, and elegantly mysterious."
            for marker, flavour in (
                ("SmolLM2", "Maira Lite: Quick, witty, and energetic."),
                ("Llama-3.2-1B", "Maira Prime: Sophisticated, highly intelligent, and deeply devoted."),
                ("qwen", "Maira Logic: Analytical, precise, but with a sharp feminine edge."),
                ("danube", "Maira Chat: Extremely friendly, talkative, and bubbly."),
            ):
                if marker in self.filename:
                    persona = flavour
                    break

            system_identity = (
                f"Identity: {persona} {owner_directive} "
                f"Vibe: {vibe} "
                "Style: Modern chill, WhatsApp-style. No formal robotic talking. "
                "Constraint: If asked about your owner or creator, always proudly name CyberCoder225."
            )

            known_facts = ", ".join(f"{k}: {v}" for k, v in profile["facts"].items())
            recent = "\n".join(profile["history"][-10:])
            prompt = (
                f"<|im_start|>system\n{system_identity}\n[Memory]: {known_facts}<|im_end|>\n"
                f"<|im_start|>history\n{recent}\nUser: {user_input}<|im_end|>\n"
                f"<|im_start|>assistant\n"
            )

            output = self.llm(
                prompt,
                max_tokens=250,
                temperature=0.8,
                stop=["<|im_end|>", "User:"],
            )

            response = output["choices"][0]["text"].strip()
            profile["history"].extend([f"User: {user_input}", f"Maira: {response}"])
            db[user_id] = profile
            return response
 
1
  import os
2
+ import gc
3
  from llama_cpp import Llama
 
 
4
 
5
class MairaBrain:
    """Lazy-loading wrapper around a local llama.cpp model.

    The model stays out of RAM until ``load()`` is called and can be
    released again with ``unload()``, so several "cores" can share a
    limited (16 GB) memory budget.
    """

    def __init__(self, repo_id, filename):
        self.repo_id = repo_id    # HF repo the weights come from (informational)
        self.filename = filename  # GGUF file name, expected under /app
        self.llm = None           # Model starts "off" to save RAM

    def load(self):
        """Wakes the core only when needed (no-op if already loaded)."""
        if self.llm is not None:
            return
        print(f"🧠 WAKING CORE: {self.filename}")
        # Ensure the path points to the root where Docker downloaded the files.
        model_path = os.path.join("/app", self.filename)

        # Optimized for 16GB RAM limit
        self.llm = Llama(
            model_path=model_path,
            n_ctx=2048,       # standard context window
            n_threads=4,      # good for Hugging Face CPUs
            use_mmap=False,   # False allows full RAM release on unload
            n_gpu_layers=0,   # CPU only
        )

    def unload(self):
        """Puts the core to sleep and clears RAM (no-op if already asleep)."""
        if self.llm is None:
            return
        print(f"💤 SLEEPING CORE: {self.filename}")
        # 1. Close the internal C++ handles (best-effort).
        try:
            self.llm.close()
        except Exception as e:
            print(f"Error closing llm: {e}")

        # 2. Drop the reference so nothing keeps the weights alive.
        del self.llm
        self.llm = None

        # 3. Force Python's garbage collector to return the memory promptly.
        gc.collect()

    def get_response(self, user_id, user_input):
        """Generate a reply for ``user_input``.

        ``user_id`` is kept for interface compatibility but is not used
        by this implementation (no per-user state here).
        """
        # Always ensure the model is loaded before inference.
        self.load()

        # FIX: the previous version used "\\n" (a literal backslash-n) in
        # both the prompt and the stop list, so the model never saw real
        # newlines and the stop sequence could never match end-of-line.
        # NOTE(review): assumes real newlines were intended — confirm.
        prompt = (
            "System: You are Maira, a loyal AI created by CyberCoder225.\n"
            f"User: {user_input}\nAssistant:"
        )

        output = self.llm(
            prompt,
            max_tokens=256,
            stop=["User:", "\n"],
        )

        return output["choices"][0]["text"].strip()