prometechinc committed on
Commit
6aced6f
·
verified ·
1 Parent(s): 10c8d66

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ bce_brain_part_mini_code.gguf filter=lfs diff=lfs merge=lfs -text
37
+ bce_brain_part_mini_math.gguf filter=lfs diff=lfs merge=lfs -text
38
+ bce_brain_part_mini_normal.gguf filter=lfs diff=lfs merge=lfs -text
39
+ bce_brain_part_mini_vl.gguf filter=lfs diff=lfs merge=lfs -text
40
+ cat.png filter=lfs diff=lfs merge=lfs -text
Modelfile.code ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ FROM bce_brain_part_mini_code.gguf
3
+ TEMPLATE """{{ if .System }}<|im_start|>system
4
+ {{ .System }}<|im_end|>
5
+ {{ end }}{{ if .Prompt }}<|im_start|>user
6
+ {{ .Prompt }}<|im_end|>
7
+ {{ end }}<|im_start|>assistant
8
+ """
9
+ SYSTEM """You are an expert coding assistant. Provide clean, efficient, and well-commented code."""
10
+ PARAMETER stop "<|im_start|>"
11
+ PARAMETER stop "<|im_end|>"
Modelfile.math ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ FROM bce_brain_part_mini_math.gguf
3
+ TEMPLATE """{{ if .System }}<|im_start|>system
4
+ {{ .System }}<|im_end|>
5
+ {{ end }}{{ if .Prompt }}<|im_start|>user
6
+ {{ .Prompt }}<|im_end|>
7
+ {{ end }}<|im_start|>assistant
8
+ """
9
+ SYSTEM """You are a mathematics expert. Solve problems step-by-step."""
10
+ PARAMETER stop "<|im_start|>"
11
+ PARAMETER stop "<|im_end|>"
Modelfile.normal ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ FROM bce_brain_part_mini_normal.gguf
3
+ TEMPLATE """{{ if .System }}<|im_start|>system
4
+ {{ .System }}<|im_end|>
5
+ {{ end }}{{ if .Prompt }}<|im_start|>user
6
+ {{ .Prompt }}<|im_end|>
7
+ {{ end }}<|im_start|>assistant
8
+ """
9
+ SYSTEM """You are a helpful AI assistant capable of general tasks."""
10
+ PARAMETER stop "<|im_start|>"
11
+ PARAMETER stop "<|im_end|>"
README.md CHANGED
@@ -1,5 +1,28 @@
1
- ---
2
- license: other
3
- license_name: licence.md
4
- license_link: LICENSE
5
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Brain Bus Deployment Package
3
+
4
+ This package contains the artifacts for the Brain Bus AI system, optimized for T4 GPUs.
5
+
6
+ ## Contents
7
+ - `bce_brain_part_mini_*.gguf`: Quantized GGUF models for Ollama/llama.cpp.
8
+ - `normal`: General conversational model.
9
+ - `code`: Coding specialist.
10
+ - `math`: Mathematics specialist.
11
+ - `vl`: Vision-Language model (Qwen2.5-VL).
12
+ - `advanced_brain_bus.py`: Orchestrator script to route queries to appropriate experts.
13
+ - `cat.png`: Sample image for testing.
14
+ - `Modelfile.*`: Configuration files for creating Ollama models.
15
+ - `system_prompts.md`: Reference for system prompts used by the experts.
16
+
17
+ ## Setup
18
+ 1. Install Ollama: https://ollama.com/
19
+ 2. Create models:
20
+ ```bash
21
+ ollama create brain-normal -f Modelfile.normal
22
+ ollama create brain-code -f Modelfile.code
23
+ ollama create brain-math -f Modelfile.math
24
+ ```
25
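+ 3. Run the orchestrator (requires the Python packages it imports — `torch`, `transformers`, `diffusers`, `Pillow`, `requests`, `qwen_vl_utils` — and the `merged_models/` checkpoints it loads):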
26
+ ```bash
27
+ python advanced_brain_bus.py
28
+ ```
advanced_brain_bus.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import torch
3
+ import gc
4
+ from transformers import activations
5
+
6
+ # Monkeypatch PytorchGELUTanh for AutoAWQ compatibility
7
+ if not hasattr(activations, 'PytorchGELUTanh'):
8
+ activations.PytorchGELUTanh = activations.NewGELUActivation
9
+
10
+ from transformers import (
11
+ AutoModelForCausalLM,
12
+ AutoTokenizer,
13
+ BitsAndBytesConfig,
14
+ AutoModelForVision2Seq,
15
+ AutoProcessor
16
+ )
17
+ from diffusers import DiffusionPipeline
18
+ from diffusers.utils import export_to_video
19
+ from PIL import Image
20
+ import requests
21
+ import io
22
+ from qwen_vl_utils import process_vision_info
23
+ import os
24
+
25
class BrainBus:
    """Classify a user query and hand it to the matching expert model.

    The checkpoint at ``merged_models/math`` is loaded once in ``__init__``
    and serves both as the intent classifier ("orchestrator") and as the math
    expert. Every other expert is loaded for a single call and dropped again
    afterwards so that only one additional model is resident at a time.

    All ``run_*`` methods return a string: either the expert's answer or a
    message of the form ``"<Name> Expert Error: <details>"``. They never raise.
    """

    # Labels the classification prompt asks the orchestrator to choose from.
    CATEGORIES = ("CODE", "MATH", "GENERAL", "VISION", "VIDEO", "3D")
    # Label used whenever classification fails or yields nothing recognisable.
    DEFAULT_CATEGORY = "GENERAL"

    def __init__(self):
        """Pick the device, build the 4-bit config and load the orchestrator."""
        print("Initializing Brain Bus Orchestrator...")
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Configuration for loading 4-bit models (Orchestrator)
        self.bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float32,  # Using float32 for T4 stability
        )

        # Load the Orchestrator (Math Model) immediately
        self.orchestrator_path = "merged_models/math"
        self.tokenizer = None
        self.orchestrator = None
        self._load_orchestrator()

    def _load_orchestrator(self):
        """Load the shared tokenizer and the orchestrator model (best effort).

        A failure is reported on stdout and leaves whichever attribute could
        not be loaded as ``None``; callers handle that case themselves.
        """
        print(f"Loading Orchestrator from {self.orchestrator_path}...")
        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.orchestrator_path)
            # NOTE(review): trust_remote_code executes Python shipped with the
            # checkpoint; only point this at checkpoints you trust.
            self.orchestrator = AutoModelForCausalLM.from_pretrained(
                self.orchestrator_path,
                quantization_config=self.bnb_config,
                device_map="auto",
                trust_remote_code=True,
            )
        except Exception as e:
            print(f"Failed to load orchestrator: {e}")

    def _clean_memory(self):
        """Release memory held by models that are no longer referenced."""
        # Collect first: the CUDA cache can only hand back blocks whose
        # tensors have already been destroyed.
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    def _decode_completion(self, inputs, outputs):
        """Decode only the tokens generated after the prompt.

        ``generate`` on a causal LM returns the prompt tokens followed by the
        new ones, so the prompt is removed by position rather than by string
        matching on the decoded text.
        """
        prompt_len = len(inputs["input_ids"][0])
        return self.tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    def _generate_text(self, model, query, max_new_tokens=256):
        """Run ``query`` through ``model`` and return the stripped completion."""
        inputs = self.tokenizer(query, return_tensors="pt").to(self.device)
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
        return self._decode_completion(inputs, outputs).strip()

    @classmethod
    def _parse_category(cls, response):
        """Return the category label that appears first in ``response``.

        Matching is case-insensitive. ``DEFAULT_CATEGORY`` is returned when
        no known label occurs in the text.
        """
        text = response.strip().upper()
        hits = [(text.find(label), label) for label in cls.CATEGORIES if label in text]
        if not hits:
            return cls.DEFAULT_CATEGORY
        return min(hits)[1]

    def determine_intent(self, user_input):
        """Ask the orchestrator which expert should handle ``user_input``.

        Returns one of ``CATEGORIES``. Any failure (including a missing
        orchestrator) is printed and mapped to ``"GENERAL"``.
        """
        # Construct a classification prompt
        prompt = (
            "Classify the following user query into one of these categories: "
            "[CODE, MATH, GENERAL, VISION, VIDEO, 3D]. "
            "Return ONLY the category name.\n\n"
            f"Query: {user_input}\nCategory:"
        )

        try:
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
            outputs = self.orchestrator.generate(**inputs, max_new_tokens=10)
            # The prompt itself lists every category, so it must not be part
            # of the text that gets scanned for a label.
            response = self._decode_completion(inputs, outputs)
            return self._parse_category(response)
        except Exception as e:
            print(f"Error determining intent: {e}")
            return self.DEFAULT_CATEGORY

    def _run_text_expert(self, label, model_path, query):
        """Load a 4-bit causal LM from ``model_path``, answer ``query``, unload.

        ``label`` is only used for the progress line and the error prefix.
        """
        print(f"Loading {label} Expert...")
        model = None
        try:
            model = AutoModelForCausalLM.from_pretrained(
                model_path,
                quantization_config=self.bnb_config,
                device_map="auto",
                trust_remote_code=True,
            )
            return self._generate_text(model, query)
        except Exception as e:
            return f"{label} Expert Error: {e}"
        finally:
            # Drop the only reference before collecting so the weights can go.
            model = None
            self._clean_memory()

    def run_code_expert(self, query):
        """Answer ``query`` with the model stored in ``merged_models/code``."""
        return self._run_text_expert("Code", "merged_models/code", query)

    def run_general_expert(self, query):
        """Answer ``query`` with the model stored in ``merged_models/normal``."""
        return self._run_text_expert("General", "merged_models/normal", query)

    def run_math_expert(self, query):
        """Answer ``query`` with the already-loaded orchestrator model."""
        print("Using Orchestrator (Math Expert)...")
        # Since the orchestrator IS the math model, use it directly
        try:
            return self._generate_text(self.orchestrator, query)
        except Exception as e:
            return f"Math Expert Error: {e}"

    def run_vision_expert(self, query, image_path=None):
        """Answer ``query`` with Qwen2.5-VL, optionally about an image.

        ``image_path`` is opened with PIL when given. An unreadable image
        yields ``"Error loading image."`` without loading the model at all.
        """
        image = None
        if image_path:
            # Validate the image before paying for the model load.
            try:
                image = Image.open(image_path)
                image.load()  # force decoding so a corrupt file fails here
            except (OSError, ValueError, TypeError):
                return "Error loading image."

        print("Loading Vision Expert...")
        model = None
        try:
            # Use specific AWQ model ID
            model_id = "Qwen/Qwen2.5-VL-3B-Instruct-AWQ"
            # Use AutoModelForVision2Seq to handle Qwen2.5VL architecture
            model = AutoModelForVision2Seq.from_pretrained(
                model_id,
                torch_dtype=torch.float16,
                device_map="auto",
            )
            processor = AutoProcessor.from_pretrained(model_id)

            # Setup input: one user turn holding the optional image and the text
            content = []
            if image is not None:
                content.append({"type": "image", "image": image})
            content.append({"type": "text", "text": query})
            messages = [{"role": "user", "content": content}]

            text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            image_inputs, video_inputs = process_vision_info(messages)
            inputs = processor(
                text=[text],
                images=image_inputs,
                videos=video_inputs,
                padding=True,
                return_tensors="pt",
            ).to(self.device)

            generated_ids = model.generate(**inputs, max_new_tokens=128)
            # Keep only what was generated after each prompt.
            generated_ids_trimmed = [
                out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
            ]
            return processor.batch_decode(
                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
            )[0]
        except Exception as e:
            return f"Vision Expert Error: {e}"
        finally:
            model = None
            self._clean_memory()

    def run_video_expert(self, query):
        """Generate a clip for ``query`` and save it as ``generated_video.mp4``."""
        print("Loading Video Expert...")
        pipe = None
        try:
            # Use fallback model from testing
            model_id = "damo-vilab/text-to-video-ms-1.7b"
            pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16")
            pipe.enable_model_cpu_offload()

            # video_frames is list of numpy arrays or PIL images
            result = pipe(query, num_inference_steps=20)
            video_frames = result.frames[0]

            output_path = "generated_video.mp4"
            export_to_video(video_frames, output_path, fps=8)

            return f"Video generated at {output_path}"
        except Exception as e:
            return f"Video Expert Error: {e}"
        finally:
            pipe = None
            self._clean_memory()

    def run_3d_expert(self, query):
        """Render a Shap-E object for ``query`` and save it as ``generated_3d.gif``.

        The pipeline output is written to disk so that the returned message
        points at a file that actually exists.
        """
        print("Loading 3D Expert...")
        pipe = None
        try:
            # Imported here so a missing helper surfaces as an expert error.
            from diffusers.utils import export_to_gif

            model_id = "openai/shap-e"
            # Half precision only where a GPU is available; run on self.device
            # instead of assuming CUDA.
            dtype = torch.float16 if self.device == "cuda" else torch.float32
            pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype)
            pipe.to(self.device)

            result = pipe(query, num_inference_steps=20)

            output_path = "generated_3d.gif"
            # images[0] holds the rendered frames for the first (only) prompt.
            export_to_gif(result.images[0], output_path)

            return f"3D Object generated at {output_path}"
        except Exception as e:
            return f"3D Expert Error: {e}"
        finally:
            pipe = None
            self._clean_memory()

    def process_query(self, text, image_path=None):
        """Classify ``text`` and return the chosen expert's response.

        ``image_path`` is forwarded only when the intent is ``VISION``.
        Unknown intents fall back to the general expert.
        """
        # 1. Determine Intent
        print(f"\n[Input]: {text}")
        intent = self.determine_intent(text)
        print(f"[Intent Detected]: {intent}")

        # 2. Route to Expert
        if intent == "VISION":
            return self.run_vision_expert(text, image_path)

        text_only_experts = {
            "CODE": self.run_code_expert,
            "MATH": self.run_math_expert,
            "VIDEO": self.run_video_expert,
            "3D": self.run_3d_expert,
        }
        expert = text_only_experts.get(intent, self.run_general_expert)  # else: GENERAL
        return expert(text)
267
+
268
if __name__ == "__main__":
    # Script entry point: construct the orchestrator and stop there. No
    # read/eval loop is started; `bus` stays a module-level name so an
    # interactive session can call `bus.process_query(...)` afterwards.
    bus = BrainBus()
    print("Brain Bus ready. Run 'process_query' to interact.")
bce_brain_part_mini_code.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f77b8c11067e1f4a02a3aa578e2c3de0be3393b1df556fe4a6825fb208526539
3
+ size 3093668864
bce_brain_part_mini_math.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a219a2b4b0d5cfc58f1f461d9d2bdf28c8bd4176258e0a7d30dc6fcc7b7d7d35
3
+ size 3093668736
bce_brain_part_mini_normal.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1933b45780325cffbf7d985bf3d0d8da9d93db4a55b55872600efd13224c846
3
+ size 3093668864
bce_brain_part_mini_vl.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:297e033594e12374b073f345ccf0c9dc46688f1a3c672dc674d5285ddf12bd01
3
+ size 6178315200
cat.png ADDED

Git LFS Details

  • SHA256: b3596420d2f0bdf3122f04c89b550d7ff504045312b6b1e075a0c2841531543d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.05 MB
system_prompts.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # System Prompts for Brain Bus
3
+
4
+ ## General Expert
5
+ You are a helpful AI assistant capable of general tasks.
6
+
7
+ ## Code Expert
8
+ You are an expert coding assistant. Provide clean, efficient, and well-commented code.
9
+
10
+ ## Math Expert
11
+ You are a mathematics expert. Solve problems step-by-step.
12
+
13
+ ## Vision Expert
14
+ (Handled by the Qwen2.5-VL architecture)
15
+ Describe the image in detail.