Upload folder using huggingface_hub

Browse files

Files changed (12) hide show

.gitattributes +5 -0
Modelfile.code +11 -0
Modelfile.math +11 -0
Modelfile.normal +11 -0
README.md +28 -5
advanced_brain_bus.py +271 -0
bce_brain_part_mini_code.gguf +3 -0
bce_brain_part_mini_math.gguf +3 -0
bce_brain_part_mini_normal.gguf +3 -0
bce_brain_part_mini_vl.gguf +3 -0
cat.png +3 -0
system_prompts.md +15 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+bce_brain_part_mini_code.gguf filter=lfs diff=lfs merge=lfs -text
+bce_brain_part_mini_math.gguf filter=lfs diff=lfs merge=lfs -text
+bce_brain_part_mini_normal.gguf filter=lfs diff=lfs merge=lfs -text
+bce_brain_part_mini_vl.gguf filter=lfs diff=lfs merge=lfs -text
+cat.png filter=lfs diff=lfs merge=lfs -text

Modelfile.code ADDED Viewed

	@@ -0,0 +1,11 @@

+FROM bce_brain_part_mini_code.gguf
+TEMPLATE """{{ if .System }}<|im_start|>system
+{{ .System }}<|im_end|>
+{{ end }}{{ if .Prompt }}<|im_start|>user
+{{ .Prompt }}<|im_end|>
+{{ end }}<|im_start|>assistant
+"""
+SYSTEM """You are an expert coding assistant. Provide clean, efficient, and well-commented code."""
+PARAMETER stop "<|im_start|>"
+PARAMETER stop "<|im_end|>"

Modelfile.math ADDED Viewed

	@@ -0,0 +1,11 @@

+FROM bce_brain_part_mini_math.gguf
+TEMPLATE """{{ if .System }}<|im_start|>system
+{{ .System }}<|im_end|>
+{{ end }}{{ if .Prompt }}<|im_start|>user
+{{ .Prompt }}<|im_end|>
+{{ end }}<|im_start|>assistant
+"""
+SYSTEM """You are a mathematics expert. Solve problems step-by-step."""
+PARAMETER stop "<|im_start|>"
+PARAMETER stop "<|im_end|>"

Modelfile.normal ADDED Viewed

	@@ -0,0 +1,11 @@

+FROM bce_brain_part_mini_normal.gguf
+TEMPLATE """{{ if .System }}<|im_start|>system
+{{ .System }}<|im_end|>
+{{ end }}{{ if .Prompt }}<|im_start|>user
+{{ .Prompt }}<|im_end|>
+{{ end }}<|im_start|>assistant
+"""
+SYSTEM """You are a helpful AI assistant capable of general tasks."""
+PARAMETER stop "<|im_start|>"
+PARAMETER stop "<|im_end|>"

README.md CHANGED Viewed

@@ -1,5 +1,28 @@
----
-license: other
-license_name: licence.md
-license_link: LICENSE
----

+# Brain Bus Deployment Package
+This package contains the artifacts for the Brain Bus AI system, optimized for T4 GPUs.
+## Contents
+- `bce_brain_part_mini_*.gguf`: Quantized GGUF models for Ollama/llama.cpp.
+  - `normal`: General conversational model.
+  - `code`: Coding specialist.
+  - `math`: Mathematics specialist.
+  - `vl`: Vision-Language model (Qwen2.5-VL).
+- `advanced_brain_bus.py`: Orchestrator script to route queries to appropriate experts.
+- `cat.png`: Sample image for testing.
+- `Modelfile.*`: Configuration files for creating Ollama models.
+- `system_prompts.md`: Reference for system prompts used by the experts.
+## Setup
+1. Install Ollama: https://ollama.com/
+2. Create models:
+   ```bash
+   ollama create brain-normal -f Modelfile.normal
+   ollama create brain-code -f Modelfile.code
+   ollama create brain-math -f Modelfile.math
+   ```
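+3. Run the orchestrator (requires the Python packages imported by the script — `torch`, `transformers`, `diffusers`, `Pillow`, `requests`, and `qwen_vl_utils` — and expects local model directories at `merged_models/math`, `merged_models/code`, and `merged_models/normal`):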
+   ```bash
+   python advanced_brain_bus.py
+   ```

advanced_brain_bus.py ADDED Viewed

	@@ -0,0 +1,271 @@

+import torch
+import gc
+from transformers import activations
+# Monkeypatch PytorchGELUTanh for AutoAWQ compatibility: when this
+# transformers build exposes no `activations.PytorchGELUTanh`, alias the name
+# to `NewGELUActivation` so later lookups of it succeed.
+# NOTE(review): this sits before the remaining imports, presumably so the
+# alias exists before any AWQ model code is imported — confirm.
+if not hasattr(activations, 'PytorchGELUTanh'):
+    activations.PytorchGELUTanh = activations.NewGELUActivation
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    AutoModelForVision2Seq,
+    AutoProcessor
+)
+from diffusers import DiffusionPipeline
+from diffusers.utils import export_to_video
+from PIL import Image
+import requests
+import io
+from qwen_vl_utils import process_vision_info
+import os
+class BrainBus:
+    def __init__(self):
+        print("Initializing Brain Bus Orchestrator...")
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        # Configuration for loading 4-bit models (Orchestrator)
+        self.bnb_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_quant_type="nf4",
+            bnb_4bit_compute_dtype=torch.float32, # Using float32 for T4 stability
+        )
+        # Load the Orchestrator (Math Model) immediately
+        self.orchestrator_path = "merged_models/math"
+        self.tokenizer = None
+        self.orchestrator = None
+        self._load_orchestrator()
+    def _load_orchestrator(self):
+        print(f"Loading Orchestrator from {self.orchestrator_path}...")
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(self.orchestrator_path)
+            self.orchestrator = AutoModelForCausalLM.from_pretrained(
+                self.orchestrator_path,
+                quantization_config=self.bnb_config,
+                device_map="auto",
+                trust_remote_code=True
+            )
+        except Exception as e:
+            print(f"Failed to load orchestrator: {e}")
+    def _clean_memory(self):
+        torch.cuda.empty_cache()
+        gc.collect()
+    def determine_intent(self, user_input):
+        # Construct a classification prompt
+        prompt = (
+            "Classify the following user query into one of these categories: "
+            "[CODE, MATH, GENERAL, VISION, VIDEO, 3D]. "
+            "Return ONLY the category name.\n\n"
+            f"Query: {user_input}\nCategory:"
+        )
+        try:
+            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
+            outputs = self.orchestrator.generate(**inputs, max_new_tokens=10)
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            # Extract the label from the response (simple parsing)
+            # Remove input prompt from response if model echoes it
+            if prompt in response:
+                response = response.replace(prompt, "")
+            response = response.strip().upper()
+            # Fallback if generation is verbose
+            for category in ['CODE', 'MATH', 'GENERAL', 'VISION', 'VIDEO', '3D']:
+                if category in response:
+                    return category
+            return "GENERAL" # Default fallback
+        except Exception as e:
+            print(f"Error determining intent: {e}")
+            return "GENERAL"
+    def run_code_expert(self, query):
+        print("Loading Code Expert...")
+        model = None
+        try:
+            model = AutoModelForCausalLM.from_pretrained(
+                "merged_models/code",
+                quantization_config=self.bnb_config,
+                device_map="auto",
+                trust_remote_code=True
+            )
+            inputs = self.tokenizer(query, return_tensors="pt").to(self.device)
+            outputs = model.generate(**inputs, max_new_tokens=256)
+            result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            if query in result:
+                 result = result.replace(query, "").strip()
+            return result
+        except Exception as e:
+            return f"Code Expert Error: {e}"
+        finally:
+            if model is not None:
+                del model
+            self._clean_memory()
+    def run_general_expert(self, query):
+        print("Loading General Expert...")
+        model = None
+        try:
+            model = AutoModelForCausalLM.from_pretrained(
+                "merged_models/normal",
+                quantization_config=self.bnb_config,
+                device_map="auto",
+                trust_remote_code=True
+            )
+            inputs = self.tokenizer(query, return_tensors="pt").to(self.device)
+            outputs = model.generate(**inputs, max_new_tokens=256)
+            result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            if query in result:
+                 result = result.replace(query, "").strip()
+            return result
+        except Exception as e:
+            return f"General Expert Error: {e}"
+        finally:
+            if model is not None:
+                del model
+            self._clean_memory()
+    def run_math_expert(self, query):
+        print("Using Orchestrator (Math Expert)...")
+        # Since the orchestrator IS the math model, use it directly
+        try:
+            inputs = self.tokenizer(query, return_tensors="pt").to(self.device)
+            outputs = self.orchestrator.generate(**inputs, max_new_tokens=256)
+            result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            if query in result:
+                 result = result.replace(query, "").strip()
+            return result
+        except Exception as e:
+            return f"Math Expert Error: {e}"
+    def run_vision_expert(self, query, image_path=None):
+        print("Loading Vision Expert...")
+        model = None
+        try:
+            # Use specific AWQ model ID
+            model_id = "Qwen/Qwen2.5-VL-3B-Instruct-AWQ"
+            # Use AutoModelForVision2Seq to handle Qwen2.5VL architecture
+            model = AutoModelForVision2Seq.from_pretrained(
+                model_id,
+                torch_dtype=torch.float16,
+                device_map="auto"
+            )
+            processor = AutoProcessor.from_pretrained(model_id)
+            # Setup input
+            messages = []
+            content = []
+            if image_path:
+                try:
+                    image = Image.open(image_path)
+                    content.append({"type": "image", "image": image})
+                except:
+                    return "Error loading image."
+            content.append({"type": "text", "text": query})
+            messages.append({"role": "user", "content": content})
+            text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+            image_inputs, video_inputs = process_vision_info(messages)
+            inputs = processor(
+                text=[text],
+                images=image_inputs,
+                videos=video_inputs,
+                padding=True,
+                return_tensors="pt",
+            ).to(self.device)
+            generated_ids = model.generate(**inputs, max_new_tokens=128)
+            generated_ids_trimmed = [
+                out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
+            ]
+            result = processor.batch_decode(
+                generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
+            )[0]
+            return result
+        except Exception as e:
+            return f"Vision Expert Error: {e}"
+        finally:
+            if model is not None:
+                del model
+            self._clean_memory()
+    def run_video_expert(self, query):
+        print("Loading Video Expert...")
+        pipe = None
+        try:
+            # Use fallback model from testing
+            model_id = "damo-vilab/text-to-video-ms-1.7b"
+            pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16, variant="fp16")
+            pipe.enable_model_cpu_offload()
+            # video_frames is list of numpy arrays or PIL images
+            result = pipe(query, num_inference_steps=20)
+            video_frames = result.frames[0]
+            output_path = "generated_video.mp4"
+            export_to_video(video_frames, output_path, fps=8)
+            return f"Video generated at {output_path}"
+        except Exception as e:
+            return f"Video Expert Error: {e}"
+        finally:
+            if pipe is not None:
+                del pipe
+            self._clean_memory()
+    def run_3d_expert(self, query):
+        print("Loading 3D Expert...")
+        pipe = None
+        try:
+            model_id = "openai/shap-e"
+            pipe = DiffusionPipeline.from_pretrained(model_id, torch_dtype=torch.float16)
+            pipe.to("cuda")
+            _ = pipe(query, num_inference_steps=20)
+            return "3D Object generated (check output directory)"
+        except Exception as e:
+            return f"3D Expert Error: {e}"
+        finally:
+            if pipe is not None:
+                del pipe
+            self._clean_memory()
+    def process_query(self, text, image_path=None):
+        # 1. Determine Intent
+        print(f"\n[Input]: {text}")
+        intent = self.determine_intent(text)
+        print(f"[Intent Detected]: {intent}")
+        # 2. Route to Expert
+        response = ""
+        if intent == "CODE":
+            response = self.run_code_expert(text)
+        elif intent == "MATH":
+            response = self.run_math_expert(text)
+        elif intent == "VISION":
+            response = self.run_vision_expert(text, image_path)
+        elif intent == "VIDEO":
+            response = self.run_video_expert(text)
+        elif intent == "3D":
+            response = self.run_3d_expert(text)
+        else: # GENERAL
+            response = self.run_general_expert(text)
+        return response
+if __name__ == "__main__":
+    # Build the bus, which loads the orchestrator model, but start no input
+    # loop: the script only prints the hint below and then ends.
+    # NOTE(review): `bus` is kept as a module-level name, presumably so it can
+    # be used from an interactive session (e.g. `python -i`) — confirm.
+    bus = BrainBus()
+    print("Brain Bus ready. Run 'process_query' to interact.")

bce_brain_part_mini_code.gguf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f77b8c11067e1f4a02a3aa578e2c3de0be3393b1df556fe4a6825fb208526539
+size 3093668864

bce_brain_part_mini_math.gguf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a219a2b4b0d5cfc58f1f461d9d2bdf28c8bd4176258e0a7d30dc6fcc7b7d7d35
+size 3093668736

bce_brain_part_mini_normal.gguf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f1933b45780325cffbf7d985bf3d0d8da9d93db4a55b55872600efd13224c846
+size 3093668864

bce_brain_part_mini_vl.gguf ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:297e033594e12374b073f345ccf0c9dc46688f1a3c672dc674d5285ddf12bd01
+size 6178315200

cat.png ADDED Viewed

Git LFS Details

SHA256: b3596420d2f0bdf3122f04c89b550d7ff504045312b6b1e075a0c2841531543d
Pointer size: 132 Bytes
Size of remote file: 1.05 MB

system_prompts.md ADDED Viewed

	@@ -0,0 +1,15 @@

+# System Prompts for Brain Bus
+## General Expert
+You are a helpful AI assistant capable of general tasks.
+## Code Expert
+You are an expert coding assistant. Provide clean, efficient, and well-commented code.
+## Math Expert
+You are a mathematics expert. Solve problems step-by-step.
+## Vision Expert
+(Handled by the Qwen2.5-VL architecture)
+Describe the image in detail.