#!/bin/bash
#
# Entrypoint: starts an Ollama server, makes the model stack available
# (bootstrap from the HF bucket, or pull from Ollama Cloud), then launches
# the uvicorn-hosted orchestrator.
#
# Environment overrides read here:
#   MODEL_CACHE_DIR   directory holding model files   (default: /models)
#   HF_CLI            Hugging Face CLI executable      (default: hf)
#   OLLAMA_NAMESPACE  namespace prefix for cloud tags  (default: nicholasjmcleod)

# Abort on the first unguarded command failure.
set -o errexit

# Bucket path used in hf://buckets/<BUCKET> URLs.
BUCKET="MADdegens/Models"

# Local directory where model files are stored.
MODELS_DIR="${MODEL_CACHE_DIR:-/models}"

# Command used to talk to the HF bucket.
HF_BIN="${HF_CLI:-hf}"

# Keep a caller-supplied namespace; otherwise fall back to the default.
: "${OLLAMA_NAMESPACE:=nicholasjmcleod}"
|
|
# Startup banner, one printf argument per output line.
# NOTE(review): the frame and separator glyphs in these strings look
# mis-encoded (probably box-drawing characters and middle dots originally);
# they are reproduced unchanged here -- confirm against the upstream file.
printf '%s\n' \
  'ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ' \
  'β Agent Q3 β Cloud Model Stack β' \
  'β Kimi-Linear-48B Β· Harmonic-Hermes-9B Β· Qwen3-Coder-53B β' \
  'β Kimi-Distilled-35B Β· Genstruct-7B Β· Infinity-Parser2-Pro β' \
  'β Monitor: Hermes-9B watches + Qwen3-Coder remediates β' \
  'ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ'
|
|
| |
# Start the Ollama server in the background and poll its HTTP API until it
# answers. Polls up to 60 times with a 2 second pause between attempts.
# Unlike a bare polling loop, this aborts the script when the server never
# becomes reachable (or its process exits), instead of carrying on and
# failing later with confusing errors from `ollama pull` / `ollama create`.
ollama serve &
OLLAMA_PID=$!
echo "Waiting for Ollama..."

ollama_ready=false
for ((attempt = 1; attempt <= 60; attempt++)); do
  if curl -sf http://localhost:11434/api/tags > /dev/null 2>&1; then
    echo "β Ollama ready"
    ollama_ready=true
    break
  fi
  # No point in polling further once the server process is gone.
  if ! kill -0 "$OLLAMA_PID" 2> /dev/null; then
    echo "Ollama server process exited before becoming ready" >&2
    break
  fi
  sleep 2
done

if [[ "$ollama_ready" != true ]]; then
  echo "Ollama did not become ready at http://localhost:11434" >&2
  # Best-effort cleanup; the process may already be gone.
  kill "$OLLAMA_PID" 2> /dev/null || true
  exit 1
fi
|
|
| |
#######################################
# Ensure a model tag is available locally, pulling it if necessary.
# Arguments:
#   $1 - full model tag to look for / pull (e.g. namespace/name:tag)
#   $2 - human-readable name used in log messages
# Outputs:
#   Progress messages on stdout.
# Returns:
#   0 when the tag is already listed or the pull succeeds; otherwise the
#   exit status of `ollama pull`.
#######################################
pull_from_cloud() {
  local cloud_tag="$1"
  local display_name="$2"

  # First column of `ollama list` holds the model names.
  local installed
  installed=$(ollama list | awk '{print $1}')

  # -F: compare literally. Tags contain '.', which as a regex would match
  # any character and could report a different model as present.
  # -x: the whole line must equal the tag. --: protect odd-looking tags.
  if grep -qxF -- "$cloud_tag" <<< "$installed"; then
    echo " β $display_name ($cloud_tag) already loaded"
    return 0
  fi

  echo " β Pulling $display_name from Ollama Cloud..."
  ollama pull "$cloud_tag" || return
  echo " β $display_name β $cloud_tag"
}
|
|
| |
#######################################
# Build a local Ollama model from a model file and publish it under a
# namespaced tag.
# Globals:
#   MODELS_DIR (read) - directory holding model files
#   HF_BIN     (read) - HF CLI executable used for syncing
#   BUCKET     (read) - bucket path used in the hf://buckets/ URL
# Arguments:
#   $1 - model file name inside the bucket / MODELS_DIR
#   $2 - local Ollama tag to create
#   $3 - namespaced tag to push
#   $4 - human-readable name used in log messages
# Outputs:
#   Progress messages on stdout, errors on stderr.
# Returns:
#   0 on success; non-zero if syncing, creating, copying or pushing fails.
#######################################
create_and_push() {
  local gguf_file="$1"
  local local_tag="$2"
  local cloud_tag="$3"
  local display_name="$4"

  mkdir -p "$MODELS_DIR" || return
  local local_path="$MODELS_DIR/$gguf_file"

  if [ ! -f "$local_path" ]; then
    echo " β Syncing $display_name from HF bucket..."
    "$HF_BIN" sync "hf://buckets/$BUCKET" "$MODELS_DIR" --include "$gguf_file" \
      || return
    # A sync that matched nothing would otherwise surface later as an
    # opaque `ollama create` failure.
    if [[ ! -e "$local_path" ]]; then
      echo "ERROR: $gguf_file not found in $MODELS_DIR after sync" >&2
      return 1
    fi
    echo " β Downloaded $gguf_file"
  else
    echo " β $display_name GGUF cached at $local_path"
  fi

  echo " β Creating local Ollama model $local_tag..."
  local build_dir
  build_dir=$(mktemp -d) || return
  cat > "$build_dir/Modelfile" << MODELEOF
FROM $local_path
PARAMETER num_ctx 32768
PARAMETER num_predict 4096
PARAMETER temperature 0.7
MODELEOF

  # Capture the status so the scratch directory is removed on failure too.
  local create_status=0
  ollama create "$local_tag" -f "$build_dir/Modelfile" || create_status=$?
  rm -rf -- "$build_dir"
  if ((create_status != 0)); then
    return "$create_status"
  fi

  # The model was created as $local_tag; `ollama push` needs a local model
  # carrying the namespaced name, so alias it before pushing.
  if [[ "$local_tag" != "$cloud_tag" ]]; then
    ollama cp "$local_tag" "$cloud_tag" || return
  fi

  echo " β Pushing $display_name to Ollama Cloud as $cloud_tag..."
  ollama push "$cloud_tag" || return
  echo " β $display_name β Ollama Cloud: $cloud_tag"
}
|
|
| |
# Role keys, in processing order.
ROLES=(reasoner tandem coder_dedicated kimi_distilled genstruct infinity_parser)

# Per-role lookup tables, all keyed by the entries of ROLES.
declare -A MODEL_GGUFS=()   # file name in the bucket / model directory
declare -A LOCAL_TAGS=()    # local Ollama tag; each has an env override
declare -A CLOUD_TAGS=()    # "$OLLAMA_NAMESPACE/<local tag>"
declare -A DISPLAY_NAMES=() # label used in log output

MODEL_GGUFS[reasoner]="Kimi-Linear-48B-A3B-Instruct.i1-Q6_K.gguf"
LOCAL_TAGS[reasoner]="${REASONER_MODEL:-kimi-linear:q6_k}"
DISPLAY_NAMES[reasoner]="Kimi-Linear-48B (1M-ctx Preprocessor)"

MODEL_GGUFS[tandem]="Harmonic-Hermes-9B.i1-Q6_K.gguf"
LOCAL_TAGS[tandem]="${TANDEM_MODEL:-harmonic-hermes:q6_k}"
DISPLAY_NAMES[tandem]="Harmonic-Hermes-9B (Tandem+Monitor)"

MODEL_GGUFS[coder_dedicated]="Qwen3-Coder-53B-A3B-Instruct-TOTAL-RECALL-v2-MASTER-CODER-L.i1-Q6_K.gguf"
LOCAL_TAGS[coder_dedicated]="${CODER_DEDICATED_MODEL:-qwen3-coder-53b:q6_k}"
DISPLAY_NAMES[coder_dedicated]="Qwen3-Coder-53B TOTAL-RECALL (Primary Coder)"

MODEL_GGUFS[kimi_distilled]="Qwen3.6-35B-A3B-Kimi-K2.6-Reasoning-Distilled.IQ4_XS.gguf"
LOCAL_TAGS[kimi_distilled]="${KIMI_DISTILLED_MODEL:-kimi-distilled:iq4_xs}"
DISPLAY_NAMES[kimi_distilled]="Qwen3.6-35B Kimi-K2.6-Distilled IQ4_XS (Structured Reasoning)"

MODEL_GGUFS[genstruct]="Genstruct-7B.i1-Q6_K.gguf"
LOCAL_TAGS[genstruct]="${GENSTRUCT_MODEL:-genstruct:q6_k}"
DISPLAY_NAMES[genstruct]="Genstruct-7B (Generator)"

MODEL_GGUFS[infinity_parser]="Infinity-Parser2-Pro"
LOCAL_TAGS[infinity_parser]="${INFINITY_PARSER_MODEL:-infinity-parser:latest}"
DISPLAY_NAMES[infinity_parser]="Infinity-Parser2-Pro (Parser)"

# Every cloud tag is the namespace joined to the role's local tag.
for role_key in "${ROLES[@]}"; do
  CLOUD_TAGS[$role_key]="$OLLAMA_NAMESPACE/${LOCAL_TAGS[$role_key]}"
done
unset role_key
|
|
# Choose how models reach this host. BOOTSTRAP_MODE=true syncs the bucket
# and publishes every role's model; any other value (or none) pulls the
# already-published models.
case "${BOOTSTRAP_MODE:-false}" in
  true)
    echo
    echo "=== BOOTSTRAP MODE: Syncing models from HF bucket β Ollama Cloud ==="
    echo

    # One bulk sync up front so the per-role step finds its file locally.
    echo " β Syncing all GGUFs from hf://buckets/$BUCKET β $MODELS_DIR ..."
    mkdir -p "$MODELS_DIR"
    "$HF_BIN" sync "hf://buckets/$BUCKET" "$MODELS_DIR"
    echo " β Bucket synced to $MODELS_DIR"
    echo

    for role in "${ROLES[@]}"; do
      # The parser model is optional: skip it when the bucket listing does
      # not mention it (a failing listing also counts as "not there").
      if [ "$role" = "infinity_parser" ] \
        && ! "$HF_BIN" buckets list "$BUCKET" 2> /dev/null | grep -q "Infinity-Parser2-Pro"; then
        echo " β Infinity-Parser2-Pro not yet in bucket β skipping"
        continue
      fi

      create_and_push \
        "${MODEL_GGUFS[$role]}" \
        "${LOCAL_TAGS[$role]}" \
        "${CLOUD_TAGS[$role]}" \
        "${DISPLAY_NAMES[$role]}"
    done

    echo
    echo "β All models pushed to Ollama Cloud under $OLLAMA_NAMESPACE/"
    ollama list
    ;;
  *)
    echo
    echo "=== PRODUCTION MODE: Pulling models from Ollama Cloud ==="
    echo

    for role in "${ROLES[@]}"; do
      pull_from_cloud "${CLOUD_TAGS[$role]}" "${DISPLAY_NAMES[$role]}"
    done

    echo
    echo "β All models loaded from Ollama Cloud"
    ollama list
    ;;
esac
|
|
| |
# Launch the orchestrator next to the already-running Ollama server and
# supervise both. PORT and LOG_LEVEL override the defaults 8000 / info.
python -m uvicorn orchestrator.main:app \
  --host 0.0.0.0 \
  --port "${PORT:-8000}" \
  --log-level "${LOG_LEVEL:-info}" &
UVICORN_PID=$!

# Ask both services to stop; either may already be gone, so ignore errors.
shutdown_children() {
  kill -TERM "$OLLAMA_PID" "$UVICORN_PID" 2> /dev/null || true
}

# Forward termination requests to the children instead of leaving them
# running after this script is told to stop.
trap shutdown_children TERM INT

# Return as soon as EITHER service exits (wait -n, Bash 4.3+). Waiting for
# both PIDs would keep the script alive with only half the stack running.
exit_status=0
wait -n || exit_status=$?

# Stop the surviving service, reap it, and report the first exit status.
shutdown_children
wait || true
exit "$exit_status"
|
|