scenarist / setup.py
github-actions[bot]
Sync backend to Hugging Face Space (commit: 39b5c807918249fa80049d49f4b6a74d6a0ed1fc)
6d86412
"""Orsync Scenarist β€” Setup Script.
Run once before starting the project. Installs dependencies, creates the
.env file, validates configuration, checks service connectivity, and
pre-loads models so that ``python run.py`` starts instantly.
Usage:
python setup.py # Interactive setup
python setup.py --check # Validate only (no installs, no .env creation)
"""
from __future__ import annotations
import argparse
import os
import shutil
import subprocess
import sys
import textwrap
# ── Constants ─────────────────────────────────────────────────────────────────
# Absolute path of the directory that contains this script, and its parent.
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
PROJECT_PARENT = os.path.dirname(ROOT_DIR)

# Put the parent directory at the front of the import path (only once).
# NOTE(review): presumably this is what lets the ``backend.app...`` imports
# further down resolve — confirm against the repository layout.
if PROJECT_PARENT not in sys.path:
    sys.path.insert(0, PROJECT_PARENT)

# Files this script reads or writes; all of them live directly in ROOT_DIR.
ENV_FILE, ENV_EXAMPLE, REQUIREMENTS, SETUP_MARKER = (
    os.path.join(ROOT_DIR, filename)
    for filename in (".env", ".env.example", "requirements.txt", ".setup_done")
)

# ANSI escape sequences used to style console output.
_BOLD, _RESET = "\033[1m", "\033[0m"
_GREEN, _YELLOW, _RED, _CYAN = "\033[92m", "\033[93m", "\033[91m", "\033[96m"
def _ok(msg: str) -> None:
    """Print ``msg`` as a success line, prefixed with a green check mark."""
    # The glyph was mis-encoded (UTF-8 bytes shown in a legacy code page);
    # it is restored to the intended U+2714 check mark.
    print(f" {_GREEN}✔{_RESET} {msg}")
def _warn(msg: str) -> None:
    """Print ``msg`` as a warning line, prefixed with a yellow warning sign."""
    marker = f"{_YELLOW}⚠{_RESET}"
    print(f" {marker} {msg}")
def _fail(msg: str) -> None:
    """Print ``msg`` as a failure line, prefixed with a red cross mark."""
    # The glyph was mis-encoded (UTF-8 bytes shown in a legacy code page);
    # it is restored to the intended U+2716 cross mark.
    print(f" {_RED}✖{_RESET} {msg}")
def _info(msg: str) -> None:
    """Print ``msg`` as an informational line, prefixed with a cyan info sign."""
    # The glyph was mis-encoded (UTF-8 bytes shown in a legacy code page);
    # it is restored to the intended U+2139 information symbol.
    print(f" {_CYAN}ℹ{_RESET} {msg}")
def _header(title: str) -> None:
    """Print ``title`` as a bold section heading preceded by a blank line."""
    banner = f"── {title} ──"
    print(f"\n{_BOLD}{banner}{_RESET}")
# ── Step 1: Python version ───────────────────────────────────────────────────
def check_python_version() -> bool:
    """Report whether the running interpreter is Python 3.12 or newer.

    Returns:
        True when ``sys.version_info`` is at least (3, 12); otherwise False,
        after printing a pointer to the Python download page.
    """
    _header("Python version")
    major, minor = sys.version_info[:2]
    if (major, minor) >= (3, 12):
        _ok(f"Python {major}.{minor} (>= 3.12 required)")
        return True
    # Mis-encoded em dash and arrow in the message are restored.
    _fail(
        f"Python {major}.{minor} detected — Python >= 3.12 is required.\n"
        " → Install Python 3.12+ from https://www.python.org/downloads/"
    )
    return False
# ── Step 2: Install dependencies ─────────────────────────────────────────────
def install_dependencies(check_only: bool = False) -> bool:
    """Probe for required packages and install requirements.txt if any are missing.

    Each package is probed by importing it. When something is missing and
    ``check_only`` is False, the whole requirements file is installed with
    ``uv pip install``; if ``uv`` is not on PATH the current interpreter's
    pip is used instead of crashing with ``FileNotFoundError``.

    Args:
        check_only: When True, only report missing packages; install nothing.

    Returns:
        True when every probe imports, or when the install command succeeds;
        False when packages are missing in check mode or the install fails.
    """
    _header("Python dependencies")

    # Importable module name -> distribution name shown to the user.
    probes = {
        "fastapi": "fastapi",
        "uvicorn": "uvicorn",
        "ollama": "ollama",
        "redis": "redis",
        "neo4j": "neo4j",
        "chromadb": "chromadb",
        "httpx": "httpx",
        "pydantic_settings": "pydantic-settings",
        "numpy": "numpy",
        "sklearn": "scikit-learn",
        "scipy": "scipy",
        "jose": "python-jose",
        "bcrypt": "bcrypt",
        "aio_pika": "aio-pika",
        "pytest": "pytest",
        "fakeredis": "fakeredis",
    }
    missing: list[str] = []
    for import_name, pkg_name in probes.items():
        try:
            __import__(import_name)
        except ImportError:
            missing.append(pkg_name)

    if not missing:
        _ok("All dependencies installed")
        return True

    if check_only:
        _fail(f"Missing packages: {', '.join(missing)}")
        _info("Run `python setup.py` (without --check) to install them.")
        return False

    _info(f"Installing {len(missing)} missing packages from requirements.txt ...")

    use_uv = shutil.which("uv") is not None
    if use_uv:
        tool = "uv pip install"
        install_cmd = ["uv", "pip", "install"]
        # uv requires --system when not inside a virtual environment
        if sys.prefix == sys.base_prefix:
            install_cmd.append("--system")
    else:
        tool = "pip install"
        _warn("uv not found on PATH — falling back to pip.")
        # Run pip through this interpreter so packages land where the
        # import probes above were looking.
        install_cmd = [sys.executable, "-m", "pip", "install"]
    install_cmd += ["-r", REQUIREMENTS]

    try:
        subprocess.check_call(
            install_cmd,
            stdout=subprocess.DEVNULL if os.name != "nt" else None,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers the executable vanishing or not being runnable.
        hints = [
            f"{tool} failed.",
            f" → Try manually: {tool} -r requirements.txt",
            " → On Windows, run your terminal as Administrator if you get permission errors.",
        ]
        if not use_uv:
            # --user is a pip flag; it is not offered for uv.
            hints.append(
                " → If a package conflicts, try: pip install -r requirements.txt --user"
            )
        _fail("\n".join(hints))
        return False

    _ok("All dependencies installed successfully")
    return True
# ── Step 3: .env file ────────────────────────────────────────────────────────
def ensure_env_file(check_only: bool = False) -> bool:
    """Make sure a .env file exists, creating it from .env.example if allowed.

    Args:
        check_only: When True, never create the file; only report its absence.

    Returns:
        True when .env already exists or was created; False when it is
        missing in check mode, when .env.example is missing, or when the
        copy fails.
    """
    _header("Environment file (.env)")

    if os.path.isfile(ENV_FILE):
        _ok(".env file exists")
        return True

    if check_only:
        _fail(
            ".env file not found.\n"
            " → Run `python setup.py` to create it from .env.example\n"
            " → Or manually: copy .env.example .env"
        )
        return False

    if not os.path.isfile(ENV_EXAMPLE):
        _fail(
            ".env.example not found — cannot create .env.\n"
            " → Ensure you cloned the full repository."
        )
        return False

    try:
        shutil.copy(ENV_EXAMPLE, ENV_FILE)
    except OSError as exc:
        # e.g. read-only directory: report it like every other step
        # instead of aborting the script with a traceback.
        _fail(f"Could not create .env: {exc}")
        return False

    _ok("Created .env from .env.example")
    _warn("Edit .env to set your OLLAMA_API_KEY and other secrets.")
    return True
# ── Step 4: Validate configuration ──────────────────────────────────────────
def validate_config() -> tuple[bool, dict]:
    """Load settings from the project's .env file and summarise them.

    Returns:
        ``(True, info)`` when the settings load, where ``info`` holds the
        connection parameters used by the service checks
        (``ollama_api_key`` is a bool: whether a key is set).
        ``(False, {})`` when pydantic-settings cannot be imported or the
        settings fail to load.
    """
    _header("Configuration validation")

    # Drop any exported OLLAMA_API_KEY so the value in the .env we just
    # created/verified is the one that gets validated.
    os.environ.pop("OLLAMA_API_KEY", None)

    try:
        import importlib

        # Packages may have been installed by this very process; refresh the
        # import finders instead of deleting pydantic from sys.modules, which
        # would leave already-imported libraries holding a second, stale copy.
        importlib.invalidate_caches()
        from pydantic_settings import BaseSettings, SettingsConfigDict
    except ImportError as exc:
        # Not a .env problem — say so instead of blaming the file's syntax.
        _fail(f"pydantic-settings is not importable: {exc}")
        _info("Run `python setup.py` (without --check) to install dependencies.")
        return False, {}

    try:
        class _CheckSettings(BaseSettings):
            model_config = SettingsConfigDict(
                env_file=ENV_FILE, env_file_encoding="utf-8", extra="ignore",
            )
            ollama_host: str = "https://ollama.com"
            ollama_api_key: str = ""
            ollama_model: str = "gemma4:31b-cloud"
            embedding_model: str = "onnx-minilm"
            redis_url: str = "redis://localhost:6379/0"
            neo4j_uri: str = "neo4j://localhost:7687"
            neo4j_username: str = "neo4j"
            neo4j_password: str = "scenarist123"
            chroma_host: str = "localhost"
            chroma_port: int = 8100
            environment: str = "development"

        cfg = _CheckSettings()
    except Exception as exc:
        _fail(f"Could not load .env: {exc}")
        _info("Check .env syntax — each line should be KEY=VALUE (no quotes needed).")
        return False, {}

    info = {
        "ollama_api_key": bool(cfg.ollama_api_key),
        "ollama_host": cfg.ollama_host,
        "ollama_model": cfg.ollama_model,
        "embedding_model": cfg.embedding_model,
        "redis_url": cfg.redis_url,
        "neo4j_uri": cfg.neo4j_uri,
        # Exposed so the connectivity check can authenticate with the
        # configured credentials rather than a hard-coded pair.
        "neo4j_username": cfg.neo4j_username,
        "neo4j_password": cfg.neo4j_password,
        "chroma_host": cfg.chroma_host,
        "chroma_port": cfg.chroma_port,
    }

    # Ollama API key
    if cfg.ollama_api_key:
        _ok(f"OLLAMA_API_KEY is set (host: {cfg.ollama_host})")
    else:
        _warn(
            "OLLAMA_API_KEY is empty — LLM features will use rule-based fallbacks.\n"
            " → Get a key at https://ollama.com/settings/keys\n"
            " → Then set OLLAMA_API_KEY in your .env file"
        )
    _ok(f"LLM model: {cfg.ollama_model}")
    _ok(f"Embedding model: {cfg.embedding_model}")

    # A missing API key is only a warning, so a loaded config always passes.
    return True, info
# ── Step 5: Check service connectivity ───────────────────────────────────────
def _probe_redis(info: dict) -> bool:
    """Ping Redis; if that fails, accept a ``redis-server`` binary on PATH."""
    url = info.get("redis_url", "redis://localhost:6379/0")
    try:
        import redis as _redis

        _redis.from_url(url, socket_connect_timeout=2).ping()
    except Exception:
        # Best-effort probe: any failure just means "not running right now".
        if shutil.which("redis-server"):
            _ok("Redis: redis-server found — will start automatically with run.py")
            return True
        _warn(
            "Redis: redis-server not installed.\n"
            " → run.py will start it automatically if installed.\n"
            " → To install: https://redis.io/download\n"
            " Windows: winget install Redis.Redis or scoop install redis\n"
            " macOS: brew install redis\n"
            " Linux: sudo apt install redis-server\n"
            " Docker: handled automatically in Dockerfile"
        )
        return False
    _ok(f"Redis: already running ({url})")
    return True


def _probe_neo4j(info: dict) -> bool:
    """Verify Neo4j connectivity; if that fails, look for a ``neo4j`` binary."""
    uri = info.get("neo4j_uri", "neo4j://localhost:7687")
    # Use the configured credentials. The fallbacks mirror the defaults
    # declared in validate_config (the old hard-coded password did not).
    auth = (
        info.get("neo4j_username", "neo4j"),
        info.get("neo4j_password", "scenarist123"),
    )
    try:
        from neo4j import GraphDatabase

        driver = GraphDatabase.driver(uri, auth=auth)
        try:
            driver.verify_connectivity()
        finally:
            # Close the driver even when the connectivity check raises.
            driver.close()
    except Exception:
        # Best-effort probe: fall back to checking for an installed binary.
        candidates = ["/opt/neo4j/bin/neo4j", os.path.expanduser("~/neo4j/bin/neo4j")]
        neo4j_bin = shutil.which("neo4j") or next(
            (path for path in candidates if os.path.isfile(path)), None
        )
        if neo4j_bin:
            _ok("Neo4j: binary found — will start automatically with run.py")
            return True
        _warn(
            "Neo4j: not installed.\n"
            " → run.py will start it automatically if installed.\n"
            " → To install: https://neo4j.com/download/\n"
            " Windows: winget install Neo4j.Neo4j or download from neo4j.com\n"
            " macOS: brew install neo4j\n"
            " Linux: see https://neo4j.com/docs/operations-manual/current/installation/linux/\n"
            " Docker: handled automatically in Dockerfile"
        )
        return False
    _ok(f"Neo4j: already running ({uri})")
    return True


def _probe_chromadb(info: dict) -> bool:
    """Send a heartbeat to ChromaDB; if that fails, look for the ``chroma`` CLI."""
    host = info.get("chroma_host", "localhost")
    # Fallback port matches the configured default (8100), not Chroma's 8000.
    port = info.get("chroma_port", 8100)
    try:
        import chromadb

        chromadb.HttpClient(host=host, port=port).heartbeat()
    except Exception:
        # Best-effort probe: fall back to checking for the CLI.
        if shutil.which("chroma"):
            _ok("ChromaDB: chroma CLI found — will start automatically with run.py")
            return True
        _warn(
            "ChromaDB: chroma CLI not installed.\n"
            " → run.py will start it automatically if installed.\n"
            " → To install: pip install chromadb\n"
            " Docker: handled automatically in Dockerfile"
        )
        return False
    _ok(f"ChromaDB: already running ({host}:{port})")
    return True


def _probe_ollama(info: dict) -> bool:
    """Check that the Ollama host answers HTTP; skipped when no API key is set."""
    if not info.get("ollama_api_key"):
        _info("Ollama Cloud: skipped (no API key set)")
        return False

    host = info.get("ollama_host", "https://ollama.com")
    try:
        import httpx

        # Just check we can reach the host — don't burn tokens. The API key
        # itself is not sent, so this does not validate the key.
        resp = httpx.get(f"{host}/api/tags", timeout=5)
        reachable = resp.status_code < 500
    except Exception:
        reachable = False

    if reachable:
        _ok(f"Ollama Cloud: reachable ({host})")
        return True
    _warn(
        f"Ollama Cloud: not reachable ({host})\n"
        " → Check your OLLAMA_HOST and internet connection.\n"
        " → LLM features will use rule-based fallbacks."
    )
    return False


def check_services(info: dict) -> dict[str, bool]:
    """Probe the backing services and print one status line per service.

    Args:
        info: Connection parameters as returned by ``validate_config``; any
            missing key falls back to a built-in default.

    Returns:
        A dict with keys ``redis``, ``neo4j``, ``chromadb`` and ``ollama``.
        For the first three, True means the service answered or its binary
        was found; for ``ollama`` it means the host answered with a
        non-5xx status.
    """
    _header("Service connectivity")
    # Dict literals evaluate in order, so the probes run (and print) in the
    # order listed here.
    return {
        "redis": _probe_redis(info),
        "neo4j": _probe_neo4j(info),
        "chromadb": _probe_chromadb(info),
        "ollama": _probe_ollama(info),
    }
# ── Step 6: Pre-load models ──────────────────────────────────────────────────
def preload_models(check_only: bool = False) -> bool:
    """Load the embedding model and the projection bridge weights up front.

    Args:
        check_only: When True, nothing is loaded and the step is reported
            as skipped.

    Returns:
        False only when the embedding model fails to load. Problems with the
        projection bridge are reported as warnings and still return True.
    """
    _header("Model pre-loading")
    if check_only:
        _info("Skipped in --check mode")
        return True

    # Embedding model
    try:
        from backend.app.core.embedder import EmbeddingRegistry
        from backend.app.core.config import settings

        reg = EmbeddingRegistry.get()
        _info(f"Loading embedding model: {settings.embedding_model} ...")
        reg.load_model(settings.embedding_model)
        _ok(f"Embedding model ready: {reg.model_name} ({reg.dimension}-dim)")
    except Exception as exc:
        # Mis-encoded arrows in the hint lines are restored.
        _fail(
            f"Failed to load embedding model: {exc}\n"
            " → If using 'onnx-minilm' (default), this should work out of the box.\n"
            " → If using 'ollama:<model>', ensure Ollama is reachable and the model exists."
        )
        return False

    # Projection bridge weights (failure here is non-critical)
    try:
        from backend.app.services.projection_bridge import load_weights, is_ready

        _info("Loading projection bridge weights ...")
        load_weights()
        if is_ready():
            _ok("Projection bridge weights loaded")
        else:
            _warn("Projection bridge: using generated default weights (non-critical)")
    except Exception as exc:
        _warn(f"Projection bridge: {exc} (non-critical, will use defaults)")
    return True
# ── Step 7: Write marker ─────────────────────────────────────────────────────
def write_setup_marker() -> None:
    """Create (or overwrite) the marker file recording that setup succeeded.

    The file's docstring upstream says run.py consults this marker.
    """
    with open(SETUP_MARKER, mode="w") as marker_file:
        print("ok", file=marker_file)
def is_setup_done() -> bool:
    """Return True when the setup marker exists as a regular file."""
    marker_present = os.path.isfile(SETUP_MARKER)
    return marker_present
# ── Main ──────────────────────────────────────────────────────────────────────
def main() -> None:
    """Run every setup step in order and print a summary.

    With ``--check`` nothing is installed or created. The process exits with
    status 1 when any required step fails; service connectivity is reported
    but never counts as an error. The setup marker is written only after a
    fully successful non-check run.
    """
    parser = argparse.ArgumentParser(description="Orsync Scenarist project setup")
    parser.add_argument(
        "--check", action="store_true",
        help="Validate only — do not install packages or create files",
    )
    args = parser.parse_args()

    os.chdir(ROOT_DIR)

    rule = "=" * 56
    print(f"\n{_BOLD}{rule}")
    print(f" Orsync Scenarist — {'Validation' if args.check else 'Setup'}")
    print(f"{rule}{_RESET}")

    errors: list[str] = []

    # 1. Python
    if not check_python_version():
        errors.append("Python >= 3.12")
    # 2. Dependencies
    if not install_dependencies(check_only=args.check):
        errors.append("Python dependencies")
    # 3. .env
    if not ensure_env_file(check_only=args.check):
        errors.append(".env file")
    # 4. Config validation
    config_ok, info = validate_config()
    if not config_ok:
        errors.append("Configuration")
    # 5. Service connectivity (informational only)
    svc = check_services(info)
    # 6. Pre-load models — only worth attempting when everything above passed
    if not errors and not preload_models(check_only=args.check):
        errors.append("Model pre-loading")

    # Summary
    _header("Summary")
    if errors:
        # The space lives inside the conditional so the non-check message
        # no longer reads "Setup  completed" with a double space.
        _fail(f"Setup{' validation' if args.check else ''} completed with errors:")
        for e in errors:
            print(f" - {e}")
        print()
        _info("Fix the issues above and re-run: python setup.py")
        sys.exit(1)

    if not args.check:
        write_setup_marker()
    svc_up = sum(1 for v in svc.values() if v)
    svc_total = len(svc)
    _ok(f"All checks passed — {svc_up}/{svc_total} optional services connected")
    if not info.get("ollama_api_key"):
        _warn(
            "OLLAMA_API_KEY not set — LLM features will use rule-based fallbacks.\n"
            " → Set it in .env when you have a key from https://ollama.com/settings/keys"
        )
    print()
    if not args.check:
        print(f" {_GREEN}Ready! Start the server:{_RESET}")
        print(" python run.py")
        print(" python run.py --reload # dev mode with auto-reload")
        print()
# Run the setup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()