Spaces:

thisismrismail
/

scenarist

Sleeping

github-actions[bot]

Sync backend to Hugging Face Space (commit: 39b5c807918249fa80049d49f4b6a74d6a0ed1fc)

6d86412 2 days ago

17.2 kB

	"""Orsync Scenarist — Setup Script.

	Run once before starting the project. Installs dependencies, creates the
	.env file, validates configuration, checks service connectivity, and
	pre-loads models so that ``python run.py`` starts instantly.

	Usage:
	python setup.py # Interactive setup
	python setup.py --check # Validate only (no installs, no .env creation)
	"""
	from __future__ import annotations

	import argparse
	import os
	import shutil
	import subprocess
	import sys
	import textwrap

	# ── Constants ─────────────────────────────────────────────────────────────────

	# Directory containing this script, and the directory one level above it.
	ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
	PROJECT_PARENT = os.path.dirname(ROOT_DIR)

	# Put the parent directory first on sys.path (once) so that the
	# ``backend.*`` imports performed later in this script can be resolved.
	if PROJECT_PARENT not in sys.path:
	    sys.path.insert(0, PROJECT_PARENT)

	# Well-known files that live next to this script.
	ENV_FILE, ENV_EXAMPLE, REQUIREMENTS, SETUP_MARKER = (
	    os.path.join(ROOT_DIR, filename)
	    for filename in (".env", ".env.example", "requirements.txt", ".setup_done")
	)

	# ANSI escape sequences used to colour console output.
	_BOLD, _RESET = "\033[1m", "\033[0m"
	_GREEN, _YELLOW, _RED, _CYAN = "\033[92m", "\033[93m", "\033[91m", "\033[96m"


	def _ok(msg: str) -> None:
	    """Print *msg* prefixed with a green check mark."""
	    line = f" {_GREEN}✔{_RESET} {msg}"
	    print(line)


	def _warn(msg: str) -> None:
	    """Print *msg* prefixed with a yellow warning sign."""
	    line = f" {_YELLOW}⚠{_RESET} {msg}"
	    print(line)


	def _fail(msg: str) -> None:
	    """Print *msg* prefixed with a red cross."""
	    line = f" {_RED}✖{_RESET} {msg}"
	    print(line)


	def _info(msg: str) -> None:
	    """Print *msg* prefixed with a cyan information sign."""
	    line = f" {_CYAN}ℹ{_RESET} {msg}"
	    print(line)


	def _header(title: str) -> None:
	    """Print a bold section banner for *title*, preceded by a blank line."""
	    banner = f"{_BOLD}── {title} ──{_RESET}"
	    print(f"\n{banner}")


	# ── Step 1: Python version ───────────────────────────────────────────────────

	def check_python_version() -> bool:
	    """Report whether the running interpreter is Python 3.12 or newer.

	    Prints a pass or fail line under a "Python version" section header.

	    Returns:
	        True when ``sys.version_info`` is at least 3.12, False otherwise.
	    """
	    _header("Python version")
	    running = sys.version_info[:2]
	    label = f"{running[0]}.{running[1]}"

	    if running < (3, 12):
	        _fail(
	            f"Python {label} detected — Python >= 3.12 is required.\n"
	            " → Install Python 3.12+ from https://www.python.org/downloads/"
	        )
	        return False

	    _ok(f"Python {label} (>= 3.12 required)")
	    return True


	# ── Step 2: Install dependencies ─────────────────────────────────────────────

	def install_dependencies(check_only: bool = False) -> bool:
	    """Ensure the project's Python dependencies are importable.

	    Every probe module is imported; those raising ``ImportError`` are treated
	    as missing. When something is missing and *check_only* is false, the whole
	    ``requirements.txt`` is installed with ``uv pip install``.

	    Args:
	        check_only: When true, only report missing packages; install nothing.

	    Returns:
	        True if all probes import, or the installation command succeeded.
	        False if packages are missing in check-only mode, if ``uv`` could not
	        be started, or if the installation command exited with an error.
	    """
	    _header("Python dependencies")

	    # import name -> distribution name (the two differ for several packages)
	    probes = {
	        "fastapi": "fastapi",
	        "uvicorn": "uvicorn",
	        "ollama": "ollama",
	        "redis": "redis",
	        "neo4j": "neo4j",
	        "chromadb": "chromadb",
	        "httpx": "httpx",
	        "pydantic_settings": "pydantic-settings",
	        "numpy": "numpy",
	        "sklearn": "scikit-learn",
	        "scipy": "scipy",
	        "jose": "python-jose",
	        "bcrypt": "bcrypt",
	        "aio_pika": "aio-pika",
	        "pytest": "pytest",
	        "fakeredis": "fakeredis",
	    }

	    missing: list[str] = []
	    for import_name, pkg_name in probes.items():
	        try:
	            __import__(import_name)
	        except ImportError:
	            missing.append(pkg_name)

	    if not missing:
	        _ok("All dependencies installed")
	        return True

	    if check_only:
	        _fail(f"Missing packages: {', '.join(missing)}")
	        _info("Run `python setup.py` (without --check) to install them.")
	        return False

	    _info(f"Installing {len(missing)} missing packages from requirements.txt ...")

	    uv_cmd = ["uv", "pip", "install"]
	    # uv requires --system when not inside a virtual environment
	    if sys.prefix == sys.base_prefix:
	        uv_cmd.append("--system")
	    uv_cmd += ["-r", REQUIREMENTS]

	    try:
	        subprocess.check_call(
	            uv_cmd,
	            # Installer output is silenced except on Windows.
	            stdout=subprocess.DEVNULL if os.name != "nt" else None,
	        )
	    except OSError as exc:
	        # Raised when the uv executable is absent or cannot be executed;
	        # report it like any other failed step instead of crashing.
	        _fail(
	            f"Could not run uv: {exc}\n"
	            " → Install uv: https://docs.astral.sh/uv/getting-started/installation/\n"
	            " → Or install manually: python -m pip install -r requirements.txt"
	        )
	        return False
	    except subprocess.CalledProcessError:
	        _fail(
	            "uv pip install failed.\n"
	            " → Try manually: uv pip install -r requirements.txt\n"
	            " → On Windows, run your terminal as Administrator if you get permission errors.\n"
	            # uv has no --user install scheme, so point at a fresh venv instead.
	            " → If a package conflicts, create a clean virtual environment first: uv venv"
	        )
	        return False

	    _ok("All dependencies installed successfully")
	    return True


	# ── Step 3: .env file ────────────────────────────────────────────────────────

	def ensure_env_file(check_only: bool = False) -> bool:
	    """Make sure a ``.env`` file exists, creating it from ``.env.example``.

	    Args:
	        check_only: When true, never create the file; only report its absence.

	    Returns:
	        True if ``.env`` already exists or was copied from ``.env.example``;
	        False if it is missing in check-only mode or the example is missing.
	    """
	    _header("Environment file (.env)")

	    if os.path.isfile(ENV_FILE):
	        _ok(".env file exists")
	        available = True
	    elif check_only:
	        _fail(
	            ".env file not found.\n"
	            " → Run `python setup.py` to create it from .env.example\n"
	            " → Or manually: copy .env.example .env"
	        )
	        available = False
	    elif not os.path.isfile(ENV_EXAMPLE):
	        _fail(
	            ".env.example not found — cannot create .env.\n"
	            " → Ensure you cloned the full repository."
	        )
	        available = False
	    else:
	        shutil.copy(ENV_EXAMPLE, ENV_FILE)
	        _ok("Created .env from .env.example")
	        _warn("Edit .env to set your OLLAMA_API_KEY and other secrets.")
	        available = True

	    return available


	# ── Step 4: Validate configuration ──────────────────────────────────────────

	def validate_config() -> tuple[bool, dict]:
	    """Load settings from the ``.env`` file and report the effective values.

	    A throwaway ``pydantic_settings.BaseSettings`` subclass is used, so the
	    project's own settings module is not imported here.

	    Returns:
	        ``(True, info)`` when the settings load, where *info* holds the
	        service endpoints, the Neo4j credentials, the model names, and a
	        boolean ``ollama_api_key`` flag (never the key itself).
	        ``(False, {})`` when the settings cannot be loaded.
	    """
	    _header("Configuration validation")

	    # Force-reload settings from the .env we just created/verified: drop any
	    # OLLAMA_API_KEY inherited from the process environment first.
	    os.environ.pop("OLLAMA_API_KEY", None)
	    try:
	        # Clear any stale pydantic module cache before importing. The single
	        # "pydantic" prefix also matches pydantic_core and pydantic_settings.
	        for mod_name in list(sys.modules):
	            if mod_name.startswith("pydantic"):
	                del sys.modules[mod_name]

	        from pydantic_settings import BaseSettings, SettingsConfigDict

	        class _CheckSettings(BaseSettings):
	            model_config = SettingsConfigDict(
	                env_file=ENV_FILE, env_file_encoding="utf-8", extra="ignore",
	            )
	            ollama_host: str = "https://ollama.com"
	            ollama_api_key: str = ""
	            ollama_model: str = "gemma4:31b-cloud"
	            embedding_model: str = "onnx-minilm"
	            redis_url: str = "redis://localhost:6379/0"
	            neo4j_uri: str = "neo4j://localhost:7687"
	            neo4j_username: str = "neo4j"
	            neo4j_password: str = "scenarist123"
	            chroma_host: str = "localhost"
	            chroma_port: int = 8100
	            environment: str = "development"

	        cfg = _CheckSettings()
	    except Exception as exc:
	        _fail(f"Could not load .env: {exc}")
	        _info("Check .env syntax — each line should be KEY=VALUE (no quotes needed).")
	        return False, {}

	    info = {
	        # Only whether a key is present is exposed, not its value.
	        "ollama_api_key": bool(cfg.ollama_api_key),
	        "ollama_host": cfg.ollama_host,
	        "ollama_model": cfg.ollama_model,
	        "embedding_model": cfg.embedding_model,
	        "redis_url": cfg.redis_url,
	        "neo4j_uri": cfg.neo4j_uri,
	        # Credentials are passed along so a connectivity probe can
	        # authenticate with what is actually configured.
	        "neo4j_username": cfg.neo4j_username,
	        "neo4j_password": cfg.neo4j_password,
	        "chroma_host": cfg.chroma_host,
	        "chroma_port": cfg.chroma_port,
	    }

	    # Ollama API key — a missing key is a warning, not an error.
	    if cfg.ollama_api_key:
	        _ok(f"OLLAMA_API_KEY is set (host: {cfg.ollama_host})")
	    else:
	        _warn(
	            "OLLAMA_API_KEY is empty — LLM features will use rule-based fallbacks.\n"
	            " → Get a key at https://ollama.com/settings/keys\n"
	            " → Then set OLLAMA_API_KEY in your .env file"
	        )

	    _ok(f"LLM model: {cfg.ollama_model}")
	    _ok(f"Embedding model: {cfg.embedding_model}")

	    return True, info


	# ── Step 5: Check service connectivity ───────────────────────────────────────

	def _probe_redis(info: dict) -> bool:
	    """Return True if Redis answers a ping or ``redis-server`` is on PATH."""
	    redis_url = info.get("redis_url", "redis://localhost:6379/0")
	    try:
	        import redis as _redis

	        _redis.from_url(redis_url, socket_connect_timeout=2).ping()
	    except Exception:
	        # Best-effort probe: any failure falls back to looking for a binary.
	        if shutil.which("redis-server"):
	            _ok("Redis: redis-server found — will start automatically with run.py")
	            return True
	        _warn(
	            "Redis: redis-server not installed.\n"
	            " → run.py will start it automatically if installed.\n"
	            " → To install: https://redis.io/download\n"
	            " Windows: winget install Redis.Redis or scoop install redis\n"
	            " macOS: brew install redis\n"
	            " Linux: sudo apt install redis-server\n"
	            " Docker: handled automatically in Dockerfile"
	        )
	        return False

	    _ok(f"Redis: already running ({redis_url})")
	    return True


	def _probe_neo4j(info: dict) -> bool:
	    """Return True if Neo4j accepts a connection or a ``neo4j`` binary exists."""
	    neo4j_uri = info.get("neo4j_uri", "neo4j://localhost:7687")
	    # Fallback credentials mirror the defaults validate_config() applies.
	    auth = (
	        info.get("neo4j_username", "neo4j"),
	        info.get("neo4j_password", "scenarist123"),
	    )
	    try:
	        from neo4j import GraphDatabase

	        driver = GraphDatabase.driver(neo4j_uri, auth=auth)
	        try:
	            driver.verify_connectivity()
	        finally:
	            # Close the driver even when the connectivity check raises.
	            driver.close()
	    except Exception:
	        # Best-effort probe: any failure falls back to looking for a binary,
	        # first on PATH and then in common install locations.
	        candidates = ("/opt/neo4j/bin/neo4j", os.path.expanduser("~/neo4j/bin/neo4j"))
	        neo4j_bin = shutil.which("neo4j") or next(
	            (path for path in candidates if os.path.isfile(path)), None
	        )
	        if neo4j_bin:
	            _ok("Neo4j: binary found — will start automatically with run.py")
	            return True
	        _warn(
	            "Neo4j: not installed.\n"
	            " → run.py will start it automatically if installed.\n"
	            " → To install: https://neo4j.com/download/\n"
	            " Windows: winget install Neo4j.Neo4j or download from neo4j.com\n"
	            " macOS: brew install neo4j\n"
	            " Linux: see https://neo4j.com/docs/operations-manual/current/installation/linux/\n"
	            " Docker: handled automatically in Dockerfile"
	        )
	        return False

	    _ok(f"Neo4j: already running ({neo4j_uri})")
	    return True


	def _probe_chromadb(info: dict) -> bool:
	    """Return True if ChromaDB answers a heartbeat or the ``chroma`` CLI exists."""
	    chroma_host = info.get("chroma_host", "localhost")
	    # 8100 matches the chroma_port default used by validate_config().
	    chroma_port = info.get("chroma_port", 8100)
	    try:
	        import chromadb

	        chromadb.HttpClient(host=chroma_host, port=chroma_port).heartbeat()
	    except Exception:
	        # Best-effort probe: any failure falls back to looking for the CLI.
	        if shutil.which("chroma"):
	            _ok("ChromaDB: chroma CLI found — will start automatically with run.py")
	            return True
	        _warn(
	            "ChromaDB: chroma CLI not installed.\n"
	            " → run.py will start it automatically if installed.\n"
	            " → To install: pip install chromadb\n"
	            " Docker: handled automatically in Dockerfile"
	        )
	        return False

	    _ok(f"ChromaDB: already running ({chroma_host}:{chroma_port})")
	    return True


	def _probe_ollama(info: dict) -> bool:
	    """Return True if the Ollama host answers ``/api/tags`` without a 5xx status."""
	    if not info.get("ollama_api_key"):
	        _info("Ollama Cloud: skipped (no API key set)")
	        return False

	    ollama_host = info.get("ollama_host", "https://ollama.com")
	    try:
	        import httpx

	        # Just check we can reach the host — don't burn tokens
	        resp = httpx.get(f"{ollama_host.rstrip('/')}/api/tags", timeout=5)
	        reachable = resp.status_code < 500
	    except Exception:
	        reachable = False

	    if reachable:
	        _ok(f"Ollama Cloud: reachable ({ollama_host})")
	        return True
	    _warn(
	        f"Ollama Cloud: not reachable ({ollama_host})\n"
	        " → Check your OLLAMA_HOST and internet connection.\n"
	        " → LLM features will use rule-based fallbacks."
	    )
	    return False


	def check_services(info: dict) -> dict[str, bool]:
	    """Probe the external services and report which ones are usable.

	    Redis, Neo4j and ChromaDB count as available when they are already
	    running or when their launcher binary can be found locally. Ollama
	    counts as available only when an API key is configured and the host
	    responds.

	    Args:
	        info: Configuration mapping as returned by ``validate_config()``.
	            Missing keys fall back to built-in defaults, so an empty dict is
	            accepted.

	    Returns:
	        Mapping with the keys ``redis``, ``neo4j``, ``chromadb`` and
	        ``ollama``, each set to True or False.
	    """
	    _header("Service connectivity")

	    return {
	        "redis": _probe_redis(info),
	        "neo4j": _probe_neo4j(info),
	        "chromadb": _probe_chromadb(info),
	        "ollama": _probe_ollama(info),
	    }


	# ── Step 6: Pre-load models ──────────────────────────────────────────────────

	def preload_models(check_only: bool = False) -> bool:
	    """Load the embedding model and the projection bridge weights.

	    Args:
	        check_only: When true, nothing is loaded and True is returned.

	    Returns:
	        False if the embedding model fails to load; True otherwise. Problems
	        with the projection bridge are reported as warnings only.
	    """
	    _header("Model pre-loading")

	    if check_only:
	        _info("Skipped in --check mode")
	        return True

	    # Embedding model — a failure here aborts the step.
	    try:
	        from backend.app.core.embedder import EmbeddingRegistry
	        registry = EmbeddingRegistry.get()

	        # Import settings fresh
	        from backend.app.core.config import settings
	        model_id = settings.embedding_model
	        _info(f"Loading embedding model: {model_id} ...")
	        registry.load_model(settings.embedding_model)
	        _ok(f"Embedding model ready: {registry.model_name} ({registry.dimension}-dim)")
	    except Exception as err:
	        _fail(
	            f"Failed to load embedding model: {err}\n"
	            " → If using 'onnx-minilm' (default), this should work out of the box.\n"
	            " → If using 'ollama:<model>', ensure Ollama is reachable and the model exists."
	        )
	        return False

	    # Projection bridge weights — non-critical, so errors only warn.
	    try:
	        from backend.app.services.projection_bridge import load_weights, is_ready
	        _info("Loading projection bridge weights ...")
	        load_weights()
	        if not is_ready():
	            _warn("Projection bridge: using generated default weights (non-critical)")
	        else:
	            _ok("Projection bridge weights loaded")
	    except Exception as err:
	        _warn(f"Projection bridge: {err} (non-critical, will use defaults)")

	    return True


	# ── Step 7: Write marker ─────────────────────────────────────────────────────

	def write_setup_marker() -> None:
	    """Write a marker file so run.py knows setup completed successfully.

	    The file at ``SETUP_MARKER`` is created or overwritten with ``ok``.
	    """
	    with open(SETUP_MARKER, "w") as marker:
	        marker.write("ok" + "\n")


	def is_setup_done() -> bool:
	    """Return True when the setup marker exists as a regular file."""
	    marker_present = os.path.isfile(SETUP_MARKER)
	    return marker_present


	# ── Main ──────────────────────────────────────────────────────────────────────

	def main() -> None:
	    """Run every setup step in order and print a summary.

	    Command-line flags:
	        --check  Validate only; install nothing and create no files.

	    All steps up to service connectivity always run, so one invocation
	    reports every problem. Model pre-loading runs only when no earlier step
	    failed. Unreachable services never count as errors.

	    Exits with status 1 if any step failed. On success outside ``--check``
	    mode the setup marker file is written.
	    """
	    parser = argparse.ArgumentParser(description="Orsync Scenarist project setup")
	    parser.add_argument(
	        "--check", action="store_true",
	        help="Validate only — do not install packages or create files",
	    )
	    args = parser.parse_args()

	    # Work from the script's own directory regardless of where it was invoked.
	    os.chdir(ROOT_DIR)

	    print(f"\n{_BOLD}{'=' * 56}")
	    print(f" Orsync Scenarist — {'Validation' if args.check else 'Setup'}")
	    print(f"{'=' * 56}{_RESET}")

	    errors: list[str] = []

	    # 1. Python
	    if not check_python_version():
	        errors.append("Python >= 3.12")

	    # 2. Dependencies
	    if not install_dependencies(check_only=args.check):
	        errors.append("Python dependencies")

	    # 3. .env
	    if not ensure_env_file(check_only=args.check):
	        errors.append(".env file")

	    # 4. Config validation
	    config_ok, info = validate_config()
	    if not config_ok:
	        errors.append("Configuration")

	    # 5. Service connectivity (informational only; never added to errors)
	    svc = check_services(info)

	    # 6. Pre-load models — skipped once anything above has failed
	    if not errors and not preload_models(check_only=args.check):
	        errors.append("Model pre-loading")

	    # Summary
	    _header("Summary")
	    if errors:
	        # Build the label first so normal mode does not print a double space.
	        outcome = "Setup validation" if args.check else "Setup"
	        _fail(f"{outcome} completed with errors:")
	        for e in errors:
	            print(f" - {e}")
	        print()
	        _info("Fix the issues above and re-run: python setup.py")
	        sys.exit(1)

	    if not args.check:
	        write_setup_marker()
	    svc_up = sum(1 for v in svc.values() if v)
	    svc_total = len(svc)
	    _ok(f"All checks passed — {svc_up}/{svc_total} optional services connected")
	    if not info.get("ollama_api_key"):
	        _warn(
	            "OLLAMA_API_KEY not set — LLM features will use rule-based fallbacks.\n"
	            " → Set it in .env when you have a key from https://ollama.com/settings/keys"
	        )
	    print()
	    if not args.check:
	        print(f" {_GREEN}Ready! Start the server:{_RESET}")
	        print(" python run.py")
	        print(" python run.py --reload # dev mode with auto-reload")
	        print()


	# Run the setup flow only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()