#!/bin/sh
# Seed GraphReview SQLite DBs from training_corpus subpaths (application core only).
# Run after scripts/clone_training_repos.sh. Executes from code-review-env so `python -m db.seed` resolves.
# POSIX sh — safe to run as: sh scripts/seed_training_corpus.sh
#
# Environment overrides:
#   CORPUS_DIR     directory holding the cloned repos   (default: $ROOT/training_corpus)
#   CORPUS_DB_DIR  directory receiving the *.db files   (default: $ROOT/outputs/corpus_dbs)
# Relative override values are resolved against the directory the script is
# invoked from, not against code-review-env.

set -eu

# Repository root = parent of the directory containing this script.
# CDPATH is cleared so `cd` cannot jump elsewhere or print the directory name.
ROOT="$(CDPATH='' cd "$(dirname "$0")/.." && pwd)"
ENV_DIR="$ROOT/code-review-env"
CORPUS_DIR="${CORPUS_DIR:-$ROOT/training_corpus}"
OUT_DIR="${CORPUS_DB_DIR:-$ROOT/outputs/corpus_dbs}"

# The script changes into $ENV_DIR below, so a relative override would
# otherwise be re-interpreted against that directory (corpus lookups would all
# be skipped and --db-path would point at a directory that was never created).
# Anchor relative values to the caller's working directory first; absolute
# values are left untouched.
case "$CORPUS_DIR" in
  /*) ;;
  *) CORPUS_DIR="$PWD/$CORPUS_DIR" ;;
esac
case "$OUT_DIR" in
  /*) ;;
  *) OUT_DIR="$PWD/$OUT_DIR" ;;
esac

if [ ! -d "$ENV_DIR" ]; then
  echo "error: expected code-review-env at $ENV_DIR" >&2
  exit 1
fi

mkdir -p "$OUT_DIR"
cd "$ENV_DIR"

# seed_one DB_BASENAME RELATIVE_PATH
#
# Seed one database from a single corpus subtree.
# Globals:   CORPUS_DIR (read), OUT_DIR (read);
#            target and db_path are written (POSIX sh has no `local`).
# Arguments: $1 - database file name without the .db extension
#            $2 - source directory, relative to $CORPUS_DIR
# Outputs:   one "[skip]" or "[seed]" progress line on stdout
# Returns:   0 when the source directory is missing (skipped); otherwise the
#            exit status of `python -m db.seed`. Under `set -e` a non-zero
#            status from the seeder aborts the whole script.
seed_one() {
  db_basename="$1"
  relative_path="$2"
  target="$CORPUS_DIR/$relative_path"
  db_path="$OUT_DIR/${db_basename}.db"

  # A repo that has not been cloned is not an error: report it and move on.
  if [ ! -d "$target" ]; then
    echo "[skip] missing directory: $target"
    return 0
  fi

  echo "[seed] $target -> $db_path"
  python -m db.seed "$target" --db-path "$db_path" --force
}

# Tier 1 — single package roots matching training corpus seed table
seed_one corpus_flask "flask/src/flask"
# Full celery package (app/, worker/, backends/ live under this tree)
seed_one corpus_celery "celery/celery"
seed_one corpus_requests "requests/src/requests"
seed_one corpus_httpx "httpx/httpx"
seed_one corpus_fastapi "fastapi/fastapi"
seed_one corpus_sqlalchemy "sqlalchemy/lib/sqlalchemy"
seed_one corpus_pydantic "pydantic/pydantic"

# Tier 2
seed_one corpus_luigi "luigi/luigi"
# Focus: middleware stack modules (omit tests/spiders noise)
seed_one corpus_scrapy_core "scrapy/scrapy/core"
seed_one corpus_scrapy_pipelines "scrapy/scrapy/pipelines"
seed_one corpus_paramiko "paramiko/paramiko"
seed_one corpus_airflow "airflow/airflow"
# Django: seed focused subtrees (separate DBs — no cross-edges between DBs)
seed_one corpus_django_db "django/django/db"
seed_one corpus_django_http "django/django/http"
seed_one corpus_django_auth "django/django/contrib/auth"

# Tier 3 — small templates (paths vary; adjust if upstream layout changes)
# App root: models/, resources/, app.py (Flask-Smorest sample)
seed_one corpus_rest_api_smorest_docker "rest-api-smorest-docker"
seed_one corpus_fullstack_fastapi_template "full-stack-fastapi-template/backend/app"
seed_one corpus_flasky "flasky/app"
# Layout: project/{app,db,migrations,tests}
# Checked here (in addition to the check inside seed_one) so the skip message
# can name the upstream repository to clone.
if [ -d "$CORPUS_DIR/fastapi-tdd-docker/project" ]; then
  seed_one corpus_fastapi_tdd "fastapi-tdd-docker/project"
else
  echo "[skip] fastapi-tdd-docker/project — clone testdrivenio/fastapi-tdd-docker first"
fi

echo "Done. Databases under: $OUT_DIR"