File size: 2,174 Bytes
519b145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env bash
# Refresh the "legal" search data: optionally ingest one document through the
# Django management command, then regenerate embeddings and rebuild the FAISS
# index. Run with --help for the full option list.
#
# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -euo pipefail

#######################################
# Print the usage summary (steps performed and supported options).
# Arguments: none
# Outputs:   the help text on stdout
#######################################
show_help() {
  # One argument per output line; printf reuses the format for each of them.
  printf '%s\n' \
    'Usage: refresh_legal_data.sh --file PATH --code CODE [options]' \
    '' \
    'Steps:' \
    '  1. python manage.py load_legal_document --file <PATH> --code <CODE>' \
    '  2. python scripts/generate_embeddings.py --model legal' \
    '  3. python scripts/build_faiss_index.py --model legal' \
    '' \
    'Options:' \
    '  --file PATH          PDF/DOCX file to ingest (required unless --skip-ingest)' \
    '  --code CODE          Document code (required unless --skip-ingest)' \
    '  --skip-ingest        Skip step 1 and only regenerate embeddings/indexes' \
    '  --python BIN         Python command to use (default: python3)' \
    '  --help               Show this message'
}

# Defaults; each one can be overridden by the matching command-line option.
PYTHON_BIN="python3"
FILE_PATH=""
DOC_CODE=""
SKIP_INGEST=false

#######################################
# Parse command-line options into the globals declared above.
# Globals:   FILE_PATH, DOC_CODE, PYTHON_BIN, SKIP_INGEST (written)
# Arguments: the script's command-line arguments
# Outputs:   usage text for --help; an error on stderr followed by the usage
#            text for an unknown option or an option missing its value
# Returns:   exits 0 after --help, exits 1 on invalid input
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --file|--code|--python)
        # These options consume the next argument. Check that it exists so a
        # trailing "--file" yields a usage error instead of an
        # "unbound variable" abort from `set -u`.
        if [[ $# -lt 2 ]]; then
          echo "Option $1 requires a value" >&2
          show_help
          exit 1
        fi
        case "$1" in
          --file) FILE_PATH="$2" ;;
          --code) DOC_CODE="$2" ;;
          --python) PYTHON_BIN="$2" ;;
        esac
        shift 2
        ;;
      --skip-ingest)
        SKIP_INGEST=true
        shift
        ;;
      --help|-h)
        show_help
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        show_help
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

#######################################
# Print an absolute form of a path; a relative path is resolved against the
# current working directory.
# Arguments: $1 - path, absolute or relative
# Outputs:   the absolute path on stdout
#######################################
absolute_path() {
  case "$1" in
    /*) printf '%s\n' "$1" ;;
    *) printf '%s\n' "$PWD/$1" ;;
  esac
}

# Locate the project relative to this script so it can be run from anywhere.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$SCRIPT_DIR/.."
DJANGO_DIR="$PROJECT_ROOT/hue_portal"

# Step 1 (optional): load the document through the Django management command.
if [[ "$SKIP_INGEST" = false ]]; then
  if [[ -z "$FILE_PATH" || -z "$DOC_CODE" ]]; then
    echo "--file and --code are required unless --skip-ingest is set" >&2
    exit 1
  fi
  if [[ ! -f "$FILE_PATH" ]]; then
    echo "File not found: $FILE_PATH" >&2
    exit 1
  fi
  # manage.py runs from DJANGO_DIR, so a relative --file path must be anchored
  # to the caller's directory before the directory change; otherwise the file
  # that was just validated would be looked up in the wrong place.
  FILE_PATH="$(absolute_path "$FILE_PATH")"
  echo "[1/3] Ingesting document ${DOC_CODE} ..."
  pushd "$DJANGO_DIR" >/dev/null
  "$PYTHON_BIN" manage.py load_legal_document --file "$FILE_PATH" --code "$DOC_CODE"
  popd >/dev/null
else
  echo "Skipping ingestion step."
fi

# Step 2: regenerate embeddings; run from the project root so the relative
# script path resolves.
echo "[2/3] Generating embeddings (legal) ..."
pushd "$PROJECT_ROOT" >/dev/null
"$PYTHON_BIN" scripts/generate_embeddings.py --model legal
popd >/dev/null

# Step 3: rebuild the FAISS index, again from the project root.
echo "[3/3] Building FAISS index (legal) ..."
pushd "$PROJECT_ROOT" >/dev/null
"$PYTHON_BIN" scripts/build_faiss_index.py --model legal
popd >/dev/null

echo "Done. Updated artifacts located in backend/hue_portal/artifacts/faiss_indexes."