File size: 3,085 Bytes
363df51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/bin/sh
# Container entrypoint: optionally restore/sync app data via a HuggingFace
# dataset, then launch the electerm web app.
#
# Environment:
#   DB_PATH        - app data directory (default /home/electerm/data)
#   HF_TOKEN       - HuggingFace API token (backup disabled if unset)
#   DATASET_ID     - HuggingFace dataset repo id (backup disabled if unset)
#   SYNC_INTERVAL  - seconds between backup uploads (default 7200)
set -eu

APP_DATA_DIR="${DB_PATH:-/home/electerm/data}"
mkdir -p "$APP_DATA_DIR"

# Without credentials there is nothing to back up to: start the app directly.
# exec replaces this shell so node receives signals as the main process.
if [ -z "${HF_TOKEN:-}" ] || [ -z "${DATASET_ID:-}" ]; then
  echo "Starting without backup functionality - missing HF_TOKEN or DATASET_ID"
  exec node ./src/app/app.js
fi

# Activate the Python virtualenv that provides huggingface_hub for the
# embedded python3 heredocs below.
. /opt/venv/bin/activate

# Upload one backup archive to the HuggingFace dataset, then prune old backups
# so at most 50 are kept.
# Arguments:
#   $1 - local path of the .tar.gz archive
#   $2 - file name to store it under in the dataset repo
# Outputs: progress/errors to stdout (errors are reported, not fatal).
upload_backup() {
    file_path="$1"
    file_name="$2"

    # Use a QUOTED heredoc and pass all values through the environment.
    # The previous unquoted heredoc spliced ${HF_TOKEN}/${DATASET_ID}/file
    # names straight into Python source, so any quote or backslash in them
    # would break (or inject code into) the generated script.
    HF_TOKEN="${HF_TOKEN}" DATASET_ID="${DATASET_ID}" \
    BACKUP_FILE_PATH="$file_path" BACKUP_FILE_NAME="$file_name" \
    python3 - <<'PY'
import os
from huggingface_hub import HfApi

api = HfApi(token=os.environ['HF_TOKEN'])
repo_id = os.environ['DATASET_ID']
file_name = os.environ['BACKUP_FILE_NAME']
file_path = os.environ['BACKUP_FILE_PATH']

def manage_backups(max_files=50):
    """Delete oldest backups so the repo keeps at most max_files of them."""
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = sorted(
        f for f in files
        if f.startswith('electerm_backup_') and f.endswith('.tar.gz')
    )
    if len(backup_files) >= max_files:
        # Names embed a sortable timestamp, so lexical order == age order.
        for old in backup_files[:len(backup_files) - max_files + 1]:
            try:
                api.delete_file(path_in_repo=old, repo_id=repo_id, repo_type='dataset')
                print(f'Deleted old backup: {old}')
            except Exception as e:
                print(f'Error deleting {old}: {e}')

try:
    api.upload_file(path_or_fileobj=file_path, path_in_repo=file_name,
                    repo_id=repo_id, repo_type='dataset')
    print(f'Successfully uploaded {file_name}')
    manage_backups()
except Exception as e:
    # Best-effort: a failed upload must not abort the sync loop.
    print(f'Error uploading file: {e}')
PY
}

# Restore the newest backup archive from the HuggingFace dataset into
# APP_DATA_DIR. No-op (exit 0) when the repo has no backups; download or
# extraction errors are printed, not fatal.
download_latest_backup() {
    # Quoted heredoc + env vars: avoids interpolating the token/repo id/paths
    # into Python source (quote/backslash in a value would break or inject).
    HF_TOKEN="${HF_TOKEN}" DATASET_ID="${DATASET_ID}" OUT_DIR="${APP_DATA_DIR}" \
    python3 - <<'PY'
import os, sys, tarfile, tempfile
from huggingface_hub import HfApi

api = HfApi(token=os.environ['HF_TOKEN'])
repo_id = os.environ['DATASET_ID']
out_dir = os.environ['OUT_DIR']
try:
    files = api.list_repo_files(repo_id=repo_id, repo_type='dataset')
    backup_files = sorted(
        f for f in files
        if f.startswith('electerm_backup_') and f.endswith('.tar.gz')
    )
    if not backup_files:
        print('No backup files found')
        sys.exit(0)
    # Timestamped names sort lexically, so the last entry is the newest.
    latest_backup = backup_files[-1]
    with tempfile.TemporaryDirectory() as temp_dir:
        filepath = api.hf_hub_download(repo_id=repo_id, filename=latest_backup,
                                       repo_type='dataset', local_dir=temp_dir)
        if filepath and os.path.exists(filepath):
            with tarfile.open(filepath, 'r:gz') as tar:
                try:
                    # 'data' filter rejects path traversal, absolute names,
                    # and device/setuid members in a tampered archive.
                    tar.extractall(out_dir, filter='data')
                except TypeError:
                    # Python without the extraction-filter API (< 3.12 and
                    # un-backported security releases).
                    tar.extractall(out_dir)
            print(f'Successfully restored backup from {latest_backup}')
except Exception as e:
    # Best-effort restore: start with an empty data dir rather than crash.
    print(f'Error downloading backup: {e}')
PY
}

# Periodically archive APP_DATA_DIR and upload it to HuggingFace.
# Runs forever; intended to be launched as a background job.
# Reads: APP_DATA_DIR, SYNC_INTERVAL (seconds, default 7200).
sync_data() {
    while true; do
        echo "Starting sync process at $(date)"
        if [ -d "$APP_DATA_DIR" ]; then
            timestamp=$(date +%Y%m%d_%H%M%S)
            backup_file="electerm_backup_${timestamp}.tar.gz"
            # Guard tar explicitly: under 'set -eu' an unguarded failure
            # (e.g. a file changing while being read) would silently kill
            # this background loop and stop all future syncs.
            if tar -czf "/tmp/${backup_file}" -C "$APP_DATA_DIR" .; then
                echo "Uploading backup to HuggingFace..."
                upload_backup "/tmp/${backup_file}" "${backup_file}"
            else
                echo "tar failed, skipping this sync cycle" >&2
            fi
            rm -f "/tmp/${backup_file}"
        else
            echo "Data directory does not exist yet, waiting for next sync..."
        fi
        # Re-read SYNC_INTERVAL each cycle so it only needs to be set once.
        interval=${SYNC_INTERVAL:-7200}
        echo "Next sync in ${interval} seconds..."
        sleep "$interval"
    done
}

# Restore the most recent backup (if any) before the app first opens its data,
# then start the periodic uploader in the background.
echo "Downloading latest backup from HuggingFace..."
download_latest_backup
sync_data &
# exec replaces this shell with node so the app is the main process and
# receives container signals (e.g. SIGTERM) directly.
exec node ./src/app/app.js