Knightleo committed on
Commit
1966c0d
·
verified ·
1 Parent(s): 1438b06

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dokerfile +15 -0
  2. readme.md +9 -0
  3. start.sh +137 -0
Dokerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM ghcr.io/h88782481/chat-share:latest

# Copy to an explicit destination so it matches the chmod path below even if
# the base image changes its WORKDIR (the original `COPY start.sh .` silently
# depended on WORKDIR being /app).
COPY start.sh /app/start.sh

RUN chmod +x /app/start.sh

RUN pip install --no-cache-dir --upgrade pip \
 && pip install --no-cache-dir huggingface-hub

# Writable data directory used by start.sh for restore/backup snapshots.
RUN mkdir -p /app/data \
 && chmod -R a+rwX /app/data

EXPOSE 5100

# Absolute path: works regardless of WORKDIR; the script has a shebang and
# was made executable above.
CMD ["/app/start.sh"]
readme.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
---
# Hugging Face Spaces front matter — controls how the Space is built & shown.
# NOTE(review): the docker SDK expects a file literally named `Dockerfile`;
# this commit adds `Dokerfile` (missing "c") — confirm the Space builds.
title: 'Share'
emoji: 📊
colorFrom: purple
colorTo: red
sdk: docker
pinned: false
# Port the container serves on; matches EXPOSE 5100 in the image.
app_port: 5100
---
start.sh ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ # 自动 export 后面所有赋值
6
+ set -a
7
+
8
# Emit a single timestamped log line on stdout.
log() {
  printf '[%s] %s\n' "$(date +'%F %T')" "$*"
}
11
+
12
# 1. init_backup
# Decide whether backups are enabled and which dataset repo to use.
# Returns 0 (with DATASET_ID set, exported via `set -a`) when backups should
# run; returns 1 to skip backups entirely.
init_backup(){
  # An externally supplied DATASET_ID always wins.
  if [[ -n "${DATASET_ID:-}" ]]; then
    log "📁 使用外部定义的 DATASET_ID=$DATASET_ID"
    return 0
  fi
  # Without a token we can neither resolve the user nor push backups.
  if [[ -z "${HF_TOKEN:-}" ]]; then
    log "⚠️ HF_TOKEN 未设置,跳过备份"
    return 1
  fi

  # Resolve the account that owns HF_TOKEN; empty output signals failure.
  # `|| true` keeps this safe under `set -e` even outside an if-condition.
  USER_ID=$(python3 - <<'PY'
import os, sys
from huggingface_hub import HfApi
try:
    name = HfApi(token=os.getenv("HF_TOKEN")).whoami().get("name", "")
except Exception:      # network/auth errors -> treat as "no user"
    sys.exit(1)
if not name:
    sys.exit(1)
print(name)
PY
  ) || true
  if [[ -z "$USER_ID" ]]; then
    log "⚠️ 获取 USER_ID 失败,跳过备份"
    return 1
  fi

  # Default dataset repo: <user>/data
  DATASET_ID="${USER_ID}/data"
  log "✅ 设置默认 DATASET_ID=$DATASET_ID"
  return 0
}
43
+
44
# 2. prep_repo
# Ensure the private backup dataset repo and its "Chat-Share" branch exist.
# Reads HF_TOKEN and DATASET_ID from the environment (exported via `set -a`).
prep_repo(){
  python3 <<'PY'
import os
from huggingface_hub import HfApi
api = HfApi(token=os.getenv("HF_TOKEN"))
repo = os.environ["DATASET_ID"]
# exist_ok=True is race-free and avoids listing the author's entire
# dataset collection just to test membership.
api.create_repo(repo_id=repo, repo_type="dataset", private=True, exist_ok=True)
branch = "Chat-Share"
api.create_branch(repo_id=repo, repo_type="dataset", branch=branch, exist_ok=True)
PY
  log "✅ 数据集 & 分支就绪"
}
61
+
62
# 3. restore_latest
# Download the newest *.tar.gz snapshot (if any) from the Chat-Share branch
# and unpack it into BACKUP_DIR. No-op when the branch has no snapshots.
restore_latest(){
  python3 <<'PY'
import os, sys, tarfile, tempfile
from huggingface_hub import HfApi
api = HfApi(token=os.getenv("HF_TOKEN"))
repo, branch = os.getenv("DATASET_ID"), "Chat-Share"
files = api.list_repo_files(repo_id=repo, repo_type="dataset", revision=branch)
# Timestamped names (Chat-Share_YYYYmmdd_HHMMSS) sort chronologically.
backs = sorted(f for f in files if f.endswith(".tar.gz"))
if not backs:
    sys.exit(0)
td = tempfile.mkdtemp()
path = api.hf_hub_download(repo_id=repo, repo_type="dataset",
                           revision=branch, filename=backs[-1], local_dir=td)
# Default to "." so a missing BACKUP_DIR no longer crashes extractall(None).
dest = os.getenv("BACKUP_DIR", ".")
with tarfile.open(path) as t:
    if hasattr(tarfile, "data_filter"):
        # Safe-extraction filter (Py >= 3.12 and backport security releases):
        # blocks path-traversal / device entries in the archive.
        t.extractall(dest, filter="data")
    else:
        t.extractall(dest)
PY
  log "✅ 恢复最新备份(如果有)"
}
80
+
81
# 4. do_backup
# Snapshot BACKUP_DIR into a tar.gz, upload it to the Chat-Share branch,
# prune snapshots beyond DATASET_NUM, then squash branch history to keep the
# dataset small. Always removes its temp dir; returns the upload's status.
do_backup(){
  local ts fname tmp rv=0
  ts=$(date +%Y%m%d_%H%M%S)
  fname="Chat-Share_${ts}.tar.gz"
  tmp=$(mktemp -d)
  tar -czf "$tmp/$fname" -C "$BACKUP_DIR" .

  # Quoted heredoc: the archive path/name is passed via the environment
  # instead of being interpolated into the Python source, so quotes or
  # metacharacters in paths cannot break (or inject into) the script.
  BACKUP_FILE="$tmp/$fname" BACKUP_NAME="$fname" python3 <<'PY' || rv=$?
import os
from huggingface_hub import HfApi
api = HfApi(token=os.getenv("HF_TOKEN"))
repo, branch = os.getenv("DATASET_ID"), "Chat-Share"
api.upload_file(path_or_fileobj=os.environ["BACKUP_FILE"],
                path_in_repo=os.environ["BACKUP_NAME"],
                repo_id=repo, repo_type="dataset",
                revision=branch)
# Keep only the newest `keep` snapshots. keep <= 0 now means "keep none"
# (the old backs[:-keep] silently kept everything when keep == 0).
keep = int(os.getenv("DATASET_NUM", "10"))
files = api.list_repo_files(repo_id=repo, repo_type="dataset", revision=branch)
backs = sorted(f for f in files if f.endswith(".tar.gz"))
for old in (backs[:-keep] if keep > 0 else backs):
    api.delete_file(path_in_repo=old,
                    repo_id=repo, repo_type="dataset", revision=branch)
api.super_squash_history(repo_id=repo, repo_type="dataset", branch=branch)
PY

  # Clean up even when the upload failed (the old version leaked $tmp then).
  rm -rf "$tmp"
  log "✅ 上传备份并清理临时文件"
  return "$rv"
}
109
+
110
# 5. sync_loop
# Periodic backup loop; launched in the background from main().
sync_loop(){
  while true; do
    # A transient failure must not end backups for good: this subshell
    # inherits `set -e`, so an unguarded do_backup error would kill the loop.
    do_backup || log "⚠️ do_backup failed; will retry on next cycle"
    log "⏳ 下次同步在 ${SYNC_INTERVAL}s 后"
    sleep "${SYNC_INTERVAL}"
  done
}
118
+
119
# Main flow: configure defaults, optionally start the backup/sync pipeline,
# then replace this shell with the application process.
main(){
  # Default to the directory the Dockerfile prepares (mkdir -p /app/data &&
  # chmod -R a+rwX /app/data).
  # NOTE(review): the previous default was "$HOME/app/data", which does not
  # match that writable directory — confirm no deployment relied on the old
  # path before depending on this change.
  BACKUP_DIR="${BACKUP_DIR:-/app/data}"
  DATASET_NUM="${DATASET_NUM:-10}"
  SYNC_INTERVAL="${SYNC_INTERVAL:-36000}"

  if init_backup; then
    log "🚀 启动备份/同步流程,使用数据集:$DATASET_ID"
    prep_repo
    restore_latest
    sync_loop &   # background loop; keeps running across the exec below
  else
    log "🚀 直接启动主应用,无备份/同步"
  fi

  # Replace the shell so the app receives signals directly (PID 1 friendly).
  exec python app.py
}

main "$@"