Proxy committed on
Commit
ed4aaef
·
1 Parent(s): 96ea52c

Proxy cache service

Browse files
Files changed (2) hide show
  1. Dockerfile +5 -0
  2. app.py +186 -0
Dockerfile ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Minimal image for the proxy cache service (app.py uses only the standard library).
FROM python:3.11-slim
# Flush print() output immediately so the startup message reaches the container log.
ENV PYTHONUNBUFFERED=1
WORKDIR /app
COPY app.py /app/
# Run as an unprivileged user; the service only writes below /tmp.
RUN useradd --create-home --uid 1000 appuser
USER appuser
# Default listening port of app.py (overridable through the PORT variable).
EXPOSE 7860
CMD ["python3", "app.py"]
app.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import hashlib
import http.server
import json
import os
import shutil
import tempfile
import threading
import time
import urllib.request
from pathlib import Path
from urllib.parse import urlparse, parse_qs

# Hugging Face access token read from the environment (empty string when unset).
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Directory holding cached payloads and their ".meta" sidecar files.
CACHE_DIR = Path("/tmp/proxy_cache")
# parents=True so start-up does not fail when the parent directory is missing.
CACHE_DIR.mkdir(parents=True, exist_ok=True)

CACHE_DURATION = 5 * 3600  # seconds a cached entry stays valid (5 hours)
MAX_CACHE_SIZE = 2 * 1024 * 1024 * 1024  # payload bytes that trigger eviction (2 GiB)


class SmartCache:
    """Disk-backed file cache with time-based expiry and size-based eviction.

    Every entry is stored as two files directly inside the cache directory:
    the payload, named after the SHA-256 hex digest of its key, and a
    ``<digest>.meta`` JSON sidecar holding ``key``, ``cached_at``,
    ``access_count`` and ``size``.  Hashing the key keeps the directory flat,
    so a key such as ``"../x"`` cannot address a path outside the cache
    directory and a key such as ``"user/repo/file"`` needs no sub-directory.

    Args:
        cache_dir: Directory for the cache files; defaults to ``CACHE_DIR``.
        max_age: Lifetime of an entry in seconds; defaults to
            ``CACHE_DURATION``.
        max_size: Total payload size in bytes at which ``cleanup`` starts
            evicting; defaults to ``MAX_CACHE_SIZE``.
    """

    CLEAN_INTERVAL = 300  # seconds between background cleanup passes
    EVICT_TARGET_RATIO = 0.8  # evict until the total drops below this share of max_size

    def __init__(self, cache_dir=None, max_age=None, max_size=None):
        self.cache_dir = CACHE_DIR if cache_dir is None else Path(cache_dir)
        self.max_age = CACHE_DURATION if max_age is None else max_age
        self.max_size = MAX_CACHE_SIZE if max_size is None else max_size
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.lock = threading.Lock()
        self._start_cleaner()

    def _start_cleaner(self):
        """Start a daemon thread that calls ``cleanup`` every CLEAN_INTERVAL seconds."""

        def clean_loop():
            while True:
                time.sleep(self.CLEAN_INTERVAL)
                try:
                    self.cleanup()
                except Exception as exc:
                    # One failed pass must not end eviction for the rest of
                    # the process lifetime; report it and try again later.
                    print(f"Cache cleanup failed: {exc}")

        threading.Thread(target=clean_loop, daemon=True).start()

    def _entry_paths(self, key):
        """Return the ``(payload, metadata)`` paths used for *key*."""
        digest = hashlib.sha256(str(key).encode("utf-8", "surrogatepass")).hexdigest()
        return self.cache_dir / digest, self.cache_dir / f"{digest}.meta"

    @staticmethod
    def _meta_for(payload):
        """Return the sidecar metadata path belonging to a payload path."""
        return payload.with_name(f"{payload.name}.meta")

    @staticmethod
    def _access_count(meta):
        """Read ``access_count`` from a sidecar; 0 when it is missing or unreadable."""
        try:
            with open(meta, encoding="utf-8") as fh:
                return int(json.load(fh).get("access_count", 0))
        except (OSError, ValueError, TypeError, AttributeError):
            return 0

    def get(self, key):
        """Return the payload path for *key*, or ``None`` on a miss.

        A hit increments the entry's ``access_count``.  An entry that is
        older than ``max_age`` or whose metadata cannot be read is deleted
        and reported as a miss.
        """
        fpath, meta = self._entry_paths(key)
        with self.lock:
            if not (fpath.is_file() and meta.is_file()):
                return None
            try:
                with open(meta, encoding="utf-8") as fh:
                    info = json.load(fh)
                fresh = time.time() - info["cached_at"] < self.max_age
            except (OSError, ValueError, KeyError, TypeError):
                info, fresh = None, False
            if not fresh:
                fpath.unlink(missing_ok=True)
                meta.unlink(missing_ok=True)
                return None
            info["access_count"] = info.get("access_count", 0) + 1
            with open(meta, "w", encoding="utf-8") as fh:
                json.dump(info, fh)
            return fpath

    def put(self, key, src_path):
        """Copy *src_path* into the cache under *key*, replacing any old entry."""
        fpath, meta = self._entry_paths(key)
        with self.lock:
            shutil.copy2(src_path, fpath)
            record = {
                "key": str(key),
                "cached_at": time.time(),
                "access_count": 0,
                "size": os.path.getsize(fpath),
            }
            with open(meta, "w", encoding="utf-8") as fh:
                json.dump(record, fh)

    def cleanup(self):
        """Evict entries once the payload total reaches ``max_size``.

        Payloads are removed in order of lowest ``access_count`` first and,
        among equals, oldest modification time first, until the total falls
        below ``EVICT_TARGET_RATIO * max_size``.

        Returns:
            The number of payload files removed (0 when under the limit).
        """
        with self.lock:
            payloads = []
            for entry in self.cache_dir.iterdir():
                if entry.name.endswith(".meta"):
                    continue
                try:
                    if not entry.is_file():
                        continue
                    info = entry.stat()
                except OSError:
                    # The file vanished between listing and stat; skip it.
                    continue
                payloads.append((entry, info.st_size, info.st_mtime))

            total = sum(size for _, size, _ in payloads)
            if total < self.max_size:
                return 0

            def eviction_rank(item):
                payload, _, mtime = item
                return (self._access_count(self._meta_for(payload)), mtime)

            payloads.sort(key=eviction_rank)
            target = self.max_size * self.EVICT_TARGET_RATIO
            removed = 0
            for payload, size, _ in payloads:
                if total < target:
                    break
                payload.unlink(missing_ok=True)
                self._meta_for(payload).unlink(missing_ok=True)
                total -= size
                removed += 1
            return removed


# Single cache instance shared by every request handler in this process.
cache = SmartCache()


class Handler(http.server.BaseHTTPRequestHandler):
    """HTTP front end of the file cache.

    Routes:
        GET  /health        cache statistics as JSON.
        GET  /stream/<key>  stream a cached file (one byte range supported).
        POST /cache         download ``url`` and store it under ``key``.
        POST /preload       download ``file_name`` from a Hugging Face
                            ``dataset`` and store it as ``dataset/file_name``.
        POST /cleanup       delete every file in the cache directory.
    """

    STREAM_PREFIX = "/stream/"
    ALLOWED_SCHEMES = ("http", "https")
    DOWNLOAD_TIMEOUT = 60  # seconds of socket inactivity tolerated while downloading
    CHUNK_SIZE = 64 * 1024  # bytes per write when streaming a file

    def do_GET(self):
        """Serve ``/health`` and ``/stream/<key>``; anything else is a 404."""
        parsed = urlparse(self.path)

        if parsed.path == "/health":
            sizes = []
            for entry in CACHE_DIR.iterdir():
                if entry.name.endswith(".meta"):
                    continue
                try:
                    if entry.is_file():
                        sizes.append(entry.stat().st_size)
                except OSError:
                    # Removed by a concurrent cleanup; not part of the total.
                    continue
            self._json(200, {
                "status": "ok",
                "cached_files": len(sizes),
                "cache_size": sum(sizes),
                "cache_max": MAX_CACHE_SIZE,
            })
            return

        if parsed.path.startswith(self.STREAM_PREFIX):
            # Slice instead of split so a key that itself contains
            # "/stream/" is not truncated.
            key = parsed.path[len(self.STREAM_PREFIX):]
            if not key:
                self._json(400, {"error": "Missing file key"})
                return
            if not self._is_safe_key(key):
                self._json(400, {"error": "Invalid file key"})
                return

            cached = cache.get(key)
            if cached:
                self._serve_file(cached, key)
            else:
                self._json(404, {"error": "File not in cache. Use /cache to preload."})
            return

        self._json(404, {"error": "Not found"})

    def do_POST(self):
        """Serve ``/cache``, ``/preload`` and ``/cleanup``; anything else is a 404."""
        body = self._read_json_body()
        if body is None:
            self._json(400, {"error": "Invalid JSON body"})
            return

        if self.path == "/cache":
            file_key = body.get("key", "")
            file_url = body.get("url", "")
            if not (self._is_text(file_key) and self._is_text(file_url)):
                self._json(400, {"error": "Missing key or url"})
                return
            if not self._is_safe_key(file_key):
                self._json(400, {"error": "Invalid key"})
                return
            if not self._is_fetchable_url(file_url):
                self._json(400, {"error": "Only http and https URLs are supported"})
                return
            self._fetch_and_store(file_url, file_key, "cached")
            return

        if self.path == "/preload":
            dataset = body.get("dataset", "")
            file_name = body.get("file_name", "")
            if not (self._is_text(dataset) and self._is_text(file_name)):
                self._json(400, {"error": "Missing dataset or file_name"})
                return
            key = f"{dataset}/{file_name}"
            if not self._is_safe_key(key):
                self._json(400, {"error": "Invalid dataset or file_name"})
                return
            dl_url = f"https://huggingface.co/datasets/{dataset}/resolve/main/{file_name}"
            # Only authenticate when a token is configured; an empty bearer
            # token would turn a public download into an auth failure.
            headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
            self._fetch_and_store(dl_url, key, "preloaded", headers)
            return

        if self.path == "/cleanup":
            # Hold the cache lock so files are not removed while the cache
            # is in the middle of writing or evicting entries.
            with cache.lock:
                for entry in CACHE_DIR.iterdir():
                    if entry.is_file():
                        entry.unlink(missing_ok=True)
            self._json(200, {"status": "cleaned"})
            return

        self._json(404, {"error": "Not found"})

    def _read_json_body(self):
        """Return the request body as a dict.

        Returns ``{}`` when there is no body and ``None`` when the
        Content-Length header or the body is not valid (the body must be a
        JSON object).
        """
        try:
            length = int(self.headers.get("Content-Length", 0))
        except ValueError:
            return None
        if length < 0:
            return None
        if length == 0:
            return {}
        try:
            body = json.loads(self.rfile.read(length))
        except ValueError:  # covers JSONDecodeError and UnicodeDecodeError
            return None
        return body if isinstance(body, dict) else None

    @staticmethod
    def _is_text(value):
        """Return True when *value* is a non-empty string."""
        return isinstance(value, str) and bool(value)

    @staticmethod
    def _is_safe_key(key):
        """Return True when *key* is a relative path without ``.``/``..`` parts.

        Keys come from the URL or the request body, so anything that could
        address a location outside the cache directory is rejected.
        """
        if not isinstance(key, str) or not key or "\x00" in key:
            return False
        return all(part not in ("", ".", "..") for part in key.split("/"))

    @staticmethod
    def _is_fetchable_url(url):
        """Return True when *url* is an absolute http(s) URL.

        Rejecting other schemes keeps ``file://`` and similar URLs from
        being used to read local files through the download endpoint.
        """
        try:
            parts = urlparse(url)
        except ValueError:
            return False
        return parts.scheme in Handler.ALLOWED_SCHEMES and bool(parts.netloc)

    def _fetch_and_store(self, url, key, status, headers=None):
        """Download *url*, store it under *key* and send the JSON reply.

        Replies 200 with ``{"status": status, "key": key}`` on success and
        500 with the error text on failure.  *headers* are sent with the
        initial request only and are not repeated on redirects, so
        credentials are not forwarded to a redirect target.
        """
        # NOTE: only the URL scheme is validated by the callers; a deployment
        # reachable by untrusted clients should also restrict target hosts.
        tmp_path = None
        try:
            request = urllib.request.Request(url)
            for name, value in (headers or {}).items():
                request.add_unredirected_header(name, value)
            # A unique temp file outside the cache directory: concurrent
            # downloads cannot collide and eviction never sees partial data.
            with tempfile.NamedTemporaryFile(prefix="dl_", delete=False) as tmp:
                tmp_path = Path(tmp.name)
                with urllib.request.urlopen(request, timeout=self.DOWNLOAD_TIMEOUT) as resp:
                    expected = resp.headers.get("Content-Length")
                    shutil.copyfileobj(resp, tmp)
                received = tmp.tell()
            if expected is not None and expected.isdigit() and int(expected) != received:
                raise OSError(f"Incomplete download: got {received} of {expected} bytes")
            cache.put(key, tmp_path)
        except Exception as e:
            # Request boundary: report any failure to the client as a 500.
            self._json(500, {"error": str(e)})
            return
        finally:
            if tmp_path is not None:
                tmp_path.unlink(missing_ok=True)
        self._json(200, {"status": status, "key": key})

    @staticmethod
    def _parse_range(header, size):
        """Parse a single-range ``Range`` header for a file of *size* bytes.

        Returns an inclusive ``(start, end)`` pair clamped to the file, or
        ``None`` when the header is absent, malformed, lists several ranges
        or cannot be satisfied; the caller then sends the whole file.
        """
        if not header or size <= 0:
            return None
        unit, sep, spec = header.partition("=")
        if not sep or unit.strip().lower() != "bytes":
            return None
        first, dash, last = (part.strip() for part in spec.partition("-"))
        if not dash:
            return None

        def is_number(text):
            return text.isascii() and text.isdigit()

        if first == "":
            # Suffix form "bytes=-N": the last N bytes of the file.
            if not is_number(last) or int(last) == 0:
                return None
            start, end = max(size - int(last), 0), size - 1
        elif is_number(first) and (last == "" or is_number(last)):
            start = int(first)
            end = min(int(last), size - 1) if last else size - 1
        else:
            return None
        if start > end:
            return None
        return start, end

    def _serve_file(self, fpath, filename):
        """Stream *fpath* to the client, honouring a single byte range.

        Sends 206 with ``Content-Range`` for a usable ``Range`` header and
        200 with the whole file otherwise.  *filename* is only used for the
        ``Content-Disposition`` header.
        """
        try:
            source = open(fpath, "rb")
        except OSError:
            # The entry was evicted between the cache lookup and now.
            self._json(404, {"error": "File not in cache. Use /cache to preload."})
            return

        with source:
            size = os.fstat(source.fileno()).st_size
            span = self._parse_range(self.headers.get("Range"), size)
            if span is None:
                start, length = 0, size
                self.send_response(200)
            else:
                start, end = span
                length = end - start + 1
                self.send_response(206)
                self.send_header("Content-Range", f"bytes {start}-{end}/{size}")

            # Quotes, backslashes and line breaks would corrupt the header.
            safe_name = "".join("_" if ch in '"\\\r\n' else ch for ch in filename)
            self.send_header("Content-Type", "video/mp4")
            self.send_header("Content-Disposition", f'inline; filename="{safe_name}"')
            self.send_header("Accept-Ranges", "bytes")
            self.send_header("Content-Length", str(length))
            self.send_header("Cache-Control", f"public, max-age={CACHE_DURATION}")
            self.end_headers()

            source.seek(start)
            remaining = length
            try:
                while remaining > 0:
                    chunk = source.read(min(self.CHUNK_SIZE, remaining))
                    if not chunk:
                        break
                    self.wfile.write(chunk)
                    remaining -= len(chunk)
            except (BrokenPipeError, ConnectionResetError):
                # The client closed the connection mid-transfer; nothing
                # more can be sent.
                pass

    def _json(self, code, data):
        """Send *data* as a JSON response with status *code*."""
        payload = json.dumps(data).encode()
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def log_message(self, format, *args):
        """Suppress the default per-request logging."""
        pass


if __name__ == "__main__":
    # Listen on every interface; the port is taken from the PORT environment
    # variable and falls back to 7860.
    listen_port = int(os.environ.get("PORT", 7860))
    server = http.server.HTTPServer(("0.0.0.0", listen_port), Handler)
    print(f"Proxy running on port {listen_port}")
    server.serve_forever()