Dev commited on
Commit
31d7082
·
1 Parent(s): c8c3d7e
Files changed (1) hide show
  1. app.py +170 -76
app.py CHANGED
@@ -4,78 +4,108 @@ import os
4
  import time
5
  import shutil
6
  import threading
 
7
  import hashlib
 
8
  from pathlib import Path
 
9
  from urllib.parse import urlparse, parse_qs
10
 
11
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
12
  CACHE_DIR = Path("/tmp/proxy_cache")
13
  CACHE_DIR.mkdir(exist_ok=True)
14
- CACHE_DURATION = 5 * 3600 # 5 hours
15
- MAX_CACHE_SIZE = 2 * 1024 * 1024 * 1024 # 2GB
 
 
16
 
17
  class SmartCache:
18
  def __init__(self):
19
- self.lock = threading.Lock()
20
  self._start_cleaner()
21
 
22
  def _start_cleaner(self):
23
- def clean_loop():
24
  while True:
25
- time.sleep(300)
26
  self.cleanup()
27
- t = threading.Thread(target=clean_loop, daemon=True)
28
  t.start()
29
 
30
  def get(self, key):
31
  fpath = CACHE_DIR / key
32
  meta = CACHE_DIR / f"{key}.meta"
33
  if fpath.exists() and meta.exists():
34
- with open(meta) as f:
35
- m = json.load(f)
36
- if time.time() - m["cached_at"] < CACHE_DURATION:
37
- m["access_count"] = m.get("access_count", 0) + 1
38
- with open(meta, "w") as f:
39
- json.dump(m, f)
40
- return fpath
41
- else:
42
- fpath.unlink(missing_ok=True)
43
- meta.unlink(missing_ok=True)
 
 
 
 
44
  return None
45
 
46
- def put(self, key, src_path):
47
- with self.lock:
48
  fpath = CACHE_DIR / key
49
  shutil.copy2(src_path, fpath)
50
  meta = CACHE_DIR / f"{key}.meta"
51
  with open(meta, "w") as f:
52
- json.dump({"cached_at": time.time(), "access_count": 0, "size": os.path.getsize(fpath)}, f)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def cleanup(self):
55
- with self.lock:
56
- total = sum(f.stat().st_size for f in CACHE_DIR.iterdir() if f.is_file() and not f.name.endswith(".meta"))
57
- if total < MAX_CACHE_SIZE:
 
58
  return
59
 
60
- files = []
61
  for f in CACHE_DIR.iterdir():
62
  if f.is_file() and not f.name.endswith(".meta"):
63
  meta = CACHE_DIR / f"{f.name}.meta"
64
- access = 0
 
65
  if meta.exists():
66
- with open(meta) as mf:
67
- access = json.load(mf).get("access_count", 0)
68
- files.append((access, f.stat().st_mtime, f))
69
- files.sort()
70
- removed = 0
71
- for _, _, f in files:
72
- if total < MAX_CACHE_SIZE * 0.8:
 
 
 
 
 
 
73
  break
74
- sz = f.stat().st_size
75
- f.unlink()
76
  (CACHE_DIR / f"{f.name}.meta").unlink(missing_ok=True)
77
  total -= sz
78
- removed += 1
79
 
80
  cache = SmartCache()
81
 
@@ -85,102 +115,166 @@ class Handler(http.server.BaseHTTPRequestHandler):
85
  params = parse_qs(parsed.query)
86
 
87
  if parsed.path == "/health":
88
- total = sum(f.stat().st_size for f in CACHE_DIR.iterdir() if f.is_file() and not f.name.endswith(".meta"))
 
89
  self._json(200, {
90
  "status": "ok",
91
- "cached_files": len(list(CACHE_DIR.glob("*"))) // 2,
92
- "cache_size": total,
93
- "cache_max": MAX_CACHE_SIZE,
 
 
 
94
  })
95
  return
96
 
97
  if parsed.path.startswith("/stream/"):
98
- key = parsed.path.split("/stream/")[1]
99
  if not key:
100
  self._json(400, {"error": "Missing file key"})
101
  return
102
-
103
- cached = cache.get(key)
104
- if cached:
105
- self._serve_file(cached, key)
106
  else:
107
- self._json(404, {"error": "File not in cache. Use /cache to preload."})
108
  return
109
 
110
- self._json(404, {"error": "Not found"})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  def do_POST(self):
113
  length = int(self.headers.get("Content-Length", 0))
114
  body = json.loads(self.rfile.read(length)) if length else {}
 
 
115
 
116
- if self.path == "/cache":
117
- file_key = body.get("key", "")
118
- file_url = body.get("url", "")
119
- if not file_key or not file_url:
120
  self._json(400, {"error": "Missing key or url"})
121
  return
122
  try:
123
- import urllib.request
124
- temp = CACHE_DIR / f"dl_{int(time.time())}"
125
- urllib.request.urlretrieve(file_url, temp)
126
- cache.put(file_key, temp)
 
 
127
  temp.unlink()
128
- self._json(200, {"status": "cached", "key": file_key})
129
  except Exception as e:
130
- self._json(500, {"error": str(e)})
131
  return
132
 
133
- if self.path == "/preload":
134
  dataset = body.get("dataset", "")
135
  file_name = body.get("file_name", "")
136
  if not dataset or not file_name:
137
  self._json(400, {"error": "Missing dataset or file_name"})
138
  return
 
139
  dl_url = f"https://huggingface.co/datasets/{dataset}/resolve/main/{file_name}"
140
  try:
141
- import urllib.request
142
  req = urllib.request.Request(dl_url, headers={"Authorization": f"Bearer {HF_TOKEN}"})
143
- temp = CACHE_DIR / f"dl_{int(time.time())}"
144
- urllib.request.urlretrieve(dl_url, temp)
145
- key = f"{dataset}/{file_name}"
146
- cache.put(key, temp)
147
  temp.unlink()
148
  self._json(200, {"status": "preloaded", "key": key})
149
  except Exception as e:
150
- self._json(500, {"error": str(e)})
151
  return
152
 
153
- if self.path == "/cleanup":
154
- for f in CACHE_DIR.iterdir():
155
- if f.is_file():
156
- f.unlink()
157
- self._json(200, {"status": "cleaned"})
 
 
 
158
  return
159
 
160
  self._json(404, {"error": "Not found"})
161
 
162
- def _serve_file(self, fpath, filename):
163
- self.send_response(200)
164
- self.send_header("Content-Type", "video/mp4")
165
- self.send_header("Content-Disposition", f'inline; filename="{filename}"')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  self.send_header("Accept-Ranges", "bytes")
167
- self.send_header("Cache-Control", f"public, max-age={CACHE_DURATION}")
 
168
  self.end_headers()
 
169
  with open(fpath, "rb") as f:
170
- shutil.copyfileobj(f, self.wfile)
 
 
 
 
 
 
 
 
171
 
172
  def _json(self, code, data):
173
  self.send_response(code)
174
  self.send_header("Content-Type", "application/json")
175
  self.send_header("Access-Control-Allow-Origin", "*")
176
  self.end_headers()
177
- self.wfile.write(json.dumps(data).encode())
178
 
179
- def log_message(self, format, *args):
180
  pass
181
 
182
  if __name__ == "__main__":
183
  port = int(os.environ.get("PORT", 7860))
 
 
184
  httpd = http.server.HTTPServer(("0.0.0.0", port), Handler)
185
- print(f"Proxy running on port {port}")
186
  httpd.serve_forever()
 
4
  import time
5
  import shutil
6
  import threading
7
+ import urllib.request
8
  import hashlib
9
+ import re
10
  from pathlib import Path
11
+ from datetime import datetime, timedelta
12
  from urllib.parse import urlparse, parse_qs
13
 
14
  HF_TOKEN = os.environ.get("HF_TOKEN", "")
15
  CACHE_DIR = Path("/tmp/proxy_cache")
16
  CACHE_DIR.mkdir(exist_ok=True)
17
+ CACHE_TTL = 5 * 3600
18
+ MAX_CACHE_BYTES = 2 * 1024 * 1024 * 1024
19
+
20
+ LOCK = threading.Lock()
21
 
22
  class SmartCache:
23
  def __init__(self):
 
24
  self._start_cleaner()
25
 
26
  def _start_cleaner(self):
27
+ def loop():
28
  while True:
29
+ time.sleep(180)
30
  self.cleanup()
31
+ t = threading.Thread(target=loop, daemon=True)
32
  t.start()
33
 
34
  def get(self, key):
35
  fpath = CACHE_DIR / key
36
  meta = CACHE_DIR / f"{key}.meta"
37
  if fpath.exists() and meta.exists():
38
+ try:
39
+ with open(meta) as f:
40
+ m = json.load(f)
41
+ age = time.time() - m.get("ts", 0)
42
+ if age < CACHE_TTL:
43
+ m["hits"] = m.get("hits", 0) + 1
44
+ with open(meta, "w") as f:
45
+ json.dump(m, f)
46
+ return fpath
47
+ else:
48
+ fpath.unlink(missing_ok=True)
49
+ meta.unlink(missing_ok=True)
50
+ except:
51
+ pass
52
  return None
53
 
54
+ def put(self, key, src_path, content_type="video/mp4"):
55
+ with LOCK:
56
  fpath = CACHE_DIR / key
57
  shutil.copy2(src_path, fpath)
58
  meta = CACHE_DIR / f"{key}.meta"
59
  with open(meta, "w") as f:
60
+ json.dump({
61
+ "ts": time.time(),
62
+ "expires": time.time() + CACHE_TTL,
63
+ "hits": 1,
64
+ "size": os.path.getsize(fpath),
65
+ "content_type": content_type,
66
+ }, f)
67
+
68
+ def stats(self):
69
+ files = 0
70
+ size = 0
71
+ with LOCK:
72
+ for f in CACHE_DIR.iterdir():
73
+ if f.is_file() and not f.name.endswith(".meta"):
74
+ files += 1
75
+ size += f.stat().st_size
76
+ return files, size
77
 
78
  def cleanup(self):
79
+ with LOCK:
80
+ total = sum(f.stat().st_size for f in CACHE_DIR.iterdir()
81
+ if f.is_file() and not f.name.endswith(".meta"))
82
+ if total < MAX_CACHE_BYTES:
83
  return
84
 
85
+ entries = []
86
  for f in CACHE_DIR.iterdir():
87
  if f.is_file() and not f.name.endswith(".meta"):
88
  meta = CACHE_DIR / f"{f.name}.meta"
89
+ hits = 0
90
+ mtime = f.stat().st_mtime
91
  if meta.exists():
92
+ try:
93
+ with open(meta) as mf:
94
+ m = json.load(mf)
95
+ hits = m.get("hits", 0)
96
+ mtime = m.get("ts", mtime)
97
+ except:
98
+ pass
99
+ entries.append((hits, mtime, f.stat().st_size, f))
100
+ entries.sort()
101
+
102
+ target = int(MAX_CACHE_BYTES * 0.5)
103
+ for hits, mtime, sz, f in entries:
104
+ if total <= target:
105
  break
106
+ f.unlink(missing_ok=True)
 
107
  (CACHE_DIR / f"{f.name}.meta").unlink(missing_ok=True)
108
  total -= sz
 
109
 
110
  cache = SmartCache()
111
 
 
115
  params = parse_qs(parsed.query)
116
 
117
  if parsed.path == "/health":
118
+ files, size = cache.stats()
119
+ free = shutil.disk_usage("/tmp").free
120
  self._json(200, {
121
  "status": "ok",
122
+ "cached_files": files,
123
+ "cache_size_bytes": size,
124
+ "cache_size_gb": round(size / (1024**3), 2),
125
+ "disk_free_bytes": free,
126
+ "disk_free_gb": round(free / (1024**3), 1),
127
+ "ttl_hours": CACHE_TTL / 3600,
128
  })
129
  return
130
 
131
  if parsed.path.startswith("/stream/"):
132
+ key = parsed.path.split("/stream/", 1)[-1]
133
  if not key:
134
  self._json(400, {"error": "Missing file key"})
135
  return
136
+ fpath = cache.get(key)
137
+ if fpath:
138
+ range_header = self.headers.get("Range", "")
139
+ self._stream_file(fpath, key, range_header)
140
  else:
141
+ self._json(404, {"error": "Not in cache. Use /preload first."})
142
  return
143
 
144
+ if parsed.path == "/list":
145
+ files = []
146
+ for f in sorted(CACHE_DIR.iterdir()):
147
+ if f.is_file() and not f.name.endswith(".meta"):
148
+ meta = CACHE_DIR / f"{f.name}.meta"
149
+ expires = 0
150
+ hits = 0
151
+ if meta.exists():
152
+ try:
153
+ with open(meta) as mf:
154
+ m = json.load(mf)
155
+ expires = m.get("expires", 0)
156
+ hits = m.get("hits", 0)
157
+ except:
158
+ pass
159
+ files.append({
160
+ "key": f.name,
161
+ "size": f.stat().st_size,
162
+ "expires_at": datetime.fromtimestamp(expires).isoformat() if expires else "N/A",
163
+ "hits": hits,
164
+ })
165
+ self._json(200, {"files": files, "count": len(files)})
166
+ return
167
+
168
+ self._json(404, {"error": "Not found. Try /health, /stream/{key}, /list"})
169
 
170
  def do_POST(self):
171
  length = int(self.headers.get("Content-Length", 0))
172
  body = json.loads(self.rfile.read(length)) if length else {}
173
+ parsed = urlparse(self.path)
174
+ path = parsed.path
175
 
176
+ if path == "/cache":
177
+ key = body.get("key", "")
178
+ url = body.get("url", "")
179
+ if not key or not url:
180
  self._json(400, {"error": "Missing key or url"})
181
  return
182
  try:
183
+ temp = CACHE_DIR / f"dl_{int(time.time()*1000)}_{os.urandom(2).hex()}"
184
+ req = urllib.request.Request(url, headers={"Authorization": f"Bearer {HF_TOKEN}"})
185
+ with urllib.request.urlopen(req, timeout=300) as src:
186
+ with open(temp, "wb") as f:
187
+ shutil.copyfileobj(src, f)
188
+ cache.put(key, temp)
189
  temp.unlink()
190
+ self._json(200, {"status": "cached", "key": key})
191
  except Exception as e:
192
+ self._json(500, {"error": str(e)[:200]})
193
  return
194
 
195
+ if path == "/preload":
196
  dataset = body.get("dataset", "")
197
  file_name = body.get("file_name", "")
198
  if not dataset or not file_name:
199
  self._json(400, {"error": "Missing dataset or file_name"})
200
  return
201
+ key = f"{dataset}/{file_name}"
202
  dl_url = f"https://huggingface.co/datasets/{dataset}/resolve/main/{file_name}"
203
  try:
204
+ temp = CACHE_DIR / f"dl_{int(time.time()*1000)}_{os.urandom(2).hex()}"
205
  req = urllib.request.Request(dl_url, headers={"Authorization": f"Bearer {HF_TOKEN}"})
206
+ with urllib.request.urlopen(req, timeout=300) as src:
207
+ with open(temp, "wb") as f:
208
+ shutil.copyfileobj(src, f)
209
+ cache.put(key, temp, "video/mp4")
210
  temp.unlink()
211
  self._json(200, {"status": "preloaded", "key": key})
212
  except Exception as e:
213
+ self._json(500, {"error": str(e)[:200]})
214
  return
215
 
216
+ if path == "/flush":
217
+ with LOCK:
218
+ count = 0
219
+ for f in CACHE_DIR.iterdir():
220
+ if f.is_file():
221
+ f.unlink()
222
+ count += 1
223
+ self._json(200, {"status": "flushed", "removed": count})
224
  return
225
 
226
  self._json(404, {"error": "Not found"})
227
 
228
+ def _stream_file(self, fpath, filename, range_header):
229
+ file_size = fpath.stat().st_size
230
+ content_type = "video/mp4"
231
+
232
+ if range_header:
233
+ match = re.match(r"bytes=(\d+)-(\d*)", range_header)
234
+ if match:
235
+ start = int(match.group(1))
236
+ end = int(match.group(2)) if match.group(2) else file_size - 1
237
+ length = end - start + 1
238
+ self.send_response(206)
239
+ self.send_header("Content-Range", f"bytes {start}-{end}/{file_size}")
240
+ else:
241
+ start, end, length = 0, file_size - 1, file_size
242
+ self.send_response(200)
243
+ else:
244
+ start, end, length = 0, file_size - 1, file_size
245
+ self.send_response(200)
246
+
247
+ self.send_header("Content-Type", content_type)
248
+ self.send_header("Content-Length", str(length))
249
  self.send_header("Accept-Ranges", "bytes")
250
+ self.send_header("Cache-Control", f"public, max-age={CACHE_TTL}")
251
+ self.send_header("Content-Disposition", f'inline; filename="{filename}"')
252
  self.end_headers()
253
+
254
  with open(fpath, "rb") as f:
255
+ f.seek(start)
256
+ remaining = length
257
+ while remaining > 0:
258
+ chunk_size = min(65536, remaining)
259
+ chunk = f.read(chunk_size)
260
+ if not chunk:
261
+ break
262
+ self.wfile.write(chunk)
263
+ remaining -= len(chunk)
264
 
265
  def _json(self, code, data):
266
  self.send_response(code)
267
  self.send_header("Content-Type", "application/json")
268
  self.send_header("Access-Control-Allow-Origin", "*")
269
  self.end_headers()
270
+ self.wfile.write(json.dumps(data, ensure_ascii=False, default=str).encode())
271
 
272
+ def log_message(self, fmt, *args):
273
  pass
274
 
275
  if __name__ == "__main__":
276
  port = int(os.environ.get("PORT", 7860))
277
+ print(f"Proxy v2 on :{port}")
278
+ print(f"Cache: {CACHE_DIR} | TTL: {CACHE_TTL}s | Max: {MAX_CACHE_BYTES/(1024**3):.0f}GB")
279
  httpd = http.server.HTTPServer(("0.0.0.0", port), Handler)
 
280
  httpd.serve_forever()