| import http.server |
| import json |
| import os |
| import time |
| import shutil |
| import threading |
| import urllib.request |
| import hashlib |
| import re |
| from pathlib import Path |
| from datetime import datetime, timedelta |
| from urllib.parse import urlparse, parse_qs |
|
|
| HF_TOKEN = os.environ.get("HF_TOKEN", "") |
| CACHE_DIR = Path("/tmp/proxy_cache") |
| CACHE_DIR.mkdir(exist_ok=True) |
| CACHE_TTL = 5 * 3600 |
| MAX_CACHE_BYTES = 2 * 1024 * 1024 * 1024 |
|
|
| LOCK = threading.Lock() |
|
|
| class SmartCache: |
| def __init__(self): |
| self._start_cleaner() |
|
|
| def _start_cleaner(self): |
| def loop(): |
| while True: |
| time.sleep(180) |
| self.cleanup() |
| t = threading.Thread(target=loop, daemon=True) |
| t.start() |
|
|
| def get(self, key): |
| fpath = CACHE_DIR / key |
| meta = CACHE_DIR / f"{key}.meta" |
| if fpath.exists() and meta.exists(): |
| try: |
| with open(meta) as f: |
| m = json.load(f) |
| age = time.time() - m.get("ts", 0) |
| if age < CACHE_TTL: |
| m["hits"] = m.get("hits", 0) + 1 |
| with open(meta, "w") as f: |
| json.dump(m, f) |
| return fpath |
| else: |
| fpath.unlink(missing_ok=True) |
| meta.unlink(missing_ok=True) |
| except: |
| pass |
| return None |
|
|
| def put(self, key, src_path, content_type="video/mp4"): |
| with LOCK: |
| fpath = CACHE_DIR / key |
| shutil.copy2(src_path, fpath) |
| meta = CACHE_DIR / f"{key}.meta" |
| with open(meta, "w") as f: |
| json.dump({ |
| "ts": time.time(), |
| "expires": time.time() + CACHE_TTL, |
| "hits": 1, |
| "size": os.path.getsize(fpath), |
| "content_type": content_type, |
| }, f) |
|
|
| def stats(self): |
| files = 0 |
| size = 0 |
| with LOCK: |
| for f in CACHE_DIR.iterdir(): |
| if f.is_file() and not f.name.endswith(".meta"): |
| files += 1 |
| size += f.stat().st_size |
| return files, size |
|
|
| def cleanup(self): |
| with LOCK: |
| total = sum(f.stat().st_size for f in CACHE_DIR.iterdir() |
| if f.is_file() and not f.name.endswith(".meta")) |
| if total < MAX_CACHE_BYTES: |
| return |
|
|
| entries = [] |
| for f in CACHE_DIR.iterdir(): |
| if f.is_file() and not f.name.endswith(".meta"): |
| meta = CACHE_DIR / f"{f.name}.meta" |
| hits = 0 |
| mtime = f.stat().st_mtime |
| if meta.exists(): |
| try: |
| with open(meta) as mf: |
| m = json.load(mf) |
| hits = m.get("hits", 0) |
| mtime = m.get("ts", mtime) |
| except: |
| pass |
| entries.append((hits, mtime, f.stat().st_size, f)) |
| entries.sort() |
|
|
| target = int(MAX_CACHE_BYTES * 0.5) |
| for hits, mtime, sz, f in entries: |
| if total <= target: |
| break |
| f.unlink(missing_ok=True) |
| (CACHE_DIR / f"{f.name}.meta").unlink(missing_ok=True) |
| total -= sz |
|
|
| cache = SmartCache() |
|
|
| class Handler(http.server.BaseHTTPRequestHandler): |
| def do_GET(self): |
| parsed = urlparse(self.path) |
| params = parse_qs(parsed.query) |
|
|
| if parsed.path == "/health": |
| files, size = cache.stats() |
| free = shutil.disk_usage("/tmp").free |
| self._json(200, { |
| "status": "ok", |
| "cached_files": files, |
| "cache_size_bytes": size, |
| "cache_size_gb": round(size / (1024**3), 2), |
| "disk_free_bytes": free, |
| "disk_free_gb": round(free / (1024**3), 1), |
| "ttl_hours": CACHE_TTL / 3600, |
| }) |
| return |
|
|
| if parsed.path.startswith("/stream/"): |
| key = parsed.path.split("/stream/", 1)[-1] |
| if not key: |
| self._json(400, {"error": "Missing file key"}) |
| return |
| fpath = cache.get(key) |
| if fpath: |
| range_header = self.headers.get("Range", "") |
| self._stream_file(fpath, key, range_header) |
| else: |
| self._json(404, {"error": "Not in cache. Use /preload first."}) |
| return |
|
|
| if parsed.path == "/list": |
| files = [] |
| for f in sorted(CACHE_DIR.iterdir()): |
| if f.is_file() and not f.name.endswith(".meta"): |
| meta = CACHE_DIR / f"{f.name}.meta" |
| expires = 0 |
| hits = 0 |
| if meta.exists(): |
| try: |
| with open(meta) as mf: |
| m = json.load(mf) |
| expires = m.get("expires", 0) |
| hits = m.get("hits", 0) |
| except: |
| pass |
| files.append({ |
| "key": f.name, |
| "size": f.stat().st_size, |
| "expires_at": datetime.fromtimestamp(expires).isoformat() if expires else "N/A", |
| "hits": hits, |
| }) |
| self._json(200, {"files": files, "count": len(files)}) |
| return |
|
|
| self._json(404, {"error": "Not found. Try /health, /stream/{key}, /list"}) |
|
|
| def do_POST(self): |
| length = int(self.headers.get("Content-Length", 0)) |
| body = json.loads(self.rfile.read(length)) if length else {} |
| parsed = urlparse(self.path) |
| path = parsed.path |
|
|
| if path == "/cache": |
| key = body.get("key", "") |
| url = body.get("url", "") |
| if not key or not url: |
| self._json(400, {"error": "Missing key or url"}) |
| return |
| try: |
| temp = CACHE_DIR / f"dl_{int(time.time()*1000)}_{os.urandom(2).hex()}" |
| req = urllib.request.Request(url, headers={"Authorization": f"Bearer {HF_TOKEN}"}) |
| with urllib.request.urlopen(req, timeout=300) as src: |
| with open(temp, "wb") as f: |
| shutil.copyfileobj(src, f) |
| cache.put(key, temp) |
| temp.unlink() |
| self._json(200, {"status": "cached", "key": key}) |
| except Exception as e: |
| self._json(500, {"error": str(e)[:200]}) |
| return |
|
|
| if path == "/preload": |
| dataset = body.get("dataset", "") |
| file_name = body.get("file_name", "") |
| if not dataset or not file_name: |
| self._json(400, {"error": "Missing dataset or file_name"}) |
| return |
| key = f"{dataset}/{file_name}" |
| dl_url = f"https://huggingface.co/datasets/{dataset}/resolve/main/{file_name}" |
| try: |
| temp = CACHE_DIR / f"dl_{int(time.time()*1000)}_{os.urandom(2).hex()}" |
| req = urllib.request.Request(dl_url, headers={"Authorization": f"Bearer {HF_TOKEN}"}) |
| with urllib.request.urlopen(req, timeout=300) as src: |
| with open(temp, "wb") as f: |
| shutil.copyfileobj(src, f) |
| cache.put(key, temp, "video/mp4") |
| temp.unlink() |
| self._json(200, {"status": "preloaded", "key": key}) |
| except Exception as e: |
| self._json(500, {"error": str(e)[:200]}) |
| return |
|
|
| if path == "/flush": |
| with LOCK: |
| count = 0 |
| for f in CACHE_DIR.iterdir(): |
| if f.is_file(): |
| f.unlink() |
| count += 1 |
| self._json(200, {"status": "flushed", "removed": count}) |
| return |
|
|
| self._json(404, {"error": "Not found"}) |
|
|
| def _stream_file(self, fpath, filename, range_header): |
| file_size = fpath.stat().st_size |
| content_type = "video/mp4" |
|
|
| if range_header: |
| match = re.match(r"bytes=(\d+)-(\d*)", range_header) |
| if match: |
| start = int(match.group(1)) |
| end = int(match.group(2)) if match.group(2) else file_size - 1 |
| length = end - start + 1 |
| self.send_response(206) |
| self.send_header("Content-Range", f"bytes {start}-{end}/{file_size}") |
| else: |
| start, end, length = 0, file_size - 1, file_size |
| self.send_response(200) |
| else: |
| start, end, length = 0, file_size - 1, file_size |
| self.send_response(200) |
|
|
| self.send_header("Content-Type", content_type) |
| self.send_header("Content-Length", str(length)) |
| self.send_header("Accept-Ranges", "bytes") |
| self.send_header("Cache-Control", f"public, max-age={CACHE_TTL}") |
| self.send_header("Content-Disposition", f'inline; filename="{filename}"') |
| self.end_headers() |
|
|
| with open(fpath, "rb") as f: |
| f.seek(start) |
| remaining = length |
| while remaining > 0: |
| chunk_size = min(65536, remaining) |
| chunk = f.read(chunk_size) |
| if not chunk: |
| break |
| self.wfile.write(chunk) |
| remaining -= len(chunk) |
|
|
| def _json(self, code, data): |
| self.send_response(code) |
| self.send_header("Content-Type", "application/json") |
| self.send_header("Access-Control-Allow-Origin", "*") |
| self.end_headers() |
| self.wfile.write(json.dumps(data, ensure_ascii=False, default=str).encode()) |
|
|
| def log_message(self, fmt, *args): |
| pass |
|
|
| if __name__ == "__main__": |
| port = int(os.environ.get("PORT", 7860)) |
| print(f"Proxy v2 on :{port}") |
| print(f"Cache: {CACHE_DIR} | TTL: {CACHE_TTL}s | Max: {MAX_CACHE_BYTES/(1024**3):.0f}GB") |
| httpd = http.server.HTTPServer(("0.0.0.0", port), Handler) |
| httpd.serve_forever() |
|
|