jamelloverz-sketch committed on
Commit
938468e
·
1 Parent(s): d54156e

v3 stream-through; forward range to HF; cache on full request

Browse files
Files changed (1) hide show
  1. app.py +34 -40
app.py CHANGED
@@ -307,12 +307,14 @@ class Handler(http.server.BaseHTTPRequestHandler):
307
  remaining -= len(chunk)
308
 
309
  def _stream_from_hf_with_cache(self, key, dataset, file_path):
310
- """Stream from HF dataset directly, caching the file simultaneously."""
311
  dl_url = f"https://huggingface.co/datasets/{dataset}/resolve/main/{file_path}"
312
- req_headers = {"Authorization": f"Bearer {HF_TOKEN}"}
313
-
314
  client_range = self.headers.get("Range", "")
315
 
 
 
 
 
316
  try:
317
  hf_req = urllib.request.Request(dl_url, headers=req_headers)
318
  hf_resp = urllib.request.urlopen(hf_req, timeout=300)
@@ -322,25 +324,21 @@ class Handler(http.server.BaseHTTPRequestHandler):
322
  return
323
 
324
  total = int(hf_resp.getheader("Content-Length", 0) or 0)
325
-
326
- if client_range:
327
- match = re.match(r"bytes=(\d+)-(\d*)", client_range)
328
- if match:
329
- cs = int(match.group(1))
330
- ce = int(match.group(2)) if match.group(2) else total - 1
331
- clen = ce - cs + 1
332
- self.send_response(206)
333
- self.send_header("Content-Range", f"bytes {cs}-{ce}/{total}")
334
- else:
335
- cs, ce, clen = 0, total - 1, total
336
- self.send_response(200)
337
  else:
338
- cs, ce, clen = 0, total - 1, total
339
  self.send_response(200)
340
 
341
- ct = hf_resp.getheader("Content-Type", "video/mp4")
342
- self.send_header("Content-Type", ct)
343
- self.send_header("Content-Length", str(clen))
344
  self.send_header("Accept-Ranges", "bytes")
345
  self.send_header("Cache-Control", f"public, max-age={CACHE_TTL}")
346
  self.send_header("Content-Disposition", f'inline; filename="{key}"')
@@ -348,34 +346,30 @@ class Handler(http.server.BaseHTTPRequestHandler):
348
  self.send_header("Access-Control-Expose-Headers", "*")
349
  self.end_headers()
350
 
351
- temp = CACHE_DIR / f"dl_{int(time.time()*1000)}_{os.urandom(2).hex()}"
 
352
  written = 0
353
- skipped = 0
354
  try:
355
- with open(temp, "wb") as tmpf:
356
- while True:
357
- chunk = hf_resp.read(65536)
358
- if not chunk:
359
- break
 
360
  tmpf.write(chunk)
361
- written += len(chunk)
362
- if skipped < cs:
363
- need = cs - skipped
364
- if len(chunk) <= need:
365
- skipped += len(chunk)
366
- continue
367
- chunk = chunk[need:]
368
- skipped = cs
369
- self.wfile.write(chunk)
370
- self.wfile.flush()
371
  except (BrokenPipeError, ConnectionResetError):
372
  pass
373
  finally:
374
  hf_resp.close()
375
-
376
- if total > 0 and written >= total * 0.8:
377
- cache.put(key, temp, ct)
378
- temp.unlink(missing_ok=True)
379
 
380
  def _json(self, code, data):
381
  self.send_response(code)
 
307
  remaining -= len(chunk)
308
 
309
  def _stream_from_hf_with_cache(self, key, dataset, file_path):
310
+ """Stream from HF dataset if full file requested, cache it; otherwise forward range."""
311
  dl_url = f"https://huggingface.co/datasets/{dataset}/resolve/main/{file_path}"
 
 
312
  client_range = self.headers.get("Range", "")
313
 
314
+ req_headers = {"Authorization": f"Bearer {HF_TOKEN}"}
315
+ if client_range:
316
+ req_headers["Range"] = client_range
317
+
318
  try:
319
  hf_req = urllib.request.Request(dl_url, headers=req_headers)
320
  hf_resp = urllib.request.urlopen(hf_req, timeout=300)
 
324
  return
325
 
326
  total = int(hf_resp.getheader("Content-Length", 0) or 0)
327
+ hf_ct = hf_resp.getheader("Content-Type", "video/mp4")
328
+ hf_cr = hf_resp.getheader("Content-Range", "")
329
+
330
+ if hf_cr:
331
+ self.send_response(206)
332
+ self.send_header("Content-Range", hf_cr)
333
+ elif client_range:
334
+ self.send_response(206)
335
+ if total:
336
+ self.send_header("Content-Range", f"bytes 0-{total-1}/{total}")
 
 
337
  else:
 
338
  self.send_response(200)
339
 
340
+ self.send_header("Content-Type", hf_ct)
341
+ self.send_header("Content-Length", str(total))
 
342
  self.send_header("Accept-Ranges", "bytes")
343
  self.send_header("Cache-Control", f"public, max-age={CACHE_TTL}")
344
  self.send_header("Content-Disposition", f'inline; filename="{key}"')
 
346
  self.send_header("Access-Control-Expose-Headers", "*")
347
  self.end_headers()
348
 
349
+ do_cache = not client_range and total > 0
350
+ temp = CACHE_DIR / f"dl_{int(time.time()*1000)}_{os.urandom(2).hex()}" if do_cache else None
351
  written = 0
 
352
  try:
353
+ tmpf = open(temp, "wb") if temp else None
354
+ while True:
355
+ chunk = hf_resp.read(65536)
356
+ if not chunk:
357
+ break
358
+ if tmpf:
359
  tmpf.write(chunk)
360
+ self.wfile.write(chunk)
361
+ self.wfile.flush()
362
+ written += len(chunk)
363
+ if tmpf:
364
+ tmpf.close()
 
 
 
 
 
365
  except (BrokenPipeError, ConnectionResetError):
366
  pass
367
  finally:
368
  hf_resp.close()
369
+ if temp and written > 0 and written == total:
370
+ cache.put(key, temp, hf_ct)
371
+ if temp:
372
+ temp.unlink(missing_ok=True)
373
 
374
  def _json(self, code, data):
375
  self.send_response(code)