SalexAI committed on
Commit
05a8e3b
·
verified ·
1 Parent(s): e8f2302

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -16
app.py CHANGED
@@ -39,8 +39,9 @@ HF_REPO_TYPE = "dataset"
39
  TOTAL_DIVISIONS = 25
40
  VALID_DIVISIONS = set(range(1, TOTAL_DIVISIONS + 1))
41
 
42
- # Local Temporary Storage (for processing before upload)
43
- TEMP_DIR = tempfile.mkdtemp()
 
44
 
45
  # ==================================================
46
  # ADMIN AUTH
@@ -109,7 +110,8 @@ api = HfApi(token=HF_TOKEN)
109
 
110
  def get_hf_url(path_in_repo: str) -> str:
111
  """Returns the direct download URL for a file in the dataset."""
112
- return f"https://huggingface.co/datasets/{HF_REPO_ID}/resolve/main/{path_in_repo}"
 
113
 
114
  async def sync_pull_json(filename: str, default: dict) -> dict:
115
  """Download JSON from HF. Returns default if not found."""
@@ -159,10 +161,16 @@ async def sync_push_json(filename: str, data: dict):
159
  except Exception as e:
160
  logging.error(f"Failed to push {filename} to HF: {e}")
161
 
162
- async def sync_push_file(local_path: str, remote_path: str):
163
- """Upload media file to HF."""
164
  if not HF_TOKEN:
165
- return
 
 
 
 
 
 
166
  try:
167
  loop = asyncio.get_event_loop()
168
  await loop.run_in_executor(
@@ -175,8 +183,12 @@ async def sync_push_file(local_path: str, remote_path: str):
175
  commit_message=f"Add media {remote_path}"
176
  )
177
  )
 
 
 
178
  except Exception as e:
179
  logging.error(f"Failed to push media {remote_path}: {e}")
 
180
 
181
  # ==================================================
182
  # STATE MANAGEMENT
@@ -277,13 +289,20 @@ async def get_asset_urls(base_url: str, guids: list) -> dict:
277
  return (await post_json("webasseturls", base_url, payload)).get("items", {})
278
 
279
  async def download_to_temp(url: str, filename: str) -> str:
 
280
  path = os.path.join(TEMP_DIR, filename)
 
 
 
 
 
281
  client = await get_client()
282
  async with client.stream("GET", url) as r:
283
  r.raise_for_status()
284
  with open(path, "wb") as f:
285
  async for chunk in r.aiter_bytes():
286
  f.write(chunk)
 
287
  return path
288
 
289
  # ==================================================
@@ -295,7 +314,13 @@ async def poll_album(token: str):
295
  base_url = await get_redirected_base_url(base_url, token)
296
 
297
  metadata = await get_metadata(base_url)
 
 
 
298
  guids = [p["photoGuid"] for p in metadata]
 
 
 
299
  assets = await get_asset_urls(base_url, guids)
300
 
301
  async with DATA_LOCK:
@@ -326,33 +351,54 @@ async def poll_album(token: str):
326
  asset = assets[best["checksum"]]
327
  video_url = f"https://{asset['url_location']}{asset['url_path']}"
328
 
329
- # Download to temp then Push to HF
330
  temp_vid = await download_to_temp(video_url, f"{vid}.mp4")
331
  hf_vid_path = f"videos/{token}/{vid}.mp4"
332
- await sync_push_file(temp_vid, hf_vid_path)
333
- os.remove(temp_vid)
334
-
335
- # Handle Thumbnail
336
- hf_thumb_path = ""
 
 
 
 
 
 
 
 
 
 
337
  pf = derivatives.get("PosterFrame")
338
  if pf and pf.get("checksum") in assets:
339
  pf_asset = assets[pf["checksum"]]
340
  poster_url = f"https://{pf_asset['url_location']}{pf_asset['url_path']}"
 
341
  temp_thumb = await download_to_temp(poster_url, f"{vid}.jpg")
342
  hf_thumb_path = f"videos/{token}/{vid}.jpg"
343
- await sync_push_file(temp_thumb, hf_thumb_path)
344
- os.remove(temp_thumb)
 
 
 
 
 
 
 
 
 
 
345
 
346
  new_entries.append({
347
  "id": vid,
348
  "name": photo.get("caption") or "Untitled",
349
  "video_url": get_hf_url(hf_vid_path),
350
- "thumbnail": get_hf_url(hf_thumb_path) if hf_thumb_path else "",
351
  "upload_date": photo.get("creationDate") or datetime.now(timezone.utc).isoformat(),
352
  "category": category,
353
  "publisher": publisher,
354
  "source_album": token,
355
- "allowed_divs": [] # Default: Allowed for all
356
  })
357
 
358
  if new_entries:
 
39
  TOTAL_DIVISIONS = 25
40
  VALID_DIVISIONS = set(range(1, TOTAL_DIVISIONS + 1))
41
 
42
+ # Local Temporary Storage (Fixed to use local dir to avoid permission issues)
43
+ TEMP_DIR = os.path.abspath("temp_downloads")
44
+ os.makedirs(TEMP_DIR, exist_ok=True)
45
 
46
  # ==================================================
47
  # ADMIN AUTH
 
110
 
111
  def get_hf_url(path_in_repo: str) -> str:
112
  """Returns the direct download URL for a file in the dataset."""
113
+ # Using ?download=true helps force the browser to treat it as a file
114
+ return f"https://huggingface.co/datasets/{HF_REPO_ID}/resolve/main/{path_in_repo}?download=true"
115
 
116
  async def sync_pull_json(filename: str, default: dict) -> dict:
117
  """Download JSON from HF. Returns default if not found."""
 
161
  except Exception as e:
162
  logging.error(f"Failed to push {filename} to HF: {e}")
163
 
164
+ async def sync_push_file(local_path: str, remote_path: str) -> bool:
165
+ """Upload media file to HF. Returns True if successful."""
166
  if not HF_TOKEN:
167
+ return False
168
+
169
+ # Verify file exists and is readable
170
+ if not os.path.exists(local_path):
171
+ logging.error(f"Upload failed: Local file not found at {local_path}")
172
+ return False
173
+
174
  try:
175
  loop = asyncio.get_event_loop()
176
  await loop.run_in_executor(
 
183
  commit_message=f"Add media {remote_path}"
184
  )
185
  )
186
+ # Small delay to let HF API catch up and prevent 429/File Lock issues
187
+ await asyncio.sleep(2.0)
188
+ return True
189
  except Exception as e:
190
  logging.error(f"Failed to push media {remote_path}: {e}")
191
+ return False
192
 
193
  # ==================================================
194
  # STATE MANAGEMENT
 
289
  return (await post_json("webasseturls", base_url, payload)).get("items", {})
290
 
291
  async def download_to_temp(url: str, filename: str) -> str:
292
+ # Use the explicitly created local temp directory
293
  path = os.path.join(TEMP_DIR, filename)
294
+
295
+ # Remove if exists to ensure clean slate
296
+ if os.path.exists(path):
297
+ os.remove(path)
298
+
299
  client = await get_client()
300
  async with client.stream("GET", url) as r:
301
  r.raise_for_status()
302
  with open(path, "wb") as f:
303
  async for chunk in r.aiter_bytes():
304
  f.write(chunk)
305
+
306
  return path
307
 
308
  # ==================================================
 
314
  base_url = await get_redirected_base_url(base_url, token)
315
 
316
  metadata = await get_metadata(base_url)
317
+ if not metadata:
318
+ return # Empty album, skip to prevent errors
319
+
320
  guids = [p["photoGuid"] for p in metadata]
321
+ if not guids:
322
+ return
323
+
324
  assets = await get_asset_urls(base_url, guids)
325
 
326
  async with DATA_LOCK:
 
351
  asset = assets[best["checksum"]]
352
  video_url = f"https://{asset['url_location']}{asset['url_path']}"
353
 
354
+ # --- PROCESS VIDEO ---
355
  temp_vid = await download_to_temp(video_url, f"{vid}.mp4")
356
  hf_vid_path = f"videos/{token}/{vid}.mp4"
357
+
358
+ vid_success = await sync_push_file(temp_vid, hf_vid_path)
359
+
360
+ # Cleanup temp video safely
361
+ try:
362
+ if os.path.exists(temp_vid):
363
+ os.remove(temp_vid)
364
+ except Exception as e:
365
+ logging.warning(f"Cleanup error for {vid}: {e}")
366
+
367
+ if not vid_success:
368
+ continue # Skip entry if video upload failed
369
+
370
+ # --- PROCESS THUMBNAIL ---
371
+ final_thumb_url = ""
372
  pf = derivatives.get("PosterFrame")
373
  if pf and pf.get("checksum") in assets:
374
  pf_asset = assets[pf["checksum"]]
375
  poster_url = f"https://{pf_asset['url_location']}{pf_asset['url_path']}"
376
+
377
  temp_thumb = await download_to_temp(poster_url, f"{vid}.jpg")
378
  hf_thumb_path = f"videos/{token}/{vid}.jpg"
379
+
380
+ thumb_success = await sync_push_file(temp_thumb, hf_thumb_path)
381
+
382
+ # Cleanup temp thumb safely
383
+ try:
384
+ if os.path.exists(temp_thumb):
385
+ os.remove(temp_thumb)
386
+ except Exception as e:
387
+ logging.warning(f"Cleanup error for thumb {vid}: {e}")
388
+
389
+ if thumb_success:
390
+ final_thumb_url = get_hf_url(hf_thumb_path)
391
 
392
  new_entries.append({
393
  "id": vid,
394
  "name": photo.get("caption") or "Untitled",
395
  "video_url": get_hf_url(hf_vid_path),
396
+ "thumbnail": final_thumb_url,
397
  "upload_date": photo.get("creationDate") or datetime.now(timezone.utc).isoformat(),
398
  "category": category,
399
  "publisher": publisher,
400
  "source_album": token,
401
+ "allowed_divs": []
402
  })
403
 
404
  if new_entries: