SalexAI committed on
Commit
1e19ce0
·
verified ·
1 Parent(s): db62cd7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -222
app.py CHANGED
@@ -1,14 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
1
  from fastapi import FastAPI, Request, Form, HTTPException, Query
2
  from fastapi.middleware.cors import CORSMiddleware
3
  from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
4
  import httpx
5
  import json
6
  import logging
7
- import os
8
  import asyncio
9
  import secrets
10
  import shutil
11
- import tempfile
12
  import html as html_escape_lib
13
  from typing import Optional, List
14
  from datetime import datetime, timezone
@@ -44,7 +53,6 @@ TEMP_DIR = "/tmp/fundata_downloads"
44
  try:
45
  os.makedirs(TEMP_DIR, exist_ok=True)
46
  except OSError:
47
- # Fallback to python's temp dir if /tmp/fundata_downloads fails
48
  TEMP_DIR = tempfile.mkdtemp()
49
 
50
  # ==================================================
@@ -93,8 +101,6 @@ ALBUM_CATEGORIES = dict(DEFAULT_ALBUM_CATEGORIES)
93
  # ==================================================
94
  # LOCKS & STATE
95
  # ==================================================
96
- # We use in-memory cache for the index to avoid spamming HF API,
97
- # but we write back to HF on changes.
98
  INDEX_CACHE = {"videos": []}
99
  CONFIG_CACHE = {}
100
  DATA_LOCK = asyncio.Lock()
@@ -114,13 +120,11 @@ api = HfApi(token=HF_TOKEN)
114
 
115
  def get_hf_url(path_in_repo: str) -> str:
116
  """Returns the direct download URL for a file in the dataset."""
117
- # Using ?download=true helps force the browser to treat it as a file
118
  return f"https://huggingface.co/datasets/{HF_REPO_ID}/resolve/main/{path_in_repo}?download=true"
119
 
120
  async def sync_pull_json(filename: str, default: dict) -> dict:
121
  """Download JSON from HF. Returns default if not found."""
122
  try:
123
- # Run blocking HF call in thread
124
  loop = asyncio.get_event_loop()
125
  local_path = await loop.run_in_executor(
126
  None,
@@ -129,6 +133,8 @@ async def sync_pull_json(filename: str, default: dict) -> dict:
129
  filename=filename,
130
  repo_type=HF_REPO_TYPE,
131
  token=HF_TOKEN,
 
 
132
  local_dir=TEMP_DIR
133
  )
134
  )
@@ -145,12 +151,10 @@ async def sync_push_json(filename: str, data: dict):
145
  return
146
 
147
  try:
148
- # Save to temp
149
  temp_path = os.path.join(TEMP_DIR, filename)
150
  with open(temp_path, "w", encoding="utf-8") as f:
151
  json.dump(data, f, indent=2)
152
 
153
- # Upload
154
  loop = asyncio.get_event_loop()
155
  await loop.run_in_executor(
156
  None,
@@ -170,7 +174,6 @@ async def sync_push_file(local_path: str, remote_path: str) -> bool:
170
  if not HF_TOKEN:
171
  return False
172
 
173
- # Verify file exists and is readable
174
  if not os.path.exists(local_path):
175
  logging.error(f"Upload failed: Local file not found at {local_path}")
176
  return False
@@ -187,7 +190,6 @@ async def sync_push_file(local_path: str, remote_path: str) -> bool:
187
  commit_message=f"Add media {remote_path}"
188
  )
189
  )
190
- # Small delay to let HF API catch up and prevent 429/File Lock issues
191
  await asyncio.sleep(2.0)
192
  return True
193
  except Exception as e:
@@ -213,7 +215,6 @@ async def load_state_from_hf():
213
  ALBUM_CATEGORIES = dict(CONFIG_CACHE.get("album_categories", {}))
214
 
215
  async def save_index():
216
- # Caller should hold lock usually, but atomic write helps
217
  await sync_push_json("index.json", INDEX_CACHE)
218
 
219
  async def save_config():
@@ -239,9 +240,8 @@ async def backfill_index_categories():
239
  v["category"] = correct_category
240
  changed = True
241
 
242
- # Ensure allowed_divs exists
243
  if "allowed_divs" not in v:
244
- v["allowed_divs"] = [] # Default to empty (Global)
245
  changed = True
246
 
247
  if changed:
@@ -293,10 +293,7 @@ async def get_asset_urls(base_url: str, guids: list) -> dict:
293
  return (await post_json("webasseturls", base_url, payload)).get("items", {})
294
 
295
  async def download_to_temp(url: str, filename: str) -> str:
296
- # Use the explicitly created safe temp directory
297
  path = os.path.join(TEMP_DIR, filename)
298
-
299
- # Remove if exists to ensure clean slate
300
  if os.path.exists(path):
301
  os.remove(path)
302
 
@@ -306,7 +303,6 @@ async def download_to_temp(url: str, filename: str) -> str:
306
  with open(path, "wb") as f:
307
  async for chunk in r.aiter_bytes():
308
  f.write(chunk)
309
-
310
  return path
311
 
312
  # ==================================================
@@ -319,7 +315,7 @@ async def poll_album(token: str):
319
 
320
  metadata = await get_metadata(base_url)
321
  if not metadata:
322
- return # Empty album, skip to prevent errors
323
 
324
  guids = [p["photoGuid"] for p in metadata]
325
  if not guids:
@@ -355,13 +351,11 @@ async def poll_album(token: str):
355
  asset = assets[best["checksum"]]
356
  video_url = f"https://{asset['url_location']}{asset['url_path']}"
357
 
358
- # --- PROCESS VIDEO ---
359
  temp_vid = await download_to_temp(video_url, f"{vid}.mp4")
360
  hf_vid_path = f"videos/{token}/{vid}.mp4"
361
 
362
  vid_success = await sync_push_file(temp_vid, hf_vid_path)
363
 
364
- # Cleanup temp video safely
365
  try:
366
  if os.path.exists(temp_vid):
367
  os.remove(temp_vid)
@@ -369,9 +363,8 @@ async def poll_album(token: str):
369
  logging.warning(f"Cleanup error for {vid}: {e}")
370
 
371
  if not vid_success:
372
- continue # Skip entry if video upload failed
373
 
374
- # --- PROCESS THUMBNAIL ---
375
  final_thumb_url = ""
376
  pf = derivatives.get("PosterFrame")
377
  if pf and pf.get("checksum") in assets:
@@ -383,7 +376,6 @@ async def poll_album(token: str):
383
 
384
  thumb_success = await sync_push_file(temp_thumb, hf_thumb_path)
385
 
386
- # Cleanup temp thumb safely
387
  try:
388
  if os.path.exists(temp_thumb):
389
  os.remove(temp_thumb)
@@ -439,213 +431,16 @@ async def start_polling():
439
  # ==================================================
440
  @app.get("/feed/videos")
441
  async def get_video_feed(div: Optional[int] = Query(None, description="School Division (1-25)")):
442
- """
443
- Returns videos.
444
- If 'div' is provided, returns:
445
- 1. Videos with NO specific allowed_divs (Global).
446
- 2. Videos where 'div' is explicitly in allowed_divs.
447
- """
448
  async with DATA_LOCK:
449
  videos = INDEX_CACHE.get("videos", [])
450
 
451
- # If no div specified, return everything? Or everything that isn't restricted?
452
- # Usually feed returns everything accessible.
453
  if div is None:
454
  return {"videos": videos}
455
 
456
- # Validate Div
457
  if div not in VALID_DIVISIONS:
458
- # If invalid div provided, maybe return empty or error?
459
- # For safety, let's return empty or just global ones.
460
  return {"videos": [v for v in videos if not v.get("allowed_divs")]}
461
 
462
  filtered_videos = []
463
  for v in videos:
464
  allowed = v.get("allowed_divs", [])
465
- # Include if: No restrictions defined OR Div is in list
466
- if not allowed or div in allowed:
467
- filtered_videos.append(v)
468
-
469
- return {"videos": filtered_videos}
470
-
471
- # ==================================================
472
- # ADMIN: LOGIN
473
- # ==================================================
474
- @app.get("/admin/login", response_class=HTMLResponse)
475
- async def admin_login_page():
476
- if not admin_enabled():
477
- return HTMLResponse("Admin disabled (ADMIN_KEY missing)", status_code=503)
478
- return """
479
- <html><body style="background:#111;color:#fff;display:flex;justify-content:center;align-items:center;height:100vh;font-family:sans-serif;">
480
- <form method="post" style="padding:20px;border:1px solid #333;border-radius:10px;background:#1a1a1a;">
481
- <h2>Admin</h2><input type="password" name="key" placeholder="Key" style="padding:10px;width:100%;margin-bottom:10px;">
482
- <button style="padding:10px;width:100%;cursor:pointer;">Login</button>
483
- </form></body></html>
484
- """
485
-
486
- @app.post("/admin/login")
487
- async def admin_login(key: str = Form(...)):
488
- if not admin_enabled() or not secure_equals(key.strip(), str(ADMIN_KEY).strip()):
489
- return HTMLResponse("Unauthorized", status_code=401)
490
- session = secrets.token_hex(16)
491
- ADMIN_SESSIONS.add(session)
492
- resp = RedirectResponse("/admin", status_code=302)
493
- resp.set_cookie(ADMIN_COOKIE, session, httponly=True)
494
- return resp
495
-
496
- @app.get("/admin/logout")
497
- async def admin_logout(req: Request):
498
- if (s := req.cookies.get(ADMIN_COOKIE)) in ADMIN_SESSIONS: ADMIN_SESSIONS.remove(s)
499
- return RedirectResponse("/admin/login")
500
-
501
- # ==================================================
502
- # ADMIN DASHBOARD
503
- # ==================================================
504
- def esc(v) -> str: return html_escape_lib.escape("" if v is None else str(v), quote=True)
505
-
506
- ADMIN_TEMPLATE = """
507
- <html>
508
- <head>
509
- <title>Admin</title>
510
- <style>
511
- body{font-family:sans-serif;background:#111;color:#ddd;padding:20px;}
512
- table{width:100%;border-collapse:collapse;margin-top:20px;}
513
- th,td{border-bottom:1px solid #333;padding:8px;text-align:left;}
514
- input{background:#222;border:1px solid #444;color:#fff;padding:5px;border-radius:4px;width:100%;}
515
- button{background:#444;color:#fff;border:none;padding:5px 10px;cursor:pointer;border-radius:4px;}
516
- .pill{padding:4px 8px;background:#004400;border-radius:10px;font-size:0.8em;}
517
- </style>
518
- </head>
519
- <body>
520
- <h1>Admin Panel <a href="/admin/logout" style="font-size:0.5em;color:#888;">Logout</a></h1>
521
-
522
- <h3>Albums</h3>
523
- <table>
524
- <tr><th>Token</th><th>Publisher</th><th>Category</th><th>Action</th></tr>
525
- __ALBUM_ROWS__
526
- <tr>
527
- <td><input id="n_t" placeholder="Token"></td>
528
- <td><input id="n_p" placeholder="Publisher"></td>
529
- <td><input id="n_c" placeholder="Category"></td>
530
- <td><button onclick="addAlbum()">Add</button></td>
531
- </tr>
532
- </table>
533
-
534
- <h3>Videos</h3>
535
- <p style="font-size:0.8em;color:#888;">Allowed Divs: Comma separated (e.g. <code>1,5,25</code>). Leave empty for ALL.</p>
536
- <table>
537
- <tr><th>ID</th><th>Name</th><th>Divs (1-25)</th><th>Category</th><th>Publisher</th><th>Action</th></tr>
538
- __VIDEO_ROWS__
539
- </table>
540
-
541
- <script>
542
- async function api(ep, data) {
543
- await fetch(ep, {method:'POST', headers:{'Content-Type':'application/json'}, body:JSON.stringify(data)});
544
- location.reload();
545
- }
546
- document.querySelectorAll('input[data-id]').forEach(i => {
547
- i.addEventListener('change', (e) => api('/admin/update', {
548
- id: e.target.dataset.id,
549
- field: e.target.dataset.field,
550
- value: e.target.value
551
- }));
552
- });
553
- function addAlbum(){
554
- api('/admin/albums/add', {
555
- token: document.getElementById('n_t').value,
556
- publisher: document.getElementById('n_p').value,
557
- category: document.getElementById('n_c').value
558
- });
559
- }
560
- </script>
561
- </body>
562
- </html>
563
- """
564
-
565
- @app.get("/admin", response_class=HTMLResponse)
566
- async def admin_dash(req: Request):
567
- if not is_admin(req): return RedirectResponse("/admin/login")
568
-
569
- async with DATA_LOCK:
570
- videos = INDEX_CACHE.get("videos", [])
571
-
572
- v_rows = ""
573
- for v in videos:
574
- divs = ",".join(map(str, v.get("allowed_divs", [])))
575
- v_rows += f"""<tr>
576
- <td>{esc(v['id'])[:8]}...</td>
577
- <td><input data-id="{v['id']}" data-field="name" value="{esc(v.get('name'))}"></td>
578
- <td><input data-id="{v['id']}" data-field="allowed_divs" value="{esc(divs)}" placeholder="All"></td>
579
- <td><input data-id="{v['id']}" data-field="category" value="{esc(v.get('category'))}"></td>
580
- <td><input data-id="{v['id']}" data-field="publisher" value="{esc(v.get('publisher'))}"></td>
581
- <td><button onclick="api('/admin/videos/delete', {{id:'{v['id']}'}})" style="background:#500;">Del</button></td>
582
- </tr>"""
583
-
584
- a_rows = ""
585
- for t, p in ALBUM_PUBLISHERS.items():
586
- c = ALBUM_CATEGORIES.get(t, "")
587
- a_rows += f"<tr><td>{t}</td><td>{p}</td><td>{c}</td><td>-</td></tr>"
588
-
589
- return ADMIN_TEMPLATE.replace("__VIDEO_ROWS__", v_rows).replace("__ALBUM_ROWS__", a_rows)
590
-
591
- # ==================================================
592
- # ADMIN ACTIONS
593
- # ==================================================
594
- @app.post("/admin/update")
595
- async def admin_update(req: Request, payload: dict):
596
- if not is_admin(req): return JSONResponse({}, 403)
597
-
598
- vid_id = payload.get("id")
599
- field = payload.get("field")
600
- value = payload.get("value")
601
-
602
- async with DATA_LOCK:
603
- for v in INDEX_CACHE["videos"]:
604
- if v["id"] == vid_id:
605
- if field == "allowed_divs":
606
- # Parse comma string to list of ints
607
- try:
608
- if not value.strip():
609
- v[field] = []
610
- else:
611
- # Filter only valid integers 1-25
612
- nums = [int(x.strip()) for x in value.split(",") if x.strip().isdigit()]
613
- v[field] = [n for n in nums if n in VALID_DIVISIONS]
614
- except:
615
- pass # Ignore bad input
616
- else:
617
- v[field] = value
618
-
619
- await save_index()
620
- return {"ok": True}
621
- return {"error": "not found"}
622
-
623
- @app.post("/admin/videos/delete")
624
- async def admin_delete(req: Request, payload: dict):
625
- if not is_admin(req): return JSONResponse({}, 403)
626
- vid_id = payload.get("id")
627
-
628
- async with DATA_LOCK:
629
- # We only remove from index. We don't delete actual files from HF to avoid complexity
630
- # (Git operations are heavy), but removing from index hides them from feed.
631
- original_len = len(INDEX_CACHE["videos"])
632
- INDEX_CACHE["videos"] = [v for v in INDEX_CACHE["videos"] if v["id"] != vid_id]
633
-
634
- if len(INDEX_CACHE["videos"]) < original_len:
635
- await save_index()
636
-
637
- return {"ok": True}
638
-
639
- @app.post("/admin/albums/add")
640
- async def admin_album_add(req: Request, payload: dict):
641
- if not is_admin(req): return JSONResponse({}, 403)
642
- t = payload.get("token")
643
- if t:
644
- async with DATA_LOCK:
645
- CONFIG_CACHE["album_publishers"][t] = payload.get("publisher", "Unknown")
646
- CONFIG_CACHE["album_categories"][t] = payload.get("category", "Uncategorized")
647
- global ALBUM_PUBLISHERS, ALBUM_CATEGORIES
648
- ALBUM_PUBLISHERS = CONFIG_CACHE["album_publishers"]
649
- ALBUM_CATEGORIES = CONFIG_CACHE["album_categories"]
650
- await save_config()
651
- return {"ok": True}
 
1
+ import os
2
+ import tempfile
3
+
4
+ # ==================================================
5
+ # CRITICAL FIX: SET HF CACHE TO WRITABLE DIR
6
+ # ==================================================
7
+ # Hugging Face libraries try to write to /home/user/.cache by default,
8
+ # which is read-only in some Space configurations. We map it to /tmp.
9
+ os.environ["HF_HOME"] = "/tmp/hf_home"
10
+ os.environ["XDG_CACHE_HOME"] = "/tmp/xdg_cache"
11
+
12
  from fastapi import FastAPI, Request, Form, HTTPException, Query
13
  from fastapi.middleware.cors import CORSMiddleware
14
  from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
15
  import httpx
16
  import json
17
  import logging
 
18
  import asyncio
19
  import secrets
20
  import shutil
 
21
  import html as html_escape_lib
22
  from typing import Optional, List
23
  from datetime import datetime, timezone
 
53
  try:
54
  os.makedirs(TEMP_DIR, exist_ok=True)
55
  except OSError:
 
56
  TEMP_DIR = tempfile.mkdtemp()
57
 
58
  # ==================================================
 
101
  # ==================================================
102
  # LOCKS & STATE
103
  # ==================================================
 
 
104
  INDEX_CACHE = {"videos": []}
105
  CONFIG_CACHE = {}
106
  DATA_LOCK = asyncio.Lock()
 
120
 
121
  def get_hf_url(path_in_repo: str) -> str:
122
  """Returns the direct download URL for a file in the dataset."""
 
123
  return f"https://huggingface.co/datasets/{HF_REPO_ID}/resolve/main/{path_in_repo}?download=true"
124
 
125
  async def sync_pull_json(filename: str, default: dict) -> dict:
126
  """Download JSON from HF. Returns default if not found."""
127
  try:
 
128
  loop = asyncio.get_event_loop()
129
  local_path = await loop.run_in_executor(
130
  None,
 
133
  filename=filename,
134
  repo_type=HF_REPO_TYPE,
135
  token=HF_TOKEN,
136
+ # Explicitly use cache_dir to avoid permission errors
137
+ cache_dir="/tmp/hf_cache",
138
  local_dir=TEMP_DIR
139
  )
140
  )
 
151
  return
152
 
153
  try:
 
154
  temp_path = os.path.join(TEMP_DIR, filename)
155
  with open(temp_path, "w", encoding="utf-8") as f:
156
  json.dump(data, f, indent=2)
157
 
 
158
  loop = asyncio.get_event_loop()
159
  await loop.run_in_executor(
160
  None,
 
174
  if not HF_TOKEN:
175
  return False
176
 
 
177
  if not os.path.exists(local_path):
178
  logging.error(f"Upload failed: Local file not found at {local_path}")
179
  return False
 
190
  commit_message=f"Add media {remote_path}"
191
  )
192
  )
 
193
  await asyncio.sleep(2.0)
194
  return True
195
  except Exception as e:
 
215
  ALBUM_CATEGORIES = dict(CONFIG_CACHE.get("album_categories", {}))
216
 
217
  async def save_index():
 
218
  await sync_push_json("index.json", INDEX_CACHE)
219
 
220
  async def save_config():
 
240
  v["category"] = correct_category
241
  changed = True
242
 
 
243
  if "allowed_divs" not in v:
244
+ v["allowed_divs"] = []
245
  changed = True
246
 
247
  if changed:
 
293
  return (await post_json("webasseturls", base_url, payload)).get("items", {})
294
 
295
  async def download_to_temp(url: str, filename: str) -> str:
 
296
  path = os.path.join(TEMP_DIR, filename)
 
 
297
  if os.path.exists(path):
298
  os.remove(path)
299
 
 
303
  with open(path, "wb") as f:
304
  async for chunk in r.aiter_bytes():
305
  f.write(chunk)
 
306
  return path
307
 
308
  # ==================================================
 
315
 
316
  metadata = await get_metadata(base_url)
317
  if not metadata:
318
+ return
319
 
320
  guids = [p["photoGuid"] for p in metadata]
321
  if not guids:
 
351
  asset = assets[best["checksum"]]
352
  video_url = f"https://{asset['url_location']}{asset['url_path']}"
353
 
 
354
  temp_vid = await download_to_temp(video_url, f"{vid}.mp4")
355
  hf_vid_path = f"videos/{token}/{vid}.mp4"
356
 
357
  vid_success = await sync_push_file(temp_vid, hf_vid_path)
358
 
 
359
  try:
360
  if os.path.exists(temp_vid):
361
  os.remove(temp_vid)
 
363
  logging.warning(f"Cleanup error for {vid}: {e}")
364
 
365
  if not vid_success:
366
+ continue
367
 
 
368
  final_thumb_url = ""
369
  pf = derivatives.get("PosterFrame")
370
  if pf and pf.get("checksum") in assets:
 
376
 
377
  thumb_success = await sync_push_file(temp_thumb, hf_thumb_path)
378
 
 
379
  try:
380
  if os.path.exists(temp_thumb):
381
  os.remove(temp_thumb)
 
431
  # ==================================================
432
  @app.get("/feed/videos")
433
  async def get_video_feed(div: Optional[int] = Query(None, description="School Division (1-25)")):
 
 
 
 
 
 
434
  async with DATA_LOCK:
435
  videos = INDEX_CACHE.get("videos", [])
436
 
 
 
437
  if div is None:
438
  return {"videos": videos}
439
 
 
440
  if div not in VALID_DIVISIONS:
 
 
441
  return {"videos": [v for v in videos if not v.get("allowed_divs")]}
442
 
443
  filtered_videos = []
444
  for v in videos:
445
  allowed = v.get("allowed_divs", [])
446
+ if not allowed or