DivYonko committed on
Commit
c5feb54
·
1 Parent(s): f66bc95

API quota conservation: scraper polling rate now matches UI refresh interval

Browse files
Files changed (2) hide show
  1. app.py +7 -7
  2. shared.py +5 -4
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # -*- coding: utf-8 -*-
2
  """
3
  app.py — Hugging Face Spaces adaptation of frontend/streamlit_app.py
4
  Infrastructure: SQLite store + threading scraper (no Redis, no subprocess).
@@ -209,7 +209,7 @@ def _fetch_chat_messages(live_chat_id: str, api_key: str, page_token: str | None
209
  return [], None, 5000
210
 
211
 
212
- def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Event) -> None:
213
  """Background thread � scrapes live chat via YouTube Data API v3."""
214
  api_key = os.getenv("YOUTUBE_API_KEY", "")
215
  logger.info("YOUTUBE_API_KEY present: %s (length=%d)", bool(api_key), len(api_key))
@@ -323,14 +323,14 @@ def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Even
323
  if len(seen_ids) > 5000:
324
  seen_ids = set(list(seen_ids)[-2000:])
325
 
326
- # respect YouTube's requested polling interval (min 3s to be safe)
327
- wait_s = max(poll_ms / 1000, 3.0)
328
  stop_event.wait(timeout=wait_s)
329
 
330
  logger.info("Scraper thread ended � key=%s", redis_key)
331
 
332
 
333
- def start_scraper(slot_idx: int, video_id: str, redis_key: str) -> None:
334
  """Start a scraper thread for the given slot, stopping any existing one first."""
335
  key = str(slot_idx)
336
  stop_scraper(slot_idx)
@@ -338,7 +338,7 @@ def start_scraper(slot_idx: int, video_id: str, redis_key: str) -> None:
338
  stop_event = threading.Event()
339
  t = threading.Thread(
340
  target=_scraper_thread_fn,
341
- args=(video_id, redis_key, stop_event),
342
  daemon=True,
343
  name=f"scraper-{slot_idx}",
344
  )
@@ -997,7 +997,7 @@ with st.sidebar:
997
  vid = extract_video_id(st.session_state[vid_skey])
998
  rkey = st.session_state[rkey_skey].strip() or f"chat_messages_{label.lower()}"
999
  if vid:
1000
- start_scraper(idx, vid, rkey)
1001
  st.session_state.streams[idx]["proc"] = _SCRAPER_THREADS.get(str(idx))
1002
  st.session_state.streams[idx]["video_id"] = vid
1003
  st.session_state.streams[idx]["redis_key"] = rkey
 
1
+ # -*- coding: utf-8 -*-
2
  """
3
  app.py — Hugging Face Spaces adaptation of frontend/streamlit_app.py
4
  Infrastructure: SQLite store + threading scraper (no Redis, no subprocess).
 
209
  return [], None, 5000
210
 
211
 
212
+ def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Event, min_poll_s: float = 10.0) -> None:
213
  """Background thread � scrapes live chat via YouTube Data API v3."""
214
  api_key = os.getenv("YOUTUBE_API_KEY", "")
215
  logger.info("YOUTUBE_API_KEY present: %s (length=%d)", bool(api_key), len(api_key))
 
323
  if len(seen_ids) > 5000:
324
  seen_ids = set(list(seen_ids)[-2000:])
325
 
326
+ # Respect YouTube's requested polling interval, but never faster than min_poll_s
327
+ wait_s = max(poll_ms / 1000, min_poll_s)
328
  stop_event.wait(timeout=wait_s)
329
 
330
  logger.info("Scraper thread ended � key=%s", redis_key)
331
 
332
 
333
+ def start_scraper(slot_idx: int, video_id: str, redis_key: str, min_poll_s: float = 10.0) -> None:
334
  """Start a scraper thread for the given slot, stopping any existing one first."""
335
  key = str(slot_idx)
336
  stop_scraper(slot_idx)
 
338
  stop_event = threading.Event()
339
  t = threading.Thread(
340
  target=_scraper_thread_fn,
341
+ args=(video_id, redis_key, stop_event, min_poll_s),
342
  daemon=True,
343
  name=f"scraper-{slot_idx}",
344
  )
 
997
  vid = extract_video_id(st.session_state[vid_skey])
998
  rkey = st.session_state[rkey_skey].strip() or f"chat_messages_{label.lower()}"
999
  if vid:
1000
+ start_scraper(idx, vid, rkey, min_poll_s=float(st.session_state.get("refresh_rate", 10)))
1001
  st.session_state.streams[idx]["proc"] = _SCRAPER_THREADS.get(str(idx))
1002
  st.session_state.streams[idx]["video_id"] = vid
1003
  st.session_state.streams[idx]["redis_key"] = rkey
shared.py CHANGED
@@ -208,7 +208,7 @@ def _fetch_chat_messages(live_chat_id: str, api_key: str, page_token: str | None
208
  return [], None, 5000
209
 
210
 
211
- def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Event) -> None:
212
  api_key = os.getenv("YOUTUBE_API_KEY", "")
213
  if not api_key:
214
  msg = "YOUTUBE_API_KEY env var not set. Cannot start scraper."
@@ -291,17 +291,18 @@ def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Even
291
  if len(seen_ids) > 5000:
292
  seen_ids = set(list(seen_ids)[-2000:])
293
 
294
- wait_s = max(poll_ms / 1000, 3.0)
 
295
  stop_event.wait(timeout=wait_s)
296
 
297
 
298
- def start_scraper(slot_idx: int, video_id: str, redis_key: str) -> None:
299
  key = str(slot_idx)
300
  stop_scraper(slot_idx)
301
  stop_event = threading.Event()
302
  t = threading.Thread(
303
  target=_scraper_thread_fn,
304
- args=(video_id, redis_key, stop_event),
305
  daemon=True,
306
  name=f"scraper-{slot_idx}",
307
  )
 
208
  return [], None, 5000
209
 
210
 
211
+ def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Event, min_poll_s: float = 10.0) -> None:
212
  api_key = os.getenv("YOUTUBE_API_KEY", "")
213
  if not api_key:
214
  msg = "YOUTUBE_API_KEY env var not set. Cannot start scraper."
 
291
  if len(seen_ids) > 5000:
292
  seen_ids = set(list(seen_ids)[-2000:])
293
 
294
+ # Respect YouTube's requested polling interval, but never faster than min_poll_s
295
+ wait_s = max(poll_ms / 1000, min_poll_s)
296
  stop_event.wait(timeout=wait_s)
297
 
298
 
299
+ def start_scraper(slot_idx: int, video_id: str, redis_key: str, min_poll_s: float = 10.0) -> None:
300
  key = str(slot_idx)
301
  stop_scraper(slot_idx)
302
  stop_event = threading.Event()
303
  t = threading.Thread(
304
  target=_scraper_thread_fn,
305
+ args=(video_id, redis_key, stop_event, min_poll_s),
306
  daemon=True,
307
  name=f"scraper-{slot_idx}",
308
  )