DivYonko committed on
Commit ·
c5feb54
1
Parent(s): f66bc95
API quota conservation: scraper polling rate now matches UI refresh interval
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
|
| 2 |
"""
|
| 3 |
app.py — Hugging Face Spaces adaptation of frontend/streamlit_app.py
|
| 4 |
Infrastructure: SQLite store + threading scraper (no Redis, no subprocess).
|
|
@@ -209,7 +209,7 @@ def _fetch_chat_messages(live_chat_id: str, api_key: str, page_token: str | None
|
|
| 209 |
return [], None, 5000
|
| 210 |
|
| 211 |
|
| 212 |
-
def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Event) -> None:
|
| 213 |
"""Background thread — scrapes live chat via YouTube Data API v3."""
|
| 214 |
api_key = os.getenv("YOUTUBE_API_KEY", "")
|
| 215 |
logger.info("YOUTUBE_API_KEY present: %s (length=%d)", bool(api_key), len(api_key))
|
|
@@ -323,14 +323,14 @@ def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Even
|
|
| 323 |
if len(seen_ids) > 5000:
|
| 324 |
seen_ids = set(list(seen_ids)[-2000:])
|
| 325 |
|
| 326 |
-
#
|
| 327 |
-
wait_s = max(poll_ms / 1000,
|
| 328 |
stop_event.wait(timeout=wait_s)
|
| 329 |
|
| 330 |
logger.info("Scraper thread ended � key=%s", redis_key)
|
| 331 |
|
| 332 |
|
| 333 |
-
def start_scraper(slot_idx: int, video_id: str, redis_key: str) -> None:
|
| 334 |
"""Start a scraper thread for the given slot, stopping any existing one first."""
|
| 335 |
key = str(slot_idx)
|
| 336 |
stop_scraper(slot_idx)
|
|
@@ -338,7 +338,7 @@ def start_scraper(slot_idx: int, video_id: str, redis_key: str) -> None:
|
|
| 338 |
stop_event = threading.Event()
|
| 339 |
t = threading.Thread(
|
| 340 |
target=_scraper_thread_fn,
|
| 341 |
-
args=(video_id, redis_key, stop_event),
|
| 342 |
daemon=True,
|
| 343 |
name=f"scraper-{slot_idx}",
|
| 344 |
)
|
|
@@ -997,7 +997,7 @@ with st.sidebar:
|
|
| 997 |
vid = extract_video_id(st.session_state[vid_skey])
|
| 998 |
rkey = st.session_state[rkey_skey].strip() or f"chat_messages_{label.lower()}"
|
| 999 |
if vid:
|
| 1000 |
-
start_scraper(idx, vid, rkey)
|
| 1001 |
st.session_state.streams[idx]["proc"] = _SCRAPER_THREADS.get(str(idx))
|
| 1002 |
st.session_state.streams[idx]["video_id"] = vid
|
| 1003 |
st.session_state.streams[idx]["redis_key"] = rkey
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
app.py — Hugging Face Spaces adaptation of frontend/streamlit_app.py
|
| 4 |
Infrastructure: SQLite store + threading scraper (no Redis, no subprocess).
|
|
|
|
| 209 |
return [], None, 5000
|
| 210 |
|
| 211 |
|
| 212 |
+
def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Event, min_poll_s: float = 10.0) -> None:
|
| 213 |
"""Background thread — scrapes live chat via YouTube Data API v3."""
|
| 214 |
api_key = os.getenv("YOUTUBE_API_KEY", "")
|
| 215 |
logger.info("YOUTUBE_API_KEY present: %s (length=%d)", bool(api_key), len(api_key))
|
|
|
|
| 323 |
if len(seen_ids) > 5000:
|
| 324 |
seen_ids = set(list(seen_ids)[-2000:])
|
| 325 |
|
| 326 |
+
# Respect YouTube's requested polling interval, but never faster than min_poll_s
|
| 327 |
+
wait_s = max(poll_ms / 1000, min_poll_s)
|
| 328 |
stop_event.wait(timeout=wait_s)
|
| 329 |
|
| 330 |
logger.info("Scraper thread ended � key=%s", redis_key)
|
| 331 |
|
| 332 |
|
| 333 |
+
def start_scraper(slot_idx: int, video_id: str, redis_key: str, min_poll_s: float = 10.0) -> None:
|
| 334 |
"""Start a scraper thread for the given slot, stopping any existing one first."""
|
| 335 |
key = str(slot_idx)
|
| 336 |
stop_scraper(slot_idx)
|
|
|
|
| 338 |
stop_event = threading.Event()
|
| 339 |
t = threading.Thread(
|
| 340 |
target=_scraper_thread_fn,
|
| 341 |
+
args=(video_id, redis_key, stop_event, min_poll_s),
|
| 342 |
daemon=True,
|
| 343 |
name=f"scraper-{slot_idx}",
|
| 344 |
)
|
|
|
|
| 997 |
vid = extract_video_id(st.session_state[vid_skey])
|
| 998 |
rkey = st.session_state[rkey_skey].strip() or f"chat_messages_{label.lower()}"
|
| 999 |
if vid:
|
| 1000 |
+
start_scraper(idx, vid, rkey, min_poll_s=float(st.session_state.get("refresh_rate", 10)))
|
| 1001 |
st.session_state.streams[idx]["proc"] = _SCRAPER_THREADS.get(str(idx))
|
| 1002 |
st.session_state.streams[idx]["video_id"] = vid
|
| 1003 |
st.session_state.streams[idx]["redis_key"] = rkey
|
shared.py
CHANGED
|
@@ -208,7 +208,7 @@ def _fetch_chat_messages(live_chat_id: str, api_key: str, page_token: str | None
|
|
| 208 |
return [], None, 5000
|
| 209 |
|
| 210 |
|
| 211 |
-
def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Event) -> None:
|
| 212 |
api_key = os.getenv("YOUTUBE_API_KEY", "")
|
| 213 |
if not api_key:
|
| 214 |
msg = "YOUTUBE_API_KEY env var not set. Cannot start scraper."
|
|
@@ -291,17 +291,18 @@ def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Even
|
|
| 291 |
if len(seen_ids) > 5000:
|
| 292 |
seen_ids = set(list(seen_ids)[-2000:])
|
| 293 |
|
| 294 |
-
|
|
|
|
| 295 |
stop_event.wait(timeout=wait_s)
|
| 296 |
|
| 297 |
|
| 298 |
-
def start_scraper(slot_idx: int, video_id: str, redis_key: str) -> None:
|
| 299 |
key = str(slot_idx)
|
| 300 |
stop_scraper(slot_idx)
|
| 301 |
stop_event = threading.Event()
|
| 302 |
t = threading.Thread(
|
| 303 |
target=_scraper_thread_fn,
|
| 304 |
-
args=(video_id, redis_key, stop_event),
|
| 305 |
daemon=True,
|
| 306 |
name=f"scraper-{slot_idx}",
|
| 307 |
)
|
|
|
|
| 208 |
return [], None, 5000
|
| 209 |
|
| 210 |
|
| 211 |
+
def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Event, min_poll_s: float = 10.0) -> None:
|
| 212 |
api_key = os.getenv("YOUTUBE_API_KEY", "")
|
| 213 |
if not api_key:
|
| 214 |
msg = "YOUTUBE_API_KEY env var not set. Cannot start scraper."
|
|
|
|
| 291 |
if len(seen_ids) > 5000:
|
| 292 |
seen_ids = set(list(seen_ids)[-2000:])
|
| 293 |
|
| 294 |
+
# Respect YouTube's requested polling interval, but never faster than min_poll_s
|
| 295 |
+
wait_s = max(poll_ms / 1000, min_poll_s)
|
| 296 |
stop_event.wait(timeout=wait_s)
|
| 297 |
|
| 298 |
|
| 299 |
+
def start_scraper(slot_idx: int, video_id: str, redis_key: str, min_poll_s: float = 10.0) -> None:
|
| 300 |
key = str(slot_idx)
|
| 301 |
stop_scraper(slot_idx)
|
| 302 |
stop_event = threading.Event()
|
| 303 |
t = threading.Thread(
|
| 304 |
target=_scraper_thread_fn,
|
| 305 |
+
args=(video_id, redis_key, stop_event, min_poll_s),
|
| 306 |
daemon=True,
|
| 307 |
name=f"scraper-{slot_idx}",
|
| 308 |
)
|