Spaces:

Divyonko
/

LivePulse

Sleeping

App Files Community

DivYonko committed on Apr 16

Commit

2965fd0

1 Parent(s): c5b07c4

fix: store backlog immediately without ML inference, add per-message error logging

Browse files

Files changed (1) hide show

app.py +47 -14

app.py CHANGED Viewed

@@ -196,10 +196,12 @@ def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Even
     # Step 2: poll for messages
     page_token    = None
     seen_ids: set = set()   # avoid reprocessing messages on first page
     while not stop_event.is_set():
         messages, page_token, poll_ms = _fetch_chat_messages(live_chat_id, api_key, page_token)
         for item in messages:
             if stop_event.is_set():
                 break
@@ -210,7 +212,6 @@ def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Even
             seen_ids.add(msg_id)
             snippet = item.get("snippet", {})
-            # only process text messages
             if snippet.get("type") != "textMessageEvent":
                 continue
@@ -220,19 +221,51 @@ def _scraper_thread_fn(video_id: str, redis_key: str, stop_event: threading.Even
             if not text:
                 continue
-            sentiment, s_conf = _safe_sentiment(text)
-            topic,     t_conf = _safe_topic(text)
-            message_data = {
-                "author":     author,
-                "text":       text,
-                "sentiment":  sentiment,
-                "confidence": round(s_conf, 3),
-                "topic":      topic,
-                "topic_conf": round(t_conf, 3),
-                "time":       datetime.now().isoformat(),
-            }
-            store_rpush(redis_key, json.dumps(message_data))
         # keep seen_ids from growing unbounded
         if len(seen_ids) > 5000:

     # Step 2: poll for messages
     page_token    = None
     seen_ids: set = set()   # avoid reprocessing messages on first page
+    is_first_page = True    # skip ML on backlog to avoid startup delay
     while not stop_event.is_set():
         messages, page_token, poll_ms = _fetch_chat_messages(live_chat_id, api_key, page_token)
+        new_msgs = []
         for item in messages:
             if stop_event.is_set():
                 break
             seen_ids.add(msg_id)
             snippet = item.get("snippet", {})
             if snippet.get("type") != "textMessageEvent":
                 continue
             if not text:
                 continue
+            new_msgs.append((msg_id, text, author))
+        # On the first page (backlog), store messages with placeholder sentiment
+        # so the UI shows something immediately, then process ML on subsequent pages
+        if is_first_page and new_msgs:
+            logger.info("First page: storing %d backlog messages with placeholder sentiment", len(new_msgs))
+            for _, text, author in new_msgs:
+                message_data = {
+                    "author":     author,
+                    "text":       text,
+                    "sentiment":  "Neutral",
+                    "confidence": 0.5,
+                    "topic":      "General",
+                    "topic_conf": 0.5,
+                    "time":       datetime.now().isoformat(),
+                }
+                store_rpush(redis_key, json.dumps(message_data))
+            logger.info("Backlog stored: %d messages now in store", store_llen(redis_key))
+            is_first_page = False
+        else:
+            # Normal processing with full ML inference
+            for _, text, author in new_msgs:
+                if stop_event.is_set():
+                    break
+                try:
+                    sentiment, s_conf = _safe_sentiment(text)
+                    topic,     t_conf = _safe_topic(text)
+                except Exception as exc:
+                    logger.error("ML inference failed for text=%r: %s", text[:50], exc)
+                    sentiment, s_conf = "Neutral", 0.5
+                    topic,     t_conf = "General", 0.5
+                message_data = {
+                    "author":     author,
+                    "text":       text,
+                    "sentiment":  sentiment,
+                    "confidence": round(s_conf, 3),
+                    "topic":      topic,
+                    "topic_conf": round(t_conf, 3),
+                    "time":       datetime.now().isoformat(),
+                }
+                store_rpush(redis_key, json.dumps(message_data))
+            if new_msgs:
+                logger.info("Processed %d new messages, store size=%d", len(new_msgs), store_llen(redis_key))
         # keep seen_ids from growing unbounded
         if len(seen_ids) > 5000: