Spaces:

adelevett
/

readstream

Running

App Files Community

adelevett committed 18 days ago

Commit

eaae12d

1 Parent(s): 9fce4a4

Collapsed stages 1b, 1c, and 2 into a single cohesive generation pass in Stage 2, which processes all segments in one call with global context. I also updated the Stage 3 stylization to act as a final flourish pass that preserves persona diversity and usernames.

Browse files

Files changed (1) hide show

pipeline.py +38 -56

pipeline.py CHANGED Viewed

@@ -8,7 +8,7 @@ from youtube_transcript_api import YouTubeTranscriptApi
 DEFAULT_HF_TOKEN = os.environ.get("HF_TOKEN", "")
 FLASH_MODEL = "openai/gpt-oss-120b:fastest"
-PRO_MODEL = "deepseek-ai/DeepSeek-V4-Pro:novita"
 def extract_youtube_video_id(url: str) -> str:
     url = url.strip()
@@ -148,44 +148,35 @@ def stage_1a_segment_transcript(transcript_text: str, token: str) -> list:
     cleaned = clean_json_text(content)
     return json.loads(cleaned)
-# --- STAGE 1b: Extract themes ---
-def stage_1b_extract_themes(doc_text: str, token: str) -> str:
-    system_prompt = (
-        "You are an expert researcher. Extract the main themes, key concepts, core arguments, facts, terminology, "
-        "and themes from the following document.\n"
-        "Summarize them in a concise but detailed bulleted list that can be used by another AI model to generate chat reactions."
-    )
-    user_prompt = f"Document:\n{doc_text}"
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "user", "content": user_prompt}
-    ]
-    return call_hf_router(FLASH_MODEL, messages, token)
-# --- STAGE 2: Generate draft comments (Pro model) ---
-def stage_2_generate_draft_segment(segment: dict, themes: str, token: str) -> dict:
     system_prompt = (
         "You are simulating audience chat reactions for a livestream of an educational or historical video.\n"
         "You are given:\n"
-        "1. The caption text of a specific video segment.\n"
-        "2. Extracted themes and concepts from a reference document.\n\n"
-        "Your task is to generate 8 to 15 draft chat messages from different users reacting to this video segment.\n"
-        "Crucially:\n"
-        "- The comments must react directly to the video segment content.\n"
-        "- The comments should reference ideas, concepts, facts, or perspectives from the reference document when relevant "
-        "(e.g., creating conceptual bridges, pointing out contradictions, or validating the video), but they MUST NOT directly quote or paraphrase the source document.\n"
-        "- Translate complex conceptual relationships between the video and the document into raw thoughts or reactions.\n\n"
-        "Format your response as a JSON object with:\n"
         "- timestamp: {timestamp}\n"
-        "- _internal_logic: Briefly state how this segment relates to the source document themes (analogy, contradiction, validation).\n"
         "- messages: a list of objects containing 'username' and 'text'.\n\n"
-        "Return ONLY the JSON. Do not include markdown wraps."
     )
     user_prompt = (
-        f"Video Segment ({segment['start']}s - {segment['end']}s):\n{segment['text']}\n\n"
-        f"Extracted Reference Document Themes:\n{themes}\n\n"
-        f"Generate draft comments for timestamp: {segment['start']}"
     )
     messages = [
@@ -194,21 +185,23 @@ def stage_2_generate_draft_segment(segment: dict, themes: str, token: str) -> di
     ]
     content = call_hf_router(PRO_MODEL, messages, token)
     cleaned = clean_json_text(content)
-    data = json.loads(cleaned)
-    data["timestamp"] = segment["start"]
-    return data
 # --- STAGE 3: Style and pacing (Flash model) ---
 def stage_3_stylize_segment(draft_data: dict, token: str) -> dict:
     system_prompt = (
         "You are a style polisher for livestream chat replays (YouTube/Twitch).\n"
-        "Your job is to take raw draft chat messages and rewrite them to sound like authentic, lively internet comments.\n\n"
-        "Apply these stylistic rules:\n"
-        "- Make them short and concise.\n"
-        "- Inject internet slang (e.g., lol, wtf, lmao, fr, no cap, ngl, bruh) and standard emotes (e.g., LUL, PogChamp, Kappa, MonkaS, BibleThump, 5Head, Pog, Pepega).\n"
-        "- Make username/text combinations feel natural. Some users can be experts reading into philosophical tension; some take things at face value; some only react emotionally or to video aesthetics; some use sarcasm/memes.\n"
-        "- Vary message lengths and pacing.\n"
-        "- Avoid sounding like AI-generated summaries or formal text.\n\n"
         "Return ONLY the updated JSON with the exact same structure. Do not include markdown wraps."
     )
     user_prompt = f"Draft JSON:\n{json.dumps(draft_data)}"
@@ -251,21 +244,10 @@ def run_livestream_pipeline(video_id: str, doc_text: str, transcript_text: str =
     segments = stage_1a_segment_transcript(transcript_text_formatted, token)
     print(f"Segmented into {len(segments)} blocks.")
-    print("Stage 1b: Extracting themes from document...")
-    themes = stage_1b_extract_themes(doc_text, token)
-    print("Themes extracted successfully.")
-    # Stage 2: Parallel comment generation
-    print("Stage 2: Generating draft comments (Pro model)...")
-    draft_segments = []
-    with ThreadPoolExecutor(max_workers=5) as executor:
-        futures = [
-            executor.submit(stage_2_generate_draft_segment, seg, themes, token)
-            for seg in segments
-        ]
-        for fut in futures:
-            draft_segments.append(fut.result())
     # Stage 3: Parallel stylization
     print("Stage 3: Stylizing comments...")
     final_segments = []

 DEFAULT_HF_TOKEN = os.environ.get("HF_TOKEN", "")
 FLASH_MODEL = "openai/gpt-oss-120b:fastest"
+PRO_MODEL = "deepseek-ai/DeepSeek-V4-Pro:fastest"
 def extract_youtube_video_id(url: str) -> str:
     url = url.strip()
     cleaned = clean_json_text(content)
     return json.loads(cleaned)
+# --- STAGE 2: Generate all draft comments (Pro model) ---
+def stage_2_generate_all_drafts(segments: list, doc_text: str, token: str) -> list:
     system_prompt = (
         "You are simulating audience chat reactions for a livestream of an educational or historical video.\n"
         "You are given:\n"
+        "1. A reference document.\n"
+        "2. A chronological list of video segments with their text and timestamps.\n\n"
+        "Your task is to generate 8 to 15 draft chat messages from different users reacting to EACH video segment.\n\n"
+        "Crucially, you must follow these steps:\n"
+        "1. Identify distinct, specific, falsifiable sub-claims from the document (including counterarguments).\n"
+        "2. Sparsely map these document claims to the provided video segments. Some segments will have strong thematic ties, but many should have NONE.\n"
+        "3. Generate chat messages for each segment. Translate complex conceptual relationships into authentic internet reactions.\n\n"
+        "QUOTAS AND PERSONAS (STRICTLY ENFORCED):\n"
+        "- At least half (50%) of all messages should NOT reference the document at all. They must react strictly to the video itself, the speaker, the production, or make a joke.\n"
+        "- Ensure diverse, non-repetitive usernames across the entire video. Do not use the same usernames repeatedly for the same types of comments.\n"
+        "- Maintain diverse livestream audience personas: some are experts reading deeply into philosophical tension, some take things entirely at face value, some only react emotionally or to video aesthetics, some use sarcasm/memes.\n\n"
+        "Format your response as a JSON list of objects, where each object corresponds to a segment and contains:\n"
         "- timestamp: {timestamp}\n"
+        "- _internal_logic: Briefly state how this segment relates to specific sub-claims in the document, or state 'None' if it's off-topic/video-only.\n"
         "- messages: a list of objects containing 'username' and 'text'.\n\n"
+        "Return ONLY a valid JSON list. Do not include markdown wraps or other text."
     )
+    segments_text = "\n\n".join([f"Segment ({seg['start']}s - {seg['end']}s):\n{seg['text']}" for seg in segments])
     user_prompt = (
+        f"Reference Document:\n{doc_text}\n\n"
+        f"Video Segments:\n{segments_text}\n\n"
+        "Generate the JSON list of draft comments for all segments."
     )
     messages = [
     ]
     content = call_hf_router(PRO_MODEL, messages, token)
     cleaned = clean_json_text(content)
+    try:
+        data = json.loads(cleaned)
+        return data
+    except Exception as e:
+        print(f"Failed to parse JSON from Stage 2. Raw content: {content}")
+        raise e
 # --- STAGE 3: Style and pacing (Flash model) ---
 def stage_3_stylize_segment(draft_data: dict, token: str) -> dict:
     system_prompt = (
         "You are a style polisher for livestream chat replays (YouTube/Twitch).\n"
+        "Your job is to take raw draft chat messages and perform a final flourish and alignment pass to make them sound authentic.\n\n"
+        "CRITICAL INSTRUCTIONS:\n"
+        "1. PRESERVE DIVERSITY: The draft already contains carefully balanced personas (jokers, experts, off-topic, skeptics). DO NOT homogenize them. If a message is off-topic, keep it off-topic. If it's a joke about the video, keep it a joke.\n"
+        "2. PRESERVE USERNAMES: You MUST use the exact usernames provided in the draft. Do not invent new ones.\n"
+        "3. ADD FLOURISH: Make them short, concise, and lively. Inject internet slang (e.g., lol, wtf, lmao, fr, no cap, ngl, bruh) and standard emotes (e.g., LUL, PogChamp, Kappa, MonkaS, BibleThump, 5Head, Pog, Pepega) where appropriate, but don't overdo it.\n"
+        "4. Avoid sounding like AI-generated summaries. Do not append emotes to every single message.\n\n"
         "Return ONLY the updated JSON with the exact same structure. Do not include markdown wraps."
     )
     user_prompt = f"Draft JSON:\n{json.dumps(draft_data)}"
     segments = stage_1a_segment_transcript(transcript_text_formatted, token)
     print(f"Segmented into {len(segments)} blocks.")
+    # Stage 2: Single Pro model call for all drafting
+    print("Stage 2: Generating draft comments for all segments (Pro model)...")
+    draft_segments = stage_2_generate_all_drafts(segments, doc_text, token)
     # Stage 3: Parallel stylization
     print("Stage 3: Stylizing comments...")
     final_segments = []