Spaces:
Running
Running
Collapsed stages 1b, 1c, and 2 into a single cohesive generation pass in Stage 2, which processes all segments in one call with global context. I also updated the Stage 3 stylization to act as a final flourish pass that preserves persona diversity and usernames.
Browse files- pipeline.py +38 -56
pipeline.py
CHANGED
|
@@ -8,7 +8,7 @@ from youtube_transcript_api import YouTubeTranscriptApi
|
|
| 8 |
|
| 9 |
DEFAULT_HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 10 |
FLASH_MODEL = "openai/gpt-oss-120b:fastest"
|
| 11 |
-
PRO_MODEL = "deepseek-ai/DeepSeek-V4-Pro:
|
| 12 |
|
| 13 |
def extract_youtube_video_id(url: str) -> str:
|
| 14 |
url = url.strip()
|
|
@@ -148,44 +148,35 @@ def stage_1a_segment_transcript(transcript_text: str, token: str) -> list:
|
|
| 148 |
cleaned = clean_json_text(content)
|
| 149 |
return json.loads(cleaned)
|
| 150 |
|
| 151 |
-
# --- STAGE
|
| 152 |
-
def
|
| 153 |
-
system_prompt = (
|
| 154 |
-
"You are an expert researcher. Extract the main themes, key concepts, core arguments, facts, terminology, "
|
| 155 |
-
"and themes from the following document.\n"
|
| 156 |
-
"Summarize them in a concise but detailed bulleted list that can be used by another AI model to generate chat reactions."
|
| 157 |
-
)
|
| 158 |
-
user_prompt = f"Document:\n{doc_text}"
|
| 159 |
-
|
| 160 |
-
messages = [
|
| 161 |
-
{"role": "system", "content": system_prompt},
|
| 162 |
-
{"role": "user", "content": user_prompt}
|
| 163 |
-
]
|
| 164 |
-
return call_hf_router(FLASH_MODEL, messages, token)
|
| 165 |
-
|
| 166 |
-
# --- STAGE 2: Generate draft comments (Pro model) ---
|
| 167 |
-
def stage_2_generate_draft_segment(segment: dict, themes: str, token: str) -> dict:
|
| 168 |
system_prompt = (
|
| 169 |
"You are simulating audience chat reactions for a livestream of an educational or historical video.\n"
|
| 170 |
"You are given:\n"
|
| 171 |
-
"1.
|
| 172 |
-
"2.
|
| 173 |
-
"Your task is to generate 8 to 15 draft chat messages from different users reacting to
|
| 174 |
-
"Crucially:\n"
|
| 175 |
-
"
|
| 176 |
-
"
|
| 177 |
-
"
|
| 178 |
-
"
|
| 179 |
-
"
|
|
|
|
|
|
|
|
|
|
| 180 |
"- timestamp: {timestamp}\n"
|
| 181 |
-
"- _internal_logic: Briefly state how this segment relates to
|
| 182 |
"- messages: a list of objects containing 'username' and 'text'.\n\n"
|
| 183 |
-
"Return ONLY
|
| 184 |
)
|
|
|
|
|
|
|
|
|
|
| 185 |
user_prompt = (
|
| 186 |
-
f"
|
| 187 |
-
f"
|
| 188 |
-
|
| 189 |
)
|
| 190 |
|
| 191 |
messages = [
|
|
@@ -194,21 +185,23 @@ def stage_2_generate_draft_segment(segment: dict, themes: str, token: str) -> di
|
|
| 194 |
]
|
| 195 |
content = call_hf_router(PRO_MODEL, messages, token)
|
| 196 |
cleaned = clean_json_text(content)
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
# --- STAGE 3: Style and pacing (Flash model) ---
|
| 202 |
def stage_3_stylize_segment(draft_data: dict, token: str) -> dict:
|
| 203 |
system_prompt = (
|
| 204 |
"You are a style polisher for livestream chat replays (YouTube/Twitch).\n"
|
| 205 |
-
"Your job is to take raw draft chat messages and
|
| 206 |
-
"
|
| 207 |
-
"-
|
| 208 |
-
"
|
| 209 |
-
"
|
| 210 |
-
"-
|
| 211 |
-
"- Avoid sounding like AI-generated summaries or formal text.\n\n"
|
| 212 |
"Return ONLY the updated JSON with the exact same structure. Do not include markdown wraps."
|
| 213 |
)
|
| 214 |
user_prompt = f"Draft JSON:\n{json.dumps(draft_data)}"
|
|
@@ -251,21 +244,10 @@ def run_livestream_pipeline(video_id: str, doc_text: str, transcript_text: str =
|
|
| 251 |
segments = stage_1a_segment_transcript(transcript_text_formatted, token)
|
| 252 |
print(f"Segmented into {len(segments)} blocks.")
|
| 253 |
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
|
| 258 |
-
# Stage 2: Parallel comment generation
|
| 259 |
-
print("Stage 2: Generating draft comments (Pro model)...")
|
| 260 |
-
draft_segments = []
|
| 261 |
-
with ThreadPoolExecutor(max_workers=5) as executor:
|
| 262 |
-
futures = [
|
| 263 |
-
executor.submit(stage_2_generate_draft_segment, seg, themes, token)
|
| 264 |
-
for seg in segments
|
| 265 |
-
]
|
| 266 |
-
for fut in futures:
|
| 267 |
-
draft_segments.append(fut.result())
|
| 268 |
-
|
| 269 |
# Stage 3: Parallel stylization
|
| 270 |
print("Stage 3: Stylizing comments...")
|
| 271 |
final_segments = []
|
|
|
|
| 8 |
|
| 9 |
DEFAULT_HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
| 10 |
FLASH_MODEL = "openai/gpt-oss-120b:fastest"
|
| 11 |
+
PRO_MODEL = "deepseek-ai/DeepSeek-V4-Pro:fastest"
|
| 12 |
|
| 13 |
def extract_youtube_video_id(url: str) -> str:
|
| 14 |
url = url.strip()
|
|
|
|
| 148 |
cleaned = clean_json_text(content)
|
| 149 |
return json.loads(cleaned)
|
| 150 |
|
| 151 |
+
# --- STAGE 2: Generate all draft comments (Pro model) ---
|
| 152 |
+
def stage_2_generate_all_drafts(segments: list, doc_text: str, token: str) -> list:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
system_prompt = (
|
| 154 |
"You are simulating audience chat reactions for a livestream of an educational or historical video.\n"
|
| 155 |
"You are given:\n"
|
| 156 |
+
"1. A reference document.\n"
|
| 157 |
+
"2. A chronological list of video segments with their text and timestamps.\n\n"
|
| 158 |
+
"Your task is to generate 8 to 15 draft chat messages from different users reacting to EACH video segment.\n\n"
|
| 159 |
+
"Crucially, you must follow these steps:\n"
|
| 160 |
+
"1. Identify distinct, specific, falsifiable sub-claims from the document (including counterarguments).\n"
|
| 161 |
+
"2. Sparsely map these document claims to the provided video segments. Some segments will have strong thematic ties, but many should have NONE.\n"
|
| 162 |
+
"3. Generate chat messages for each segment. Translate complex conceptual relationships into authentic internet reactions.\n\n"
|
| 163 |
+
"QUOTAS AND PERSONAS (STRICTLY ENFORCED):\n"
|
| 164 |
+
"- At least half (50%) of all messages should NOT reference the document at all. They must react strictly to the video itself, the speaker, the production, or make a joke.\n"
|
| 165 |
+
"- Ensure diverse, non-repetitive usernames across the entire video. Do not use the same usernames repeatedly for the same types of comments.\n"
|
| 166 |
+
"- Maintain diverse livestream audience personas: some are experts reading deeply into philosophical tension, some take things entirely at face value, some only react emotionally or to video aesthetics, some use sarcasm/memes.\n\n"
|
| 167 |
+
"Format your response as a JSON list of objects, where each object corresponds to a segment and contains:\n"
|
| 168 |
"- timestamp: {timestamp}\n"
|
| 169 |
+
"- _internal_logic: Briefly state how this segment relates to specific sub-claims in the document, or state 'None' if it's off-topic/video-only.\n"
|
| 170 |
"- messages: a list of objects containing 'username' and 'text'.\n\n"
|
| 171 |
+
"Return ONLY a valid JSON list. Do not include markdown wraps or other text."
|
| 172 |
)
|
| 173 |
+
|
| 174 |
+
segments_text = "\n\n".join([f"Segment ({seg['start']}s - {seg['end']}s):\n{seg['text']}" for seg in segments])
|
| 175 |
+
|
| 176 |
user_prompt = (
|
| 177 |
+
f"Reference Document:\n{doc_text}\n\n"
|
| 178 |
+
f"Video Segments:\n{segments_text}\n\n"
|
| 179 |
+
"Generate the JSON list of draft comments for all segments."
|
| 180 |
)
|
| 181 |
|
| 182 |
messages = [
|
|
|
|
| 185 |
]
|
| 186 |
content = call_hf_router(PRO_MODEL, messages, token)
|
| 187 |
cleaned = clean_json_text(content)
|
| 188 |
+
try:
|
| 189 |
+
data = json.loads(cleaned)
|
| 190 |
+
return data
|
| 191 |
+
except Exception as e:
|
| 192 |
+
print(f"Failed to parse JSON from Stage 2. Raw content: {content}")
|
| 193 |
+
raise e
|
| 194 |
|
| 195 |
# --- STAGE 3: Style and pacing (Flash model) ---
|
| 196 |
def stage_3_stylize_segment(draft_data: dict, token: str) -> dict:
|
| 197 |
system_prompt = (
|
| 198 |
"You are a style polisher for livestream chat replays (YouTube/Twitch).\n"
|
| 199 |
+
"Your job is to take raw draft chat messages and perform a final flourish and alignment pass to make them sound authentic.\n\n"
|
| 200 |
+
"CRITICAL INSTRUCTIONS:\n"
|
| 201 |
+
"1. PRESERVE DIVERSITY: The draft already contains carefully balanced personas (jokers, experts, off-topic, skeptics). DO NOT homogenize them. If a message is off-topic, keep it off-topic. If it's a joke about the video, keep it a joke.\n"
|
| 202 |
+
"2. PRESERVE USERNAMES: You MUST use the exact usernames provided in the draft. Do not invent new ones.\n"
|
| 203 |
+
"3. ADD FLOURISH: Make them short, concise, and lively. Inject internet slang (e.g., lol, wtf, lmao, fr, no cap, ngl, bruh) and standard emotes (e.g., LUL, PogChamp, Kappa, MonkaS, BibleThump, 5Head, Pog, Pepega) where appropriate, but don't overdo it.\n"
|
| 204 |
+
"4. Avoid sounding like AI-generated summaries. Do not append emotes to every single message.\n\n"
|
|
|
|
| 205 |
"Return ONLY the updated JSON with the exact same structure. Do not include markdown wraps."
|
| 206 |
)
|
| 207 |
user_prompt = f"Draft JSON:\n{json.dumps(draft_data)}"
|
|
|
|
| 244 |
segments = stage_1a_segment_transcript(transcript_text_formatted, token)
|
| 245 |
print(f"Segmented into {len(segments)} blocks.")
|
| 246 |
|
| 247 |
+
# Stage 2: Single Pro model call for all drafting
|
| 248 |
+
print("Stage 2: Generating draft comments for all segments (Pro model)...")
|
| 249 |
+
draft_segments = stage_2_generate_all_drafts(segments, doc_text, token)
|
| 250 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
# Stage 3: Parallel stylization
|
| 252 |
print("Stage 3: Stylizing comments...")
|
| 253 |
final_segments = []
|