""" content_gen.py — LLM Layer (Local Copy) Generates a structured manifest from a topic prompt. Updated to use DashScope (Alibaba Cloud) with Qwen3.6-Plus. """ import json import re import os from typing import Dict, Any import requests from datetime import datetime class ContentGenerator: # API endpoints # Using the international endpoint as it successfully authenticated with the provided key DASHSCOPE_API_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1" def __init__(self, config: Dict[str, Any]): self.config = config # ENFORCE qwen3.6-plus to avoid 404 errors from outdated config values self.llm_model = "qwen3.6-plus" # DashScope API key provided by user self.dashscope_api_key = "sk-ws-H.HRRIYI.hUUL.MEUCIQDv1QpE1B2xqwrl2OUSt1o7XbluYkzzaW1sCkp_FMCrewIgSbClEsN--mLhn2JAWt5kyrmaX30grEhaAGcav2TeLS0" self.api_url = self.DASHSCOPE_API_URL self.scenes_per_video = config.get("scenes_per_video", 8) if not self.dashscope_api_key: raise ValueError("DashScope API key is not provided") def generate_manifest(self, topic: str) -> Dict[str, Any]: """ Generate a manifest (scene list, caption, hashtags) from a topic prompt. Args: topic: User's topic prompt (e.g., "sunset photography tips") Returns: manifest dict with scenes, caption, hashtags """ print(f"[content] Generating manifest for topic: '{topic}' using {self.llm_model}") # Build LLM prompt prompt = self._build_prompt(topic) # Call DashScope API manifest_text = self._call_llm(prompt) # Parse JSON from response manifest = self._parse_manifest(manifest_text, topic) print(f"[content] Manifest ready — {len(manifest['scenes'])} scenes") return manifest def _build_prompt(self, topic: str) -> str: """Build the LLM prompt to generate a manifest.""" return f"""You are a viral TikTok/Reels content expert. Generate a structured JSON manifest for a short-form video. Topic: {topic} Generate exactly {self.scenes_per_video} scenes. Each scene should: - Have a memorable label (3-5 words, can start with ~) - Have an aesthetic image_query (for Pinterest/Google Images search) Also provide: - A catchy title - An engaging caption (hooks viewers to comment/share) - 4-5 trending hashtags Respond with ONLY valid JSON, no markdown or extra text: {{ "title": "...", "scenes": [ {{"label": "...", "image_query": "..."}}, ... ], "caption": "...", "hashtags": ["#...", "#...", ...] }}""" def _call_llm(self, prompt: str) -> str: """Call the LLM via DashScope OpenAI-compatible API.""" headers = { "Authorization": f"Bearer {self.dashscope_api_key}", "Content-Type": "application/json", } payload = { "model": self.llm_model, "messages": [ { "role": "user", "content": prompt } ], "temperature": 0.7, "max_tokens": 2000, } try: response = requests.post( f"{self.api_url}/chat/completions", headers=headers, json=payload, timeout=180 ) response.raise_for_status() data = response.json() # Check for errors in response if "error" in data: raise Exception(f"API error: {data['error']}") content = data.get("choices", [{}])[0].get("message", {}).get("content") if not content: raise Exception("Empty response from LLM") # Remove potential markdown code blocks if the model included them content = re.sub(r'^```json\s*', '', content, flags=re.MULTILINE) content = re.sub(r'\s*```$', '', content, flags=re.MULTILINE) return content.strip() except Exception as e: print(f"[content] ERROR calling LLM: {e}") raise def _parse_manifest(self, text: str, topic: str) -> Dict[str, Any]: """Parse and validate the JSON manifest from LLM response.""" if not text: print(f"[content] ERROR: Empty response from LLM, using fallback manifest") return self._create_fallback_manifest(topic) try: # Try to extract JSON from the response manifest = json.loads(text) except json.JSONDecodeError: # Try to find JSON block in case there's still extra text json_match = re.search(r'\{.*\}', text, re.DOTALL) if json_match: try: manifest = json.loads(json_match.group()) except json.JSONDecodeError: print(f"[content] WARNING: Could not parse LLM JSON, using fallback manifest") return self._create_fallback_manifest(topic) else: print(f"[content] WARNING: No JSON found in response, using fallback manifest") return self._create_fallback_manifest(topic) # Validate structure required_keys = ["title", "scenes", "caption", "hashtags"] for key in required_keys: if key not in manifest: print(f"[content] WARNING: Missing key '{key}' in manifest") fallback = self._create_fallback_manifest(topic) manifest[key] = fallback.get(key) # Ensure exactly scenes_per_video scenes if "scenes" in manifest and isinstance(manifest["scenes"], list): if len(manifest["scenes"]) != self.scenes_per_video: print(f"[content] WARNING: Expected {self.scenes_per_video} scenes, got {len(manifest['scenes'])}") if len(manifest['scenes']) > self.scenes_per_video: manifest["scenes"] = manifest["scenes"][:self.scenes_per_video] else: while len(manifest["scenes"]) < self.scenes_per_video: manifest["scenes"].append({ "label": f"Bonus Tip {len(manifest['scenes'])+1}", "image_query": topic }) # Ensure each scene has label and image_query if "scenes" in manifest and isinstance(manifest["scenes"], list): for i, scene in enumerate(manifest["scenes"]): if not isinstance(scene, dict): manifest["scenes"][i] = {"label": str(scene), "image_query": topic} continue if "label" not in scene: scene["label"] = f"Scene {i+1}" if "image_query" not in scene: scene["image_query"] = topic # Remove emojis from caption and hashtags if "caption" in manifest and isinstance(manifest["caption"], str): manifest["caption"] = self._remove_emojis(manifest["caption"]) if "hashtags" in manifest and isinstance(manifest["hashtags"], list): manifest["hashtags"] = [self._remove_emojis(str(tag)) for tag in manifest["hashtags"]] # Add timestamp manifest["timestamp"] = datetime.now().isoformat() return manifest def _create_fallback_manifest(self, topic: str) -> Dict[str, Any]: """Create a basic fallback manifest if LLM fails.""" return { "title": topic.title(), "scenes": [ {"label": f"Scene {i+1}", "image_query": topic} for i in range(self.scenes_per_video) ], "caption": "Which one hits hardest?", "hashtags": ["#relatable", "#fyp", "#trending"], "timestamp": datetime.now().isoformat() } def _remove_emojis(self, text: str) -> str: """Remove all emojis from text, keeping only ASCII characters and common punctuation.""" emoji_pattern = re.compile( "[" "\U0001F600-\U0001F64F" # Emoticons "\U0001F300-\U0001F5FF" # Symbols & pictographs "\U0001F680-\U0001F6FF" # Transport & map symbols "\U0001F1E0-\U0001F1FF" # Flags (iOS) "\U00002702-\U000027B0" "\U000024C2-\U0001F251" "\U0001f926-\U0001f937" "\U00010000-\U0010ffff" "\u2640-\u2642" "\u2600-\u2B55" "\u200d" "\u23cf" "\u23e9" "\u231a" "\ufe0f" # Dingbats "\u3030" "]+", re.UNICODE ) text = emoji_pattern.sub(' ', text) text = re.sub(r'\s+', ' ', text).strip() return text