Spaces:
Sleeping
Sleeping
| """ | |
| content_gen.py — LLM Layer (Local Copy) | |
| Generates a structured manifest from a topic prompt. | |
| Updated to use DashScope (Alibaba Cloud) with Qwen3.6-Plus. | |
| """ | |
| import json | |
| import re | |
| import os | |
| from typing import Dict, Any | |
| import requests | |
| from datetime import datetime | |
class ContentGenerator:
    """Generate a structured short-form-video manifest from a topic prompt.

    The generator sends a prompt to the DashScope OpenAI-compatible
    ``/chat/completions`` endpoint, then parses and repairs the JSON reply so
    that callers always receive a dict with the keys ``title``, ``scenes``,
    ``caption``, ``hashtags`` and ``timestamp``.

    Recognised ``config`` keys:
        dashscope_api_key: API key for DashScope. When absent, the
            ``DASHSCOPE_API_KEY`` environment variable is used instead.
        scenes_per_video: Number of scenes each manifest must contain
            (defaults to 8).
    """

    # International endpoint of the DashScope OpenAI-compatible API.
    DASHSCOPE_API_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
    # The model is pinned on purpose: outdated config values caused 404 errors.
    LLM_MODEL = "qwen3.6-plus"
    # Environment variable consulted when the config carries no API key.
    API_KEY_ENV_VAR = "DASHSCOPE_API_KEY"
    # Seconds to wait for the completion request before giving up.
    REQUEST_TIMEOUT_SECONDS = 180
    # Keys every manifest must contain after validation.
    REQUIRED_KEYS = ("title", "scenes", "caption", "hashtags")

    # Markdown code fences some models wrap around their JSON output.
    _CODE_FENCE_OPEN = re.compile(r"^```(?:json)?\s*", re.MULTILINE | re.IGNORECASE)
    _CODE_FENCE_CLOSE = re.compile(r"\s*```$", re.MULTILINE)

    # Emoji and emoji-joining code points. The ranges are deliberately narrow
    # in the Basic Multilingual Plane so that ordinary non-Latin text (CJK,
    # kana, Hangul, ...) is preserved.
    _EMOJI_PATTERN = re.compile(
        "["
        "\U00010000-\U0010FFFF"  # supplementary planes: emoticons, pictographs, flags, ...
        "\u2600-\u2B55"          # miscellaneous symbols, dingbats, arrows, stars
        "\u25A0-\u25FF"          # geometric shapes (play/reverse buttons, squares)
        "\u231A"                 # watch
        "\u23CF"                 # eject symbol
        "\u23E9"                 # fast-forward button
        "\u24C2"                 # circled M
        "\u200D"                 # zero-width joiner used in emoji sequences
        "\uFE0F"                 # variation selector-16 (emoji presentation)
        "\u3030"                 # wavy dash
        "\u303D"                 # part alternation mark
        "\u3297"                 # circled ideograph congratulation
        "\u3299"                 # circled ideograph secret
        "]+"
    )

    def __init__(self, config: Dict[str, Any]):
        """Store the configuration and resolve the DashScope credentials.

        Args:
            config: Settings dict; see the class docstring for the keys read.

        Raises:
            ValueError: If no API key is found in ``config`` or the
                environment.
        """
        self.config = config
        self.llm_model = self.LLM_MODEL
        # Secrets must never live in source control: read the key from the
        # caller-supplied config first, then from the environment.
        self.dashscope_api_key = (
            config.get("dashscope_api_key")
            or os.environ.get(self.API_KEY_ENV_VAR, "")
        )
        self.api_url = self.DASHSCOPE_API_URL
        self.scenes_per_video = config.get("scenes_per_video", 8)
        if not self.dashscope_api_key:
            raise ValueError(
                "DashScope API key is not provided; set config['dashscope_api_key'] "
                f"or the {self.API_KEY_ENV_VAR} environment variable"
            )

    def generate_manifest(self, topic: str) -> Dict[str, Any]:
        """Generate a manifest (scene list, caption, hashtags) for a topic.

        Args:
            topic: User's topic prompt (e.g., "sunset photography tips").

        Returns:
            Manifest dict with ``title``, ``scenes``, ``caption``,
            ``hashtags`` and ``timestamp``.

        Raises:
            Exception: Whatever ``_call_llm`` raises when the request fails.
        """
        print(f"[content] Generating manifest for topic: '{topic}' using {self.llm_model}")
        prompt = self._build_prompt(topic)
        manifest_text = self._call_llm(prompt)
        manifest = self._parse_manifest(manifest_text, topic)
        print(f"[content] Manifest ready — {len(manifest['scenes'])} scenes")
        return manifest

    def _build_prompt(self, topic: str) -> str:
        """Build the LLM prompt that asks for a JSON manifest about ``topic``."""
        return f"""You are a viral TikTok/Reels content expert. Generate a structured JSON manifest for a short-form video.
Topic: {topic}
Generate exactly {self.scenes_per_video} scenes. Each scene should:
- Have a memorable label (3-5 words, can start with ~)
- Have an aesthetic image_query (for Pinterest/Google Images search)
Also provide:
- A catchy title
- An engaging caption (hooks viewers to comment/share)
- 4-5 trending hashtags
Respond with ONLY valid JSON, no markdown or extra text:
{{
"title": "...",
"scenes": [
{{"label": "...", "image_query": "..."}},
...
],
"caption": "...",
"hashtags": ["#...", "#...", ...]
}}"""

    @classmethod
    def _strip_code_fences(cls, content: str) -> str:
        """Remove Markdown code fences (with or without a ``json`` tag)."""
        content = cls._CODE_FENCE_OPEN.sub("", content)
        content = cls._CODE_FENCE_CLOSE.sub("", content)
        return content.strip()

    def _call_llm(self, prompt: str) -> str:
        """Send ``prompt`` to the chat-completions endpoint and return the reply text.

        Args:
            prompt: Full user message to send to the model.

        Returns:
            The model's reply with surrounding Markdown code fences removed.

        Raises:
            RuntimeError: If the API reports an error or returns no content.
            Exception: Any transport or HTTP error from the request; every
                failure is logged and re-raised unchanged.
        """
        headers = {
            "Authorization": f"Bearer {self.dashscope_api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.llm_model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": 0.7,
            "max_tokens": 2000,
        }
        try:
            response = requests.post(
                f"{self.api_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            data = response.json()
            if "error" in data:
                raise RuntimeError(f"API error: {data['error']}")
            # An empty "choices" list must be reported as an empty response,
            # not surface as an IndexError.
            choices = data.get("choices") or [{}]
            content = (choices[0].get("message") or {}).get("content")
            if not content:
                raise RuntimeError("Empty response from LLM")
            return self._strip_code_fences(content)
        except Exception as e:
            print(f"[content] ERROR calling LLM: {e}")
            raise

    def _parse_manifest(self, text: str, topic: str) -> Dict[str, Any]:
        """Parse the LLM reply into a validated manifest.

        Unusable replies (empty, not JSON, or JSON that is not an object)
        yield the fallback manifest. Usable replies are repaired in place:
        missing or wrongly typed fields are filled from the fallback, the
        scene list is trimmed or padded to ``scenes_per_video`` entries, and
        emojis are stripped from the caption and hashtags.

        Args:
            text: Raw reply text from the model.
            topic: Topic used for fallback values and padding scenes.

        Returns:
            Manifest dict including a ``timestamp`` key.
        """
        if not text:
            print(f"[content] ERROR: Empty response from LLM, using fallback manifest")
            return self._create_fallback_manifest(topic)

        try:
            manifest = json.loads(text)
        except json.JSONDecodeError:
            # The model may have wrapped the JSON in extra prose; take the
            # outermost brace-delimited span and retry.
            json_match = re.search(r'\{.*\}', text, re.DOTALL)
            if not json_match:
                print(f"[content] WARNING: No JSON found in response, using fallback manifest")
                return self._create_fallback_manifest(topic)
            try:
                manifest = json.loads(json_match.group())
            except json.JSONDecodeError:
                print(f"[content] WARNING: Could not parse LLM JSON, using fallback manifest")
                return self._create_fallback_manifest(topic)

        if not isinstance(manifest, dict):
            # Valid JSON that is a list/string/number cannot be repaired.
            print(f"[content] WARNING: LLM JSON is not an object, using fallback manifest")
            return self._create_fallback_manifest(topic)

        fallback = self._create_fallback_manifest(topic)

        for key in self.REQUIRED_KEYS:
            if key not in manifest:
                print(f"[content] WARNING: Missing key '{key}' in manifest")
                manifest[key] = fallback[key]

        if not isinstance(manifest["scenes"], list):
            print(f"[content] WARNING: 'scenes' is not a list, using fallback scenes")
            manifest["scenes"] = fallback["scenes"]

        # Ensure exactly scenes_per_video scenes.
        if len(manifest["scenes"]) != self.scenes_per_video:
            print(f"[content] WARNING: Expected {self.scenes_per_video} scenes, got {len(manifest['scenes'])}")
            manifest["scenes"] = manifest["scenes"][:self.scenes_per_video]
            while len(manifest["scenes"]) < self.scenes_per_video:
                manifest["scenes"].append({
                    "label": f"Bonus Tip {len(manifest['scenes'])+1}",
                    "image_query": topic,
                })

        # Ensure each scene is a dict with a usable label and image_query.
        scenes = manifest["scenes"]
        for i, scene in enumerate(scenes):
            if not isinstance(scene, dict):
                scenes[i] = {"label": str(scene), "image_query": topic}
                continue
            if not scene.get("label"):
                scene["label"] = f"Scene {i+1}"
            if not scene.get("image_query"):
                scene["image_query"] = topic

        # Remove emojis from caption and hashtags, repairing wrong types.
        if isinstance(manifest["caption"], str):
            manifest["caption"] = self._remove_emojis(manifest["caption"])
        else:
            manifest["caption"] = fallback["caption"]

        hashtags = manifest["hashtags"]
        if isinstance(hashtags, str):
            # Some replies give a single space-separated string of tags.
            hashtags = hashtags.split()
        if isinstance(hashtags, list):
            manifest["hashtags"] = [self._remove_emojis(str(tag)) for tag in hashtags]
        else:
            manifest["hashtags"] = fallback["hashtags"]

        manifest["timestamp"] = datetime.now().isoformat()
        return manifest

    def _create_fallback_manifest(self, topic: str) -> Dict[str, Any]:
        """Create a basic manifest for ``topic`` to use when the LLM output is unusable."""
        return {
            "title": topic.title(),
            "scenes": [
                {"label": f"Scene {i+1}", "image_query": topic}
                for i in range(self.scenes_per_video)
            ],
            "caption": "Which one hits hardest?",
            "hashtags": ["#relatable", "#fyp", "#trending"],
            "timestamp": datetime.now().isoformat(),
        }

    def _remove_emojis(self, text: str) -> str:
        """Remove emojis from ``text`` and collapse the whitespace left behind.

        Each run of emoji code points is replaced by a space, then runs of
        whitespace are collapsed and the ends trimmed. Non-emoji text,
        including non-Latin scripts, is left intact.
        """
        text = self._EMOJI_PATTERN.sub(' ', text)
        return re.sub(r'\s+', ' ', text).strip()