"""
content_gen.py — LLM Layer (Local Copy)
Generates a structured manifest from a topic prompt.
Updated to use DashScope (Alibaba Cloud) with Qwen3.6-Plus.
"""
import json
import re
import os
from typing import Dict, Any
import requests
from datetime import datetime
class ContentGenerator:
    """Generate a structured short-form-video manifest from a topic via an LLM.

    The generator sends a prompt to the DashScope OpenAI-compatible
    ``/chat/completions`` endpoint, parses the JSON the model returns and
    normalises it into a manifest dict with the keys ``title``, ``scenes``,
    ``caption``, ``hashtags`` and ``timestamp``.

    Configuration keys read from ``config``:
        dashscope_api_key: API key; when absent or empty the
            ``DASHSCOPE_API_KEY`` environment variable is used instead.
        scenes_per_video: number of scenes every manifest must contain
            (default 8).
    """

    # API endpoints
    # Using the international endpoint as it successfully authenticated with the provided key
    DASHSCOPE_API_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"

    # Environment variable consulted when the config carries no API key.
    API_KEY_ENV_VAR = "DASHSCOPE_API_KEY"

    # Compiled once at class creation instead of on every _remove_emojis call.
    # NOTE(review): the U+24C2-U+1F251 and U+10000-U+10FFFF ranges are far
    # wider than "emoji" -- they also cover CJK ideographs, Hangul and every
    # supplementary-plane character, so such text is stripped as well.
    # Confirm this is intended before feeding non-Latin captions through.
    _EMOJI_PATTERN = re.compile(
        "["
        "\U0001F600-\U0001F64F"  # Emoticons
        "\U0001F300-\U0001F5FF"  # Symbols & pictographs
        "\U0001F680-\U0001F6FF"  # Transport & map symbols
        "\U0001F1E0-\U0001F1FF"  # Flags (iOS)
        "\U00002702-\U000027B0"  # Dingbats
        "\U000024C2-\U0001F251"
        "\U0001f926-\U0001f937"
        "\U00010000-\U0010ffff"
        "\u2640-\u2642"
        "\u2600-\u2B55"
        "\u200d"  # Zero-width joiner
        "\u23cf"
        "\u23e9"
        "\u231a"
        "\ufe0f"  # Variation selector-16
        "\u3030"
        "]+",
        re.UNICODE,
    )

    def __init__(self, config: Dict[str, Any]):
        """Store settings and resolve the DashScope API key.

        Args:
            config: settings mapping; see the class docstring for the keys
                that are read.

        Raises:
            ValueError: if no API key is found in ``config`` or in the
                ``DASHSCOPE_API_KEY`` environment variable.
        """
        self.config = config
        # ENFORCE qwen3.6-plus to avoid 404 errors from outdated config values
        self.llm_model = "qwen3.6-plus"
        # The key must never be committed to source control: take it from
        # the config first, then from the environment.
        self.dashscope_api_key = (
            config.get("dashscope_api_key")
            or os.environ.get(self.API_KEY_ENV_VAR, "")
        )
        self.api_url = self.DASHSCOPE_API_URL
        self.scenes_per_video = config.get("scenes_per_video", 8)
        if not self.dashscope_api_key:
            raise ValueError(
                "DashScope API key is not provided "
                "(set config['dashscope_api_key'] or the "
                f"{self.API_KEY_ENV_VAR} environment variable)"
            )

    def generate_manifest(self, topic: str) -> Dict[str, Any]:
        """
        Generate a manifest (scene list, caption, hashtags) from a topic prompt.

        Args:
            topic: User's topic prompt (e.g., "sunset photography tips")

        Returns:
            manifest dict with title, scenes, caption, hashtags and timestamp

        Raises:
            Exception: whatever ``_call_llm`` raises when the request fails
                or the API returns no usable content.
        """
        print(f"[content] Generating manifest for topic: '{topic}' using {self.llm_model}")
        # Build LLM prompt
        prompt = self._build_prompt(topic)
        # Call DashScope API
        manifest_text = self._call_llm(prompt)
        # Parse JSON from response
        manifest = self._parse_manifest(manifest_text, topic)
        print(f"[content] Manifest ready — {len(manifest['scenes'])} scenes")
        return manifest

    def _build_prompt(self, topic: str) -> str:
        """Build the LLM prompt to generate a manifest."""
        # The doubled braces are f-string escapes for the literal JSON braces.
        return f"""You are a viral TikTok/Reels content expert. Generate a structured JSON manifest for a short-form video.
Topic: {topic}
Generate exactly {self.scenes_per_video} scenes. Each scene should:
- Have a memorable label (3-5 words, can start with ~)
- Have an aesthetic image_query (for Pinterest/Google Images search)
Also provide:
- A catchy title
- An engaging caption (hooks viewers to comment/share)
- 4-5 trending hashtags
Respond with ONLY valid JSON, no markdown or extra text:
{{
"title": "...",
"scenes": [
{{"label": "...", "image_query": "..."}},
...
],
"caption": "...",
"hashtags": ["#...", "#...", ...]
}}"""

    def _call_llm(self, prompt: str) -> str:
        """Call the LLM via DashScope OpenAI-compatible API.

        Args:
            prompt: full user prompt to send as a single chat message.

        Returns:
            The model's reply text with markdown code fences removed.

        Raises:
            Exception: any error from the HTTP request, an ``error`` payload
                from the API, or a reply with no usable content. The error
                is printed and then re-raised unchanged.
        """
        headers = {
            "Authorization": f"Bearer {self.dashscope_api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.llm_model,
            "messages": [
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            "temperature": 0.7,
            "max_tokens": 2000,
        }
        try:
            response = requests.post(
                f"{self.api_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=180,
            )
            response.raise_for_status()
            content = self._extract_content(response.json())
            return self._strip_code_fences(content)
        except Exception as e:
            print(f"[content] ERROR calling LLM: {e}")
            raise

    @staticmethod
    def _extract_content(data: Any) -> str:
        """Pull the first choice's message text out of a decoded API reply.

        Every level is type-checked so that an empty ``choices`` list or a
        null ``message`` produces a clear error rather than an
        ``IndexError``/``AttributeError``.

        Raises:
            RuntimeError: if the payload is not an object, carries an
                ``error`` entry, or holds no non-empty string content.
        """
        if not isinstance(data, dict):
            raise RuntimeError(
                f"Unexpected response from LLM: {type(data).__name__}"
            )
        # Check for errors in response
        if "error" in data:
            raise RuntimeError(f"API error: {data['error']}")
        choices = data.get("choices")
        first = choices[0] if isinstance(choices, list) and choices else None
        message = first.get("message") if isinstance(first, dict) else None
        content = message.get("content") if isinstance(message, dict) else None
        if not content:
            raise RuntimeError("Empty response from LLM")
        if not isinstance(content, str):
            raise RuntimeError(
                f"Unexpected content type from LLM: {type(content).__name__}"
            )
        return content

    @staticmethod
    def _strip_code_fences(content: str) -> str:
        """Remove markdown code fences the model may have wrapped around JSON."""
        content = re.sub(r'^```json\s*', '', content, flags=re.MULTILINE)
        content = re.sub(r'\s*```$', '', content, flags=re.MULTILINE)
        return content.strip()

    def _parse_manifest(self, text: str, topic: str) -> Dict[str, Any]:
        """Parse and validate the JSON manifest from LLM response.

        Anything that cannot be parsed into a JSON object yields the
        fallback manifest. Missing or wrong-typed top-level keys are filled
        from the fallback, the scene list is truncated or padded to exactly
        ``scenes_per_video`` entries, and emojis are stripped from the
        caption and hashtags.

        Args:
            text: reply text from the LLM (may be empty).
            topic: the user's topic, used for fallback and default values.

        Returns:
            A manifest dict with ``title``, ``scenes``, ``caption``,
            ``hashtags`` and ``timestamp``.
        """
        if not text:
            print("[content] ERROR: Empty response from LLM, using fallback manifest")
            return self._create_fallback_manifest(topic)

        try:
            # Try to extract JSON from the response
            manifest = json.loads(text)
        except json.JSONDecodeError:
            # Try to find JSON block in case there's still extra text
            json_match = re.search(r'\{.*\}', text, re.DOTALL)
            if not json_match:
                print("[content] WARNING: No JSON found in response, using fallback manifest")
                return self._create_fallback_manifest(topic)
            try:
                manifest = json.loads(json_match.group())
            except json.JSONDecodeError:
                print("[content] WARNING: Could not parse LLM JSON, using fallback manifest")
                return self._create_fallback_manifest(topic)

        # Valid JSON that is not an object (list, string, number) cannot
        # carry the manifest keys at all.
        if not isinstance(manifest, dict):
            print("[content] WARNING: LLM JSON is not an object, using fallback manifest")
            return self._create_fallback_manifest(topic)

        # Validate structure: every key must exist and have the right type.
        fallback = self._create_fallback_manifest(topic)
        expected_types = {
            "title": str,
            "scenes": list,
            "caption": str,
            "hashtags": list,
        }
        for key, expected in expected_types.items():
            if key not in manifest:
                print(f"[content] WARNING: Missing key '{key}' in manifest")
                manifest[key] = fallback[key]
            elif not isinstance(manifest[key], expected):
                print(f"[content] WARNING: Invalid type for key '{key}' in manifest")
                manifest[key] = fallback[key]

        # Ensure exactly scenes_per_video scenes
        target = self.scenes_per_video
        scenes = manifest["scenes"]
        if len(scenes) != target:
            print(f"[content] WARNING: Expected {target} scenes, got {len(scenes)}")
            scenes = list(scenes[:target])
            scenes.extend(
                {"label": f"Bonus Tip {number}", "image_query": topic}
                for number in range(len(scenes) + 1, target + 1)
            )

        # Ensure each scene has label and image_query
        for i, scene in enumerate(scenes):
            if not isinstance(scene, dict):
                scenes[i] = {"label": str(scene), "image_query": topic}
                continue
            scene.setdefault("label", f"Scene {i+1}")
            scene.setdefault("image_query", topic)
        manifest["scenes"] = scenes

        # Remove emojis from caption and hashtags
        manifest["caption"] = self._remove_emojis(manifest["caption"])
        manifest["hashtags"] = [
            self._remove_emojis(str(tag)) for tag in manifest["hashtags"]
        ]

        # Add timestamp
        manifest["timestamp"] = datetime.now().isoformat()
        return manifest

    def _create_fallback_manifest(self, topic: str) -> Dict[str, Any]:
        """Create a basic fallback manifest if LLM fails.

        The manifest has ``scenes_per_video`` generic scenes that all search
        for ``topic``, a stock caption and stock hashtags.
        """
        return {
            "title": topic.title(),
            "scenes": [
                {"label": f"Scene {i+1}", "image_query": topic}
                for i in range(self.scenes_per_video)
            ],
            "caption": "Which one hits hardest?",
            "hashtags": ["#relatable", "#fyp", "#trending"],
            "timestamp": datetime.now().isoformat(),
        }

    def _remove_emojis(self, text: str) -> str:
        """Strip emoji and other symbol characters, then collapse whitespace.

        Each run of characters matched by ``_EMOJI_PATTERN`` is replaced by
        a single space; whitespace runs are then collapsed to one space and
        the result is trimmed. Characters outside the pattern's ranges
        (ASCII, accented Latin, Cyrillic, ...) are kept.
        """
        text = self._EMOJI_PATTERN.sub(' ', text)
        text = re.sub(r'\s+', ' ', text).strip()
        return text
|