File size: 9,224 Bytes
3577c5c
 
 
e6c8f7e
3577c5c
 
 
 
 
 
 
 
 
 
 
e6c8f7e
 
3577c5c
 
 
b352a97
 
 
e6c8f7e
 
 
98cb49b
3577c5c
e6c8f7e
 
3577c5c
 
 
 
 
 
 
 
 
 
 
e6c8f7e
3577c5c
 
 
 
e6c8f7e
3577c5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6c8f7e
3577c5c
e6c8f7e
3577c5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6c8f7e
3577c5c
 
fb79d27
3577c5c
 
 
 
 
 
 
 
 
 
 
 
e6c8f7e
 
 
 
 
3577c5c
 
 
 
 
 
 
 
 
 
 
 
 
 
e6c8f7e
 
 
 
 
 
 
 
 
 
 
3577c5c
 
 
 
 
e6c8f7e
 
 
3577c5c
 
e6c8f7e
 
 
 
 
 
 
 
 
 
 
3577c5c
 
e6c8f7e
 
 
 
 
 
 
 
 
3577c5c
 
e6c8f7e
 
 
 
3577c5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
"""

content_gen.py — LLM Layer (Local Copy)

Generates a structured manifest from a topic prompt.

Updated to use DashScope (Alibaba Cloud) with Qwen3.6-Plus.

"""
import json
import re
import os
from typing import Dict, Any
import requests
from datetime import datetime


class ContentGenerator:
    """Turn a topic prompt into a structured short-form-video manifest.

    The manifest is produced by a chat-completion call to DashScope's
    OpenAI-compatible endpoint and then validated/repaired locally so that
    callers always receive a dict with ``title``, ``scenes``, ``caption``,
    ``hashtags`` and ``timestamp``.

    The API key is read from ``config["dashscope_api_key"]`` or, failing
    that, from the ``DASHSCOPE_API_KEY`` environment variable. Credentials
    are deliberately NOT embedded in source code.
    """

    # API endpoints
    # International DashScope endpoint (OpenAI-compatible mode).
    DASHSCOPE_API_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"

    # Environment variable consulted when the config carries no API key.
    API_KEY_ENV_VAR = "DASHSCOPE_API_KEY"

    # Required manifest fields and the JSON type each one must have.
    _REQUIRED_FIELDS = (
        ("title", str),
        ("scenes", list),
        ("caption", str),
        ("hashtags", list),
    )

    # Emoji / pictograph code points stripped from captions and hashtags.
    # Compiled once; the ranges are kept narrow so that ordinary scripts
    # (CJK, kana, Hangul, Cyrillic, accented Latin, ...) are left intact.
    _EMOJI_PATTERN = re.compile(
        "["
        "\U00010000-\U0010FFFF"  # all supplementary planes (emoji blocks live here)
        "\u2600-\u2B55"  # misc symbols, dingbats, arrows, stars
        "\u24C2"  # circled M
        "\u231A"  # watch
        "\u23CF"  # eject symbol
        "\u23E9"  # fast-forward
        "\u200D"  # zero-width joiner used in emoji sequences
        "\uFE0F"  # variation selector-16 (emoji presentation)
        "\u3030"  # wavy dash
        "]+"
    )

    def __init__(self, config: Dict[str, Any]):
        """Store configuration and resolve the DashScope credentials.

        Args:
            config: Settings mapping. Recognised keys:
                ``dashscope_api_key`` (optional, overrides the environment)
                and ``scenes_per_video`` (optional, defaults to 8).

        Raises:
            ValueError: If no API key is found in ``config`` or the
                ``DASHSCOPE_API_KEY`` environment variable.
        """
        self.config = config
        # The model is pinned on purpose: outdated config values caused 404s.
        self.llm_model = "qwen3.6-plus"

        # Never hard-code the key; take it from config or the environment.
        self.dashscope_api_key = (
            config.get("dashscope_api_key")
            or os.environ.get(self.API_KEY_ENV_VAR, "")
        )
        self.api_url = self.DASHSCOPE_API_URL
        self.scenes_per_video = config.get("scenes_per_video", 8)

        if not self.dashscope_api_key:
            raise ValueError(
                "DashScope API key is not provided; set "
                "config['dashscope_api_key'] or the "
                f"{self.API_KEY_ENV_VAR} environment variable"
            )

    def generate_manifest(self, topic: str) -> Dict[str, Any]:
        """Generate a manifest (scene list, caption, hashtags) for a topic.

        Args:
            topic: User's topic prompt (e.g., "sunset photography tips").

        Returns:
            Manifest dict with ``title``, ``scenes``, ``caption``,
            ``hashtags`` and ``timestamp``.

        Raises:
            Exception: Whatever ``_call_llm`` raises when the API request
                fails or returns no content.
        """
        print(f"[content] Generating manifest for topic: '{topic}' using {self.llm_model}")

        prompt = self._build_prompt(topic)
        manifest_text = self._call_llm(prompt)
        manifest = self._parse_manifest(manifest_text, topic)

        print(f"[content] Manifest ready — {len(manifest['scenes'])} scenes")
        return manifest

    def _build_prompt(self, topic: str) -> str:
        """Build the LLM prompt that asks for a JSON manifest about ``topic``."""
        return f"""You are a viral TikTok/Reels content expert. Generate a structured JSON manifest for a short-form video.



Topic: {topic}



Generate exactly {self.scenes_per_video} scenes. Each scene should:

- Have a memorable label (3-5 words, can start with ~)

- Have an aesthetic image_query (for Pinterest/Google Images search)



Also provide:

- A catchy title

- An engaging caption (hooks viewers to comment/share)

- 4-5 trending hashtags



Respond with ONLY valid JSON, no markdown or extra text:



{{

  "title": "...",

  "scenes": [

    {{"label": "...", "image_query": "..."}},

    ...

  ],

  "caption": "...",

  "hashtags": ["#...", "#...", ...]

}}"""

    @staticmethod
    def _strip_code_fences(content: str) -> str:
        """Remove Markdown code-fence lines the model may wrap around JSON."""
        # Opening fence, with or without a (case-insensitive) "json" tag.
        content = re.sub(
            r'^```(?:json)?\s*', '', content,
            flags=re.MULTILINE | re.IGNORECASE,
        )
        # Closing fence at the end of a line.
        content = re.sub(r'\s*```$', '', content, flags=re.MULTILINE)
        return content.strip()

    def _call_llm(self, prompt: str) -> str:
        """Send ``prompt`` to the DashScope chat-completions endpoint.

        Args:
            prompt: The full user message to send.

        Returns:
            The model's reply text with Markdown code fences removed.

        Raises:
            RuntimeError: If the response body reports an error or carries
                no message content.
            Exception: Any error raised by ``requests`` (transport failure,
                non-2xx status via ``raise_for_status``, invalid JSON body)
                is logged and re-raised unchanged.
        """
        headers = {
            "Authorization": f"Bearer {self.dashscope_api_key}",
            "Content-Type": "application/json",
        }

        payload = {
            "model": self.llm_model,
            "messages": [
                {
                    "role": "user",
                    "content": prompt
                }
            ],
            "temperature": 0.7,
            "max_tokens": 2000,
        }

        try:
            response = requests.post(
                f"{self.api_url}/chat/completions",
                headers=headers,
                json=payload,
                timeout=180
            )
            response.raise_for_status()
            data = response.json()

            # Some failures arrive as a 200 response with an "error" field.
            if "error" in data:
                raise RuntimeError(f"API error: {data['error']}")

            # Tolerate a missing, null or empty "choices" list instead of
            # failing with an opaque IndexError/TypeError.
            choices = data.get("choices") or [{}]
            message = choices[0].get("message") or {}
            content = message.get("content")
            if not content:
                raise RuntimeError("Empty response from LLM")

            return self._strip_code_fences(content)
        except Exception as e:
            print(f"[content] ERROR calling LLM: {e}")
            raise

    def _parse_manifest(self, text: str, topic: str) -> Dict[str, Any]:
        """Parse the LLM reply into a validated manifest dict.

        Anything that cannot be salvaged is replaced with fallback content,
        so this method does not raise on malformed model output.

        Args:
            text: Raw reply text from the LLM (expected to contain JSON).
            topic: Original topic; used for fallback values.

        Returns:
            Manifest with exactly ``self.scenes_per_video`` scenes, each a
            dict holding ``label`` and ``image_query``, plus emoji-free
            ``caption``/``hashtags`` and a fresh ``timestamp``.
        """
        if not text:
            print("[content] ERROR: Empty response from LLM, using fallback manifest")
            return self._create_fallback_manifest(topic)

        try:
            manifest = json.loads(text)
        except json.JSONDecodeError:
            # The model may have wrapped the JSON in extra prose; take the
            # outermost {...} span and try again.
            json_match = re.search(r'\{.*\}', text, re.DOTALL)
            if json_match is None:
                print("[content] WARNING: No JSON found in response, using fallback manifest")
                return self._create_fallback_manifest(topic)
            try:
                manifest = json.loads(json_match.group())
            except json.JSONDecodeError:
                print("[content] WARNING: Could not parse LLM JSON, using fallback manifest")
                return self._create_fallback_manifest(topic)

        # Valid JSON that is not an object (list, string, number, null)
        # cannot be repaired key by key.
        if not isinstance(manifest, dict):
            print("[content] WARNING: LLM JSON is not an object, using fallback manifest")
            return self._create_fallback_manifest(topic)

        # Fill in missing or wrongly-typed required fields from the fallback.
        fallback = self._create_fallback_manifest(topic)
        for key, expected_type in self._REQUIRED_FIELDS:
            if key not in manifest:
                print(f"[content] WARNING: Missing key '{key}' in manifest")
            elif not isinstance(manifest[key], expected_type):
                print(f"[content] WARNING: Key '{key}' has unexpected type "
                      f"{type(manifest[key]).__name__} in manifest")
            else:
                continue
            manifest[key] = fallback[key]

        # Ensure exactly scenes_per_video scenes: truncate or pad.
        scenes = manifest["scenes"]
        wanted = self.scenes_per_video
        if len(scenes) != wanted:
            print(f"[content] WARNING: Expected {wanted} scenes, got {len(scenes)}")
            if len(scenes) > wanted:
                scenes = manifest["scenes"] = scenes[:wanted]
            else:
                while len(scenes) < wanted:
                    scenes.append({
                        "label": f"Bonus Tip {len(scenes)+1}",
                        "image_query": topic
                    })

        # Ensure each scene is a dict with label and image_query.
        for i, scene in enumerate(scenes):
            if not isinstance(scene, dict):
                scenes[i] = {"label": str(scene), "image_query": topic}
                continue
            scene.setdefault("label", f"Scene {i+1}")
            scene.setdefault("image_query", topic)

        # Remove emojis from caption and hashtags.
        manifest["caption"] = self._remove_emojis(manifest["caption"])
        manifest["hashtags"] = [
            self._remove_emojis(str(tag)) for tag in manifest["hashtags"]
        ]

        manifest["timestamp"] = datetime.now().isoformat()

        return manifest

    def _create_fallback_manifest(self, topic: str) -> Dict[str, Any]:
        """Create a basic manifest for ``topic`` when the LLM output is unusable."""
        return {
            "title": topic.title(),
            "scenes": [
                {"label": f"Scene {i+1}", "image_query": topic}
                for i in range(self.scenes_per_video)
            ],
            "caption": "Which one hits hardest?",
            "hashtags": ["#relatable", "#fyp", "#trending"],
            "timestamp": datetime.now().isoformat()
        }

    def _remove_emojis(self, text: str) -> str:
        """Strip emoji and pictographic symbols from ``text``.

        Each run of emoji is replaced by a single space, then whitespace is
        collapsed and trimmed. Ordinary letters of any script (including
        CJK, kana and Hangul) are preserved.
        """
        text = self._EMOJI_PATTERN.sub(' ', text)
        return re.sub(r'\s+', ' ', text).strip()