Spaces:

factorstudios
/

content_gen

Sleeping

App Files Files Community

content_gen / content_gen.py

factorstudios

Update content_gen.py

98cb49b verified 29 days ago

Raw

History Blame Contribute Delete

9.22 kB

	"""
	content_gen.py — LLM Layer (Local Copy)
	Generates a structured manifest from a topic prompt.
	Updated to use DashScope (Alibaba Cloud) with Qwen3.6-Plus.
	"""
	import json
	import re
	import os
	from typing import Dict, Any
	import requests
	from datetime import datetime


	class ContentGenerator:
	    """Build a short-form-video manifest from a topic using a DashScope LLM.

	    The manifest is a dict with ``title``, ``scenes`` (a list of
	    ``{"label", "image_query"}`` dicts), ``caption``, ``hashtags`` and a
	    ``timestamp``.  If the model's reply cannot be parsed, a generic
	    fallback manifest is returned instead of raising.
	    """

	    # API endpoints
	    # Using the international endpoint as it successfully authenticated with the provided key
	    DASHSCOPE_API_URL = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"

	    # Keys every manifest must carry, mapped to the JSON type each must have.
	    _REQUIRED_FIELDS = {
	        "title": str,
	        "scenes": list,
	        "caption": str,
	        "hashtags": list,
	    }

	    # Compiled once: emoji and pictographic symbols only.  The BMP range stops
	    # at U+2B55 on purpose -- a wider range would also delete CJK, kana,
	    # Hangul and other scripts that live above it.
	    _EMOJI_PATTERN = re.compile(
	        "["
	        "\U00010000-\U0010FFFF"  # all supplementary planes (emoji blocks, flags, tags)
	        "\u24C2-\u2B55"  # enclosed letters, shapes, misc symbols, dingbats, arrows
	        "\u231A\u231B"  # watch, hourglass
	        "\u23CF"  # eject symbol
	        "\u23E9-\u23F3"  # media controls, clocks
	        "\u23F8-\u23FA"  # pause / stop / record
	        "\u200D"  # zero-width joiner used in emoji sequences
	        "\u20E3"  # combining enclosing keycap
	        "\uFE0F"  # emoji variation selector
	        "\u3030\u303D\u3297\u3299"  # wavy dash, part alternation mark, circled ideographs
	        "]+"
	    )

	    def __init__(self, config: Dict[str, Any]):
	        """Store configuration and resolve the DashScope credentials.

	        Args:
	            config: Settings dict.  Reads ``dashscope_api_key`` (optional) and
	                ``scenes_per_video`` (optional, defaults to 8).

	        Raises:
	            ValueError: If no API key is found in ``config`` or in the
	                ``DASHSCOPE_API_KEY`` environment variable.
	        """
	        self.config = config
	        # ENFORCE qwen3.6-plus to avoid 404 errors from outdated config values
	        self.llm_model = "qwen3.6-plus"

	        # Never embed the secret in source: take it from config first, then
	        # from the environment.
	        self.dashscope_api_key = (
	            config.get("dashscope_api_key")
	            or os.environ.get("DASHSCOPE_API_KEY", "")
	        )
	        self.api_url = self.DASHSCOPE_API_URL
	        self.scenes_per_video = config.get("scenes_per_video", 8)

	        if not self.dashscope_api_key:
	            raise ValueError(
	                "DashScope API key is not provided "
	                "(set config['dashscope_api_key'] or the DASHSCOPE_API_KEY "
	                "environment variable)"
	            )

	    def generate_manifest(self, topic: str) -> Dict[str, Any]:
	        """
	        Generate a manifest (scene list, caption, hashtags) from a topic prompt.

	        Args:
	            topic: User's topic prompt (e.g., "sunset photography tips")

	        Returns:
	            manifest dict with scenes, caption, hashtags

	        Raises:
	            Exception: Whatever ``_call_llm`` raises when the HTTP request
	                fails or the API returns an error/empty reply.
	        """
	        print(f"[content] Generating manifest for topic: '{topic}' using {self.llm_model}")

	        prompt = self._build_prompt(topic)
	        manifest_text = self._call_llm(prompt)
	        manifest = self._parse_manifest(manifest_text, topic)

	        print(f"[content] Manifest ready — {len(manifest['scenes'])} scenes")
	        return manifest

	    def _build_prompt(self, topic: str) -> str:
	        """Build the LLM prompt to generate a manifest."""
	        return f"""You are a viral TikTok/Reels content expert. Generate a structured JSON manifest for a short-form video.

	Topic: {topic}

	Generate exactly {self.scenes_per_video} scenes. Each scene should:
	- Have a memorable label (3-5 words, can start with ~)
	- Have an aesthetic image_query (for Pinterest/Google Images search)

	Also provide:
	- A catchy title
	- An engaging caption (hooks viewers to comment/share)
	- 4-5 trending hashtags

	Respond with ONLY valid JSON, no markdown or extra text:

	{{
	"title": "...",
	"scenes": [
	{{"label": "...", "image_query": "..."}},
	...
	],
	"caption": "...",
	"hashtags": ["#...", "#...", ...]
	}}"""

	    def _call_llm(self, prompt: str) -> str:
	        """Call the LLM via DashScope OpenAI-compatible API.

	        Args:
	            prompt: Full user prompt to send as a single chat message.

	        Returns:
	            The assistant message text with markdown code fences removed.

	        Raises:
	            Exception: Any HTTP/transport error from ``requests``, or a
	                ``RuntimeError`` when the response reports an error or has
	                no usable content.  The error is logged, then re-raised.
	        """
	        headers = {
	            "Authorization": f"Bearer {self.dashscope_api_key}",
	            "Content-Type": "application/json",
	        }
	        payload = {
	            "model": self.llm_model,
	            "messages": [{"role": "user", "content": prompt}],
	            "temperature": 0.7,
	            "max_tokens": 2000,
	        }

	        try:
	            response = requests.post(
	                f"{self.api_url}/chat/completions",
	                headers=headers,
	                json=payload,
	                timeout=180,
	            )
	            response.raise_for_status()
	            content = self._extract_content(response.json())
	            return self._strip_code_fences(content)
	        except Exception as e:
	            print(f"[content] ERROR calling LLM: {e}")
	            raise

	    @staticmethod
	    def _extract_content(data: Any) -> str:
	        """Pull the first choice's message text out of a chat-completions reply.

	        Raises:
	            RuntimeError: If the payload carries an ``error`` field, or has no
	                non-empty string content (missing/empty ``choices``, null
	                ``message``, blank ``content``).
	        """
	        if isinstance(data, dict) and "error" in data:
	            raise RuntimeError(f"API error: {data['error']}")

	        choices = data.get("choices") if isinstance(data, dict) else None
	        # An empty or malformed ``choices`` must not surface as IndexError.
	        first = choices[0] if isinstance(choices, list) and choices else None
	        message = first.get("message") if isinstance(first, dict) else None
	        content = message.get("content") if isinstance(message, dict) else None

	        if not isinstance(content, str) or not content:
	            raise RuntimeError("Empty response from LLM")
	        return content

	    @staticmethod
	    def _strip_code_fences(content: str) -> str:
	        """Remove markdown code-fence markers (with or without a language tag)."""
	        content = re.sub(r"^```[\w-]*\s*", "", content, flags=re.MULTILINE)
	        content = re.sub(r"\s*```$", "", content, flags=re.MULTILINE)
	        return content.strip()

	    def _parse_manifest(self, text: str, topic: str) -> Dict[str, Any]:
	        """Parse and validate the JSON manifest from LLM response.

	        Args:
	            text: Raw model output, expected to be (or contain) a JSON object.
	            topic: Original topic, used to fill in anything missing.

	        Returns:
	            A manifest with all required keys, exactly ``scenes_per_video``
	            well-formed scenes, emoji-free caption/hashtags and a timestamp.
	            Falls back to ``_create_fallback_manifest`` when no JSON object
	            can be recovered from ``text``.
	        """
	        if not text:
	            print("[content] ERROR: Empty response from LLM, using fallback manifest")
	            return self._create_fallback_manifest(topic)

	        try:
	            manifest = json.loads(text)
	        except json.JSONDecodeError:
	            # Try to find JSON block in case there's still extra text
	            json_match = re.search(r"\{.*\}", text, re.DOTALL)
	            if json_match is None:
	                print("[content] WARNING: No JSON found in response, using fallback manifest")
	                return self._create_fallback_manifest(topic)
	            try:
	                manifest = json.loads(json_match.group())
	            except json.JSONDecodeError:
	                print("[content] WARNING: Could not parse LLM JSON, using fallback manifest")
	                return self._create_fallback_manifest(topic)

	        # Valid JSON that is not an object (list, string, number) is unusable.
	        if not isinstance(manifest, dict):
	            print("[content] WARNING: LLM JSON is not an object, using fallback manifest")
	            return self._create_fallback_manifest(topic)

	        # Fill missing or wrongly-typed fields from one fallback instance.
	        fallback = self._create_fallback_manifest(topic)
	        for key, expected_type in self._REQUIRED_FIELDS.items():
	            if key not in manifest:
	                print(f"[content] WARNING: Missing key '{key}' in manifest")
	                manifest[key] = fallback[key]
	            elif not isinstance(manifest[key], expected_type):
	                print(f"[content] WARNING: Key '{key}' has unexpected type, using fallback value")
	                manifest[key] = fallback[key]

	        # Ensure exactly scenes_per_video scenes
	        scenes = manifest["scenes"]
	        if len(scenes) != self.scenes_per_video:
	            print(f"[content] WARNING: Expected {self.scenes_per_video} scenes, got {len(scenes)}")
	            if len(scenes) > self.scenes_per_video:
	                scenes = scenes[:self.scenes_per_video]
	            else:
	                while len(scenes) < self.scenes_per_video:
	                    scenes.append({
	                        "label": f"Bonus Tip {len(scenes) + 1}",
	                        "image_query": topic,
	                    })

	        # Ensure each scene has label and image_query
	        for i, scene in enumerate(scenes):
	            if not isinstance(scene, dict):
	                scenes[i] = {"label": str(scene), "image_query": topic}
	                continue
	            scene.setdefault("label", f"Scene {i + 1}")
	            scene.setdefault("image_query", topic)
	        manifest["scenes"] = scenes

	        # Remove emojis from caption and hashtags
	        manifest["caption"] = self._remove_emojis(manifest["caption"])
	        manifest["hashtags"] = [
	            self._remove_emojis(str(tag)) for tag in manifest["hashtags"]
	        ]

	        # Add timestamp
	        manifest["timestamp"] = datetime.now().isoformat()

	        return manifest

	    def _create_fallback_manifest(self, topic: str) -> Dict[str, Any]:
	        """Create a basic fallback manifest if LLM fails."""
	        return {
	            "title": topic.title(),
	            "scenes": [
	                {"label": f"Scene {i + 1}", "image_query": topic}
	                for i in range(self.scenes_per_video)
	            ],
	            "caption": "Which one hits hardest?",
	            "hashtags": ["#relatable", "#fyp", "#trending"],
	            "timestamp": datetime.now().isoformat(),
	        }

	    def _remove_emojis(self, text: str) -> str:
	        """Strip emoji and pictographic symbols, then collapse whitespace.

	        Letters of every script (including CJK, kana and Hangul) and ordinary
	        punctuation are kept; each run of emoji becomes a single space before
	        whitespace is normalised and the ends are trimmed.
	        """
	        text = self._EMOJI_PATTERN.sub(" ", text)
	        return re.sub(r"\s+", " ", text).strip()