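# suno / video_generation.py
# Web-page header residue preserved as a comment so the module parses:
#   uploader: Stanley03 (avatar caption "Stanley03's picture")
#   commit:   b188eea (verified) - "Update video_generation.py"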
import os
import io
import base64
import time
import requests
import logging
import random
from typing import Optional
# Module-level logger, named after this module, used by FreeVideoGenerator
# for progress and error messages.
logger = logging.getLogger(__name__)
class FreeVideoGenerator:
    """Generate short videos from text via the Hugging Face Inference API.

    The generator enriches a user prompt with style keywords, posts it to the
    hosted inference endpoint of ``current_model`` and returns the resulting
    clip as a ``data:video/mp4;base64,...`` URI. All failures are logged and
    reported as ``None``; the public methods never raise.

    Attributes:
        hf_token: API token sent as a bearer token; empty string means the
            request is sent without an ``Authorization`` header.
        models: Model ids known to this generator.
        current_model: Model id used for requests (first entry of ``models``).
        timeout: Per-request timeout in seconds.
        max_retries: Maximum number of HTTP attempts per generation.
        width, height, fps, frames: Video settings sent to the model.
    """

    # Hosted inference endpoint; the model id is appended as a path segment.
    _API_BASE_URL = "https://api-inference.huggingface.co/models"

    _NEGATIVE_PROMPT = "blurry, low quality, distorted, watermark, text"

    # Style fragments appended to every prompt (one is picked at random).
    _CINEMATIC_STYLES = (
        "cinematic, 8k, ultra detailed, high quality, masterpiece",
        "epic, dramatic lighting, film grain, cinematic shot, professional",
        "beautiful, stunning, visually striking, vivid colors, trending",
        "high resolution, detailed, sharp focus, studio quality",
    )

    # A prompt containing any of these substrings (case-insensitive) also
    # receives one of the cultural style fragments below.
    _CULTURAL_KEYWORDS = (
        "africa", "kenya", "tanzania", "safari", "wildlife", "cultural",
    )

    _CULTURAL_STYLES = (
        "African style, vibrant colors, cultural elements, traditional",
        "East African landscape, warm colors, cultural symbolism",
        "African documentary style, natural lighting, authentic",
        "Traditional African art style, symbolic, meaningful",
    )

    # Fallbacks used when an unknown theme or style is requested.
    _DEFAULT_THEME = "safari"
    _DEFAULT_STYLE_ENHANCEMENT = "animated, vibrant colors"

    # Single source of truth for the themed prompts; get_video_info() derives
    # its theme list from the keys, so the two can no longer drift apart.
    _CULTURAL_THEMES = {
        "safari": {
            "prompt": "African safari sunset with elephants and giraffes walking, majestic savanna landscape, acacia trees, warm colors",
            "styles": {
                "animated": "animated, cartoon style, smooth motion, vibrant colors",
                "realistic": "realistic, documentary style, cinematic, natural lighting",
            },
        },
        "dance": {
            "prompt": "Traditional Maasai warriors dancing, vibrant colors, cultural celebration, energetic movement, community",
            "styles": {
                "animated": "animated, lively motion, colorful, celebratory",
                "realistic": "realistic, documentary footage, authentic, cultural",
            },
        },
        "market": {
            "prompt": "Busy African market scene, vibrant colors, people trading goods, lively atmosphere, traditional clothing",
            "styles": {
                "animated": "animated, bustling market, colorful stalls, lively",
                "realistic": "realistic, documentary style, authentic market scene",
            },
        },
        "coastal": {
            "prompt": "Swahili coast with traditional dhows sailing, Indian Ocean waves, beach scenery, palm trees, traditional architecture",
            "styles": {
                "animated": "animated, ocean waves, sailing dhows, coastal life",
                "realistic": "realistic, coastal documentary, ocean scenery",
            },
        },
        "wildlife": {
            "prompt": "African wildlife documentary style, lions hunting on savanna, dramatic nature scene, wildlife behavior",
            "styles": {
                "animated": "animated, wildlife cartoon, animal movement",
                "realistic": "realistic, nature documentary, wildlife footage",
            },
        },
        "village": {
            "prompt": "Traditional African village life, community activities, sunset over huts, daily life, cultural activities",
            "styles": {
                "animated": "animated, village life, community activities",
                "realistic": "realistic, documentary style, authentic village",
            },
        },
    }

    def __init__(self, hf_token: Optional[str] = None):
        """Set up the generator.

        Args:
            hf_token: Hugging Face API token. When omitted or empty, the
                ``HF_TOKEN`` environment variable is used (empty if unset).
        """
        self.hf_token = hf_token or os.getenv('HF_TOKEN', '')

        # Available free models
        self.models = [
            "cerspense/zeroscope_v2_576w",
            "damo-vilab/text-to-video-ms-1.7b",
        ]
        self.current_model = self.models[0]
        self.timeout = 120
        self.max_retries = 2

        # Video settings
        self.width = 576
        self.height = 320
        self.fps = 8
        self.frames = 24

    def enhance_prompt_with_context(self, prompt: str) -> str:
        """Return *prompt* extended with style keywords and technical specs.

        One randomly chosen cinematic fragment is always appended. If the
        prompt mentions one of the cultural keywords, a randomly chosen
        cultural fragment follows. The configured resolution and frame rate
        are appended last.
        """
        lowered = prompt.lower()
        # The cinematic choice is drawn first, then the cultural one.
        parts = [prompt, random.choice(self._CINEMATIC_STYLES)]
        if any(keyword in lowered for keyword in self._CULTURAL_KEYWORDS):
            parts.append(random.choice(self._CULTURAL_STYLES))
        parts.append(f"{self.width}x{self.height} resolution, {self.fps} fps")
        return ", ".join(parts)

    def generate_text_to_video(self, prompt: str) -> Optional[str]:
        """Generate a video for *prompt* and return it as a data URI.

        Makes up to ``max_retries`` attempts. Timeouts and HTTP 503 responses
        (logged as "model loading") are retried, the latter after a growing
        pause; client errors other than 429 end the attempts immediately
        because repeating the same request cannot succeed.

        Returns:
            ``"data:video/mp4;base64,<payload>"`` on success, ``None`` when
            the prompt is empty or every attempt failed.
        """
        try:
            if not prompt or not prompt.strip():
                logger.warning("Video generation skipped: empty prompt")
                return None

            payload = self._build_payload(self.enhance_prompt_with_context(prompt))
            headers = {}
            if self.hf_token:
                headers["Authorization"] = f"Bearer {self.hf_token}"
            url = f"{self._API_BASE_URL}/{self.current_model}"

            for attempt in range(self.max_retries):
                attempt_no = attempt + 1
                is_last_attempt = attempt_no >= self.max_retries
                logger.info(
                    "🎬 Generating video (attempt %d): %s...",
                    attempt_no, prompt[:50],
                )
                try:
                    response = requests.post(
                        url,
                        headers=headers,
                        json=payload,
                        timeout=self.timeout,
                    )
                except requests.exceptions.Timeout:
                    logger.warning("⏰ Request timeout, attempt %d", attempt_no)
                    continue
                except requests.exceptions.RequestException as e:
                    logger.error("Video generation error: %s", e)
                    break

                status = response.status_code
                if status == 200:
                    content_type = response.headers.get("Content-Type", "")
                    # An empty or JSON body is not a playable clip; wrapping
                    # it as video/mp4 would hand callers a broken data URI.
                    if response.content and "json" not in content_type.lower():
                        return self._to_data_uri(response.content)
                    logger.error(
                        "Video API returned no video data (content type %r)",
                        content_type,
                    )
                elif status == 503:
                    if is_last_attempt:
                        # Nothing left to wait for: skip the pointless sleep.
                        logger.error("Video API error %s: model still loading", status)
                    else:
                        wait_time = attempt_no * 10
                        logger.info("⏳ Model loading, waiting %ds...", wait_time)
                        time.sleep(wait_time)
                else:
                    logger.error("Video API error %s: %s", status, response.text[:200])
                    if 400 <= status < 500 and status != 429:
                        break
        except Exception as e:
            # Boundary handler: callers rely on None rather than an exception.
            logger.exception("Video generation failed: %s", e)
        return None

    def create_cultural_video(self, theme: str, style: str = "animated") -> Optional[str]:
        """Create a video for one of the built-in African cultural themes.

        Args:
            theme: Theme name (case-insensitive); unknown themes fall back to
                ``"safari"``.
            style: ``"animated"`` or ``"realistic"``; unknown styles fall back
                to a generic animated description.

        Returns:
            The data URI from :meth:`generate_text_to_video`, or ``None`` on
            failure.
        """
        return self.generate_text_to_video(self._build_cultural_prompt(theme, style))

    def get_video_info(self) -> dict:
        """Get information about video generation capabilities."""
        # Guard the division so a zero frame rate reports 0.0 instead of raising.
        duration = self.frames / self.fps if self.fps else 0.0
        return {
            # Copy so callers cannot mutate the generator's own model list.
            "available_models": list(self.models),
            "current_model": self.current_model,
            "resolution": f"{self.width}x{self.height}",
            "fps": self.fps,
            "max_frames": self.frames,
            "max_duration": f"{duration:.1f} seconds",
            "cultural_themes": list(self._CULTURAL_THEMES),
            "styles": ["animated", "realistic"],
            "free": True,
            "provider": "Hugging Face Inference API",
            "creator": "Stanley Samwel Owino",
        }

    def _build_payload(self, enhanced_prompt: str) -> dict:
        """Return the JSON request body for *enhanced_prompt*."""
        return {
            "inputs": enhanced_prompt,
            "parameters": {
                "num_frames": self.frames,
                "num_inference_steps": 25,
                "guidance_scale": 7.5,
                "fps": self.fps,
                "height": self.height,
                "width": self.width,
                "negative_prompt": self._NEGATIVE_PROMPT,
            },
        }

    @classmethod
    def _build_cultural_prompt(cls, theme, style) -> str:
        """Return the full text prompt for a cultural *theme* and *style*."""
        # Normalise only real strings so None (or other values) still reach
        # the silent safari fallback instead of raising.
        theme_key = theme.strip().lower() if isinstance(theme, str) else theme
        theme_data = cls._CULTURAL_THEMES.get(
            theme_key, cls._CULTURAL_THEMES[cls._DEFAULT_THEME]
        )
        base_prompt = theme_data["prompt"]
        style_enhancement = theme_data["styles"].get(
            style, cls._DEFAULT_STYLE_ENHANCEMENT
        )
        return f"{base_prompt}, {style_enhancement}, cultural, authentic, East African"

    @staticmethod
    def _to_data_uri(content: bytes) -> str:
        """Wrap raw video bytes in a base64 ``video/mp4`` data URI."""
        encoded = base64.b64encode(content).decode('utf-8')
        return f"data:video/mp4;base64,{encoded}"