| | import databases |
| | import orm |
| | import asyncio, os |
| | import uuid, random |
| | from pydub import AudioSegment |
| | from .DescriptAPI import Speak |
| | from .ElevenLab import ElevenLab |
| | from .Vercel import AsyncImageGenerator |
| | from .Video3d import VideoGenerator |
| | import aiohttp |
| | from typing import List |
| | from pydantic import BaseModel |
| | import tempfile |
| | import json |
| |
|
# Database connection string is read from the environment; the fallback
# "RANDOM_STRING" is a placeholder that will not connect to a real database.
SUPABASE = os.environ.get("SUPABASE", "RANDOM_STRING")
database_url = SUPABASE
database = databases.Database(database_url)
# Single model registry shared by every ORM model defined in this module.
models = orm.ModelRegistry(database=database)
| |
|
| |
|
class WordAlignment(BaseModel):
    """One word of a forced-alignment transcript, with timeline timestamps."""

    text: str
    alignedWord: str
    start: float
    end: float
    hasFailedAlignment: bool

    @classmethod
    def from_old_format(cls, data: dict, offset: float = 0.0):
        """Build a WordAlignment from a legacy alignment dict.

        The legacy format uses the keys ``word``/``startTime``/``endTime``;
        ``offset`` (seconds) shifts both timestamps onto the project timeline.
        """
        mapped = {
            "text": data["word"],
            "alignedWord": data["alignedWord"],
            "start": data["startTime"] + offset,
            "end": data["endTime"] + offset,
            "hasFailedAlignment": data["hasFailedAlignment"],
        }
        return cls(**mapped)
| |
|
| |
|
def transform_alignment_data(data: List[dict], offset: float = 0.0) -> List[dict]:
    """Convert legacy alignment dicts into WordAlignment-shaped dicts.

    Each entry is validated through WordAlignment (renaming keys and adding
    ``offset`` seconds to its timestamps) and dumped back to a plain dict.
    """
    transformed = []
    for entry in data:
        word = WordAlignment.from_old_format(entry, offset)
        transformed.append(word.model_dump())
    return transformed
| |
|
| |
|
class Project(orm.Model):
    """ORM model for a video project.

    ``generate_json`` lays the project's scenes out on a timeline and produces
    the download links, asset sequences, and render constants consumed by the
    downstream renderer (30 fps is assumed throughout — see the ``* 30``
    frame conversions).
    """

    tablename = "projects"
    # Timeline cursor in seconds, advanced as assets are laid out.
    # NOTE(review): this is a *class* attribute; `self.start = self.start + ...`
    # reads the class value (0) on first use and then shadows it per instance.
    # If one Project instance runs generate_json twice, the cursor keeps
    # growing — confirm single-run-per-instance is intended.
    start = 0
    registry = models
    fields = {
        "id": orm.Integer(primary_key=True),
        "name": orm.String(max_length=10_000),
        "aspect_ratio": orm.Float(allow_null=True, default=0),
        "transcript": orm.JSON(allow_null=True, default=[]),
        "duration": orm.Integer(allow_null=True, default=0),   # seconds
        "assets": orm.JSON(allow_null=True, default=[]),
        "links": orm.JSON(allow_null=True, default=[]),
        "constants": orm.JSON(allow_null=True, default={}),
    }

    async def get_all_scenes(self):
        """Return this project's scenes ordered by primary key (creation order)."""
        return await Scene.objects.filter(project=self).order_by("id").all()

    async def generate_json(self):
        """Build the renderer payload: download links, asset tracks, constants.

        Walks every scene in order, appending narration audio, per-scene
        images/videos, and word-level captions to parallel asset sequences,
        then persists the mutated fields and returns the payload.
        """
        project_scenes: List[Scene] = await self.get_all_scenes()
        self.links = []
        self.assets = []
        image_assets = []
        video_assets = []
        audio_assets = []
        text_stream = []

        # NOTE(review): defined but never used below — looks like leftover
        # transition-clip wiring; confirm before deleting.
        transitions = [
            "Happy_transparent.webm",
        ]

        # Click sound effect available to the renderer.
        # NOTE(review): "×tamp_ms" in this URL looks like HTML-entity mangling
        # of "&timestamp_ms" (the "&times" prefix rendered as "×") — verify the
        # link still resolves.
        self.links.append(
            {
                "file_name": "sfx_1.mp3",
                "link": "https://dm0qx8t0i9gc9.cloudfront.net/previews/audio/BsTwCwBHBjzwub4i4/click-match_My50GP4u_NWM.mp3?type=preview&origin=AUDIOBLOCKS×tamp_ms=1715843203035&publicKey=kUhrS9sKVrQMTvByQMAGMM0jwRbJ4s31HTPVkfDGmwGhYqzmWJHsjIw5fZCkI7ba&organizationId=105711&apiVersion=2.0&stockItemId=28820&resolution=&endUserId=414d29f16694d76c58e7998200a8dcf6f28dc165&projectId=f734c6d7-e39d-4c1d-8f41-417f94cd37ce&searchId=adb77624-5919-41ee-84c6-58e7af098a6d&searchPageId=9124f65b-3e21-47ac-af6b-81387328b7b5",
            }
        )
        for scene in project_scenes:
            _, file_name = os.path.split(scene.narration_path)
            self.duration += scene.narration_duration
            self.links.append({"file_name": file_name, "link": scene.narration_link})

            # Word alignments already shifted onto the project timeline by
            # `offset`; the sleep presumably paces the transcript API — confirm.
            temp = await scene.generate_scene_transcript(offset=self.start)
            await asyncio.sleep(1)
            # Last word is the " master" sentinel appended by the Scene TTS
            # calls; its start time marks the true end of the narration.
            end_word = temp[-1]

            audio_assets.append(
                {
                    "type": "audio",
                    "name": file_name,
                    "start": self.start,
                    "end": end_word["start"],
                    "props": {
                        "startFrom": 0,
                        # NOTE(review): end_word["start"] is an *absolute*
                        # timeline value (offset already added), yet startFrom
                        # is 0 — for scenes after the first, endAt may overrun
                        # the clip length. Confirm renderer semantics.
                        "endAt": end_word["start"] * 30,
                    },
                }
            )
            # Drop the sentinel word from the visible captions.
            text_stream.extend(temp[:-1])

            # All assets of a scene share one extension; inspect the first.
            sample_image_extension = scene.images[0].split(".")[-1]

            if sample_image_extension == "mp4":
                # Video scene: slow, muted, looping 1080x1920 cover clips.
                for image in scene.images:
                    file_name = str(uuid.uuid4()) + ".mp4"
                    self.links.append({"file_name": file_name, "link": image})
                    video_assets.append(
                        {
                            "type": "video",
                            "name": file_name,
                            "start": self.start,
                            "loop": "true",  # string, not bool — renderer expects it
                            "end": self.start + scene.image_duration,
                            "props": {
                                "volume": 0,
                                # Use the 1.0s–2.9s window of the source clip,
                                # expressed in frames at 30 fps.
                                "startFrom": 1 * 30,
                                "endAt": 2.9 * 30,
                                "playbackRate": 0.7,
                                "style": {
                                    "transform": "translate(-50%, -50%)",
                                    "position": "absolute",
                                    "top": "50%",
                                    "left": "50%",
                                    "width": 1080,
                                    "height": 1920,
                                    "objectFit": "cover",
                                },
                            },
                        }
                    )
                    self.start = self.start + scene.image_duration

            else:
                # Still-image scene: one full-duration slot per image.
                for image in scene.images:
                    file_name = str(uuid.uuid4()) + ".png"
                    self.links.append({"file_name": file_name, "link": image})
                    image_assets.append(
                        {
                            "type": "image",
                            "name": file_name,
                            "start": self.start,
                            "end": self.start + scene.image_duration,
                        }
                    )
                    self.start = self.start + scene.image_duration

        # Assemble the four tracks in renderer order: audio, image, video, text.
        self.assets.append({"type": "audio", "sequence": audio_assets})
        self.assets.append({"type": "image", "sequence": image_assets})
        self.assets.append(
            {"type": "video", "sequence": video_assets},
        )
        self.constants = {
            "duration": self.duration * 30,  # total length in frames @ 30 fps
            "height": 1920,
            "width": 1080,
        }
        self.assets.append({"type": "text", "sequence": text_stream})

        # Persist every mutated field. NOTE(review): dumping __dict__ relies on
        # the ORM ignoring non-field attributes — confirm against orm.Model.update.
        await self.update(**self.__dict__)
        return {"links": self.links, "assets": self.assets, "constants": self.constants}
| |
|
| |
|
class Scene(orm.Model):
    """ORM model for one scene: narration audio plus its image/video assets."""

    # Shared TTS client used for narration synthesis and forced alignment.
    tts = ElevenLab()

    tablename = "scenes"
    registry = models
    fields = {
        "id": orm.Integer(primary_key=True),
        "project": orm.ForeignKey(Project),
        "images": orm.JSON(default=None),
        "narration": orm.String(max_length=10_000, allow_null=True, default=""),
        "image_prompts": orm.JSON(default=None),
        "narration_duration": orm.Float(allow_null=True, default=0),  # seconds
        "image_duration": orm.Float(allow_null=True, default=0),      # seconds per image
        "narration_path": orm.String(
            max_length=100,
            allow_null=True,
            default="",
        ),
        "narration_link": orm.String(max_length=10_000, allow_null=True, default=""),
    }

    async def generate_scene_transcript(self, offset):
        """Return word-level alignments for this scene's narration.

        ``offset`` (seconds) shifts every timestamp onto the project timeline.
        The " master" sentinel must match the text used at synthesis time (see
        retry_narration_generation) so the alignment lines up; the caller drops
        the sentinel word.
        """
        links = [self.narration_link]
        text = self.narration + " master"
        transcript = await self.tts._make_transcript(links=links, text=text)
        return transform_alignment_data(data=transcript, offset=offset)

    async def generate_scene_data(self):
        """Produce narration audio and images concurrently, then derive durations."""
        await asyncio.gather(self.narrate(), self.generate_images())
        self.calculate_durations()

    async def narrate(self):
        """Synthesize narration audio and record its remote link and local path."""
        link, path = await self.retry_narration_generation()
        self.narration_path = path
        self.narration_link = link

    async def retry_narration_generation(self):
        """Call the TTS service, retrying up to 3 times with a 1 s pause.

        Returns:
            The (link, path) tuple produced by ``tts.say``.

        Raises:
            RuntimeError: when all 3 attempts fail, chained to the last
                underlying error. (Previously this returned None, which made
                ``narrate`` crash with an opaque unpacking TypeError.)
        """
        last_error = None
        for _attempt in range(3):
            try:
                return await self.tts.say(
                    text=self.narration + " master"
                )
            except Exception as e:
                last_error = e
                print(f"Failed to generate narration: {e}")
                await asyncio.sleep(1)

        print("Failed to generate narration after 3 attempts.")
        raise RuntimeError(
            "Failed to generate narration after 3 attempts."
        ) from last_error

    def calculate_durations(self):
        """Derive narration_duration (whole seconds) and per-image duration.

        Assumes narration audio exists on disk and image_prompts is non-empty.
        """
        file_format = self.narration_path.split(".")[-1]
        audio_file = AudioSegment.from_file(self.narration_path, format=file_format)
        # pydub reports length in milliseconds; int() truncates partial seconds.
        self.narration_duration = int(len(audio_file) / 1000)
        self.image_duration = self.narration_duration / len(self.image_prompts)

    async def generate_images(self):
        """Generate images for every prompt via the async image API.

        Fix: the old loop re-submitted the generation job (generate_image) on
        every poll iteration and spun without any delay. Each prompt is now
        submitted exactly once, and its status is polled with a 1 s backoff
        until output appears.
        """
        self.images = []
        async with aiohttp.ClientSession() as session:
            image_generator = AsyncImageGenerator(session)
            for payload in self.image_prompts:
                result = await image_generator.generate_image(payload)
                while True:
                    status = await image_generator.fetch_image_status(result["id"])
                    if status["output"]:
                        self.images.extend(status["output"])
                        break
                    await asyncio.sleep(1)
| |
|
| |
|
class Transition(orm.Model):
    """ORM model for a reusable transition clip (display name + file path)."""

    tablename = "transitions"
    registry = models
    fields = {
        "id": orm.Integer(primary_key=True),
        "name": orm.String(max_length=100),
        "file_path": orm.String(max_length=100),
    }
| |
|
| |
|
class BackgroundMusic(orm.Model):
    """ORM model for a background-music track (display name + file path)."""

    tablename = "background_music"
    registry = models
    fields = {
        "id": orm.Integer(primary_key=True),
        "name": orm.String(max_length=100),
        "file_path": orm.String(max_length=100),
    }
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| |
|