text to video: v1
- .gitignore +162 -0
- README.md +1 -1
- app.py +38 -0
- constants.py +14 -0
- env.example +2 -0
- requirements.txt +85 -0
- response_schemas.py +14 -0
- structured_output_extractor.py +102 -0
- utils.py +419 -0
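
This commit adds the whole Space in one go: app.py collects the script, language, and speaker, and the helpers in utils.py split the script into scenes, generate per-scene audio and images, and stitch them into an MP4. A minimal sketch of that flow, paraphrasing the app.py diff below (the story text and the "Urdu"/"Asad" choices are just placeholders taken from the examples in the code):

    # Sketch of the end-to-end pipeline wired up in app.py (see the diffs below).
    from utils import get_scenes, generate_video_assets, generate_video

    story = "Once upon a time..."                                   # placeholder input text
    scenes = get_scenes(story)                                      # LLM splits the story into scenes + image prompts
    assets_folder = generate_video_assets(scenes, "Urdu", "Asad")   # TTS audio + generated images per scene
    video_path = generate_video(assets_folder)                      # stitches images and audio into final_video.mp4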
.gitignore
ADDED
@@ -0,0 +1,162 @@
.env
venv/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
# db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock

# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
README.md
CHANGED
@@ -1,7 +1,7 @@
 ---
 title: Text To Video Generator
 emoji: 🚀
-colorFrom:
+colorFrom: green
 colorTo: yellow
 sdk: streamlit
 sdk_version: 1.42.0
app.py
ADDED
@@ -0,0 +1,38 @@
import streamlit as st
from gradio_client import Client
from utils import get_scenes, generate_video_assets, generate_video  # Import the function from utils.py

# Streamlit app
st.title("Text to Video Generator")

# Text input box with a max of 1500 characters
text_script = st.text_area("Enter your text (max 1500 characters):", max_chars=1500)


# Initialize the client with the hosted model
client = Client("habib926653/Multilingual-TTS")

# Dropdown for language selection
language = st.selectbox("Choose Language:", ["Urdu", "English"])  # Add more languages as needed

# Get available speakers for the selected language
speakers_response = client.predict(language=language, api_name="/get_speakers")

# Extract speakers list
speakers = [choice[0] for choice in speakers_response["choices"]]
selected_speaker = st.selectbox("Choose Speaker:", speakers)



# Button to trigger the processing
if st.button("Generate Video"):
    if text_script:
        # Call the function from utils.py to process the text
        scenes = get_scenes(text_script)
        video_assets_folder = generate_video_assets(scenes, language, selected_speaker)
        st.write(video_assets_folder)
        generated_video_path = generate_video(video_assets_folder)
        st.video(generated_video_path)

    else:
        st.warning("Please enter some text to generate prompts.")
constants.py
ADDED
@@ -0,0 +1,14 @@
from dotenv import load_dotenv
import os

load_dotenv()

HF_TOKEN = os.getenv("HF_TOKEN", None)

SUMMARIZATION_ENDPOINT="https://habib926653-text-translator-agent-api.hf.space/generate"
IMAGE_GENERATION_SPACE_NAME="habib926653/stabilityai-stable-diffusion-3.5-large-turbo"

# Supported formats
SUPPORTED_FORMATS = ["mp3", "wav", "ogg", "flac", "aac", "m4a"]
env.example
ADDED
@@ -0,0 +1,2 @@
HF_TOKEN=HUGGING_FACE_TOKEN
GROQ_API_KEY=GROQ_API_KEY
requirements.txt
ADDED
@@ -0,0 +1,85 @@
altair==5.5.0
annotated-types==0.7.0
anyio==4.8.0
attrs==24.3.0
audeer==2.2.1
audiofile==1.5.1
audmath==1.4.1
blinker==1.9.0
cachetools==5.5.0
certifi==2024.12.14
cffi==1.17.1
charset-normalizer==3.4.1
click==8.1.8
decorator==4.4.2
distro==1.9.0
exceptiongroup==1.2.2
filelock==3.16.1
fsspec==2024.12.0
gitdb==4.0.12
GitPython==3.1.44
gradio_client==1.5.4
groq==0.15.0
h11==0.14.0
httpcore==1.0.7
httpx==0.28.1
huggingface-hub==0.27.1
idna==3.10
imageio==2.36.1
imageio-ffmpeg==0.5.1
Jinja2==3.1.5
jsonpatch==1.33
jsonpointer==3.0.0
jsonschema==4.23.0
jsonschema-specifications==2024.10.1
langchain-core==0.3.29
langchain-groq==0.2.3
langgraph==0.2.62
langgraph-checkpoint==2.0.9
langgraph-sdk==0.1.51
langsmith==0.2.10
markdown-it-py==3.0.0
MarkupSafe==3.0.2
mdurl==0.1.2
moviepy==1.0.3
msgpack==1.1.0
narwhals==1.21.1
numpy==2.2.1
opencv-python==4.10.0.84
orjson==3.10.14
packaging==24.2
pandas==2.2.3
pillow==11.1.0
proglog==0.1.10
protobuf==5.29.3
pyarrow==18.1.0
pycparser==2.22
pydantic==2.10.5
pydantic_core==2.27.2
pydeck==0.9.1
pydub==0.25.1
Pygments==2.19.1
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
pytz==2024.2
PyYAML==6.0.2
referencing==0.35.1
requests==2.32.3
requests-toolbelt==1.0.0
rich==13.9.4
rpds-py==0.22.3
scipy==1.15.1
six==1.17.0
smmap==5.0.2
sniffio==1.3.1
soundfile==0.13.0
streamlit==1.41.1
tenacity==9.0.0
toml==0.10.2
tornado==6.4.2
tqdm==4.67.1
typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.3.0
watchdog==6.0.0
websockets==14.1
response_schemas.py
ADDED
@@ -0,0 +1,14 @@
from pydantic import BaseModel, Field
from typing import List

class SingleScene(BaseModel):
    text: str = Field(description="Actual Segment of text from the complete story")
    image_prompts: List[str] = Field(
        description="""List of detailed and descriptive image prompts for the segment
        prompt format: [theme: {atmosphere/mood}] [style: {artistic/photorealistic}] [focus: {main subject}] [details: {specific elements}] [lighting: {day/night/mystic}] [perspective: {close-up/wide-angle}]"
        Example: "theme: eerie forest | style: cinematic realism | focus: abandoned cabin | details: broken windows, overgrown vines | lighting: moonlit fog | perspective: wide-angle shot"
        """
    )

class ScenesResponseSchema(BaseModel):
    scenes: List[SingleScene]
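
For reference, a payload shaped like the one below validates against ScenesResponseSchema; the story segment and the image prompt are illustrative only, written to follow the prompt format described in the field above:

    # Hypothetical example instance (not taken from the repo) that ScenesResponseSchema accepts.
    from response_schemas import ScenesResponseSchema

    example = ScenesResponseSchema(scenes=[{
        "text": "In a quiet village, a young girl named Lily discovered a hidden garden.",
        "image_prompts": [
            "theme: serene wonder | style: cinematic realism | focus: hidden garden gate | "
            "details: ivy-covered stone wall, glowing flowers | lighting: golden hour | perspective: wide-angle shot"
        ],
    }])
    print(example.model_dump())  # plain dict, as returned by utils.get_scenes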
structured_output_extractor.py
ADDED
@@ -0,0 +1,102 @@
from typing import Type, Optional
from pydantic import BaseModel
from langgraph.graph import StateGraph, START, END
from typing import TypedDict
import constants  # Assuming constants.py holds LLM provider configurations
from langchain_groq import ChatGroq


# Define the State structure (similar to previous definition)
class State(TypedDict):
    messages: list
    output: Optional[BaseModel]


# Generic Pydantic model-based structured output extractor
class StructuredOutputExtractor:
    def __init__(self, response_schema: Type[BaseModel]):
        """
        Initializes the extractor for any given structured output model.

        :param response_schema: Pydantic model class used for structured output extraction
        """
        self.response_schema = response_schema

        # Initialize language model (provider and API keys come from constants.py)
        # self.llm = ChatGroq(model="llama-3.3-70b-versatile")  # token limit 100k tokens
        self.llm = ChatGroq(model="deepseek-r1-distill-llama-70b")  # currently no limit per day

        # Bind the model with structured output capability
        self.structured_llm = self.llm.with_structured_output(response_schema)

        # Build the graph for structured output
        self._build_graph()

    def _build_graph(self):
        """
        Build the LangGraph computational graph for structured extraction.
        """
        graph_builder = StateGraph(State)

        # Add nodes and edges for structured output
        graph_builder.add_node("extract", self._extract_structured_info)
        graph_builder.add_edge(START, "extract")
        graph_builder.add_edge("extract", END)

        self.graph = graph_builder.compile()

    def _extract_structured_info(self, state: dict):
        """
        Extract structured information using the specified response model.

        :param state: Current graph state
        :return: Updated state with structured output
        """
        query = state['messages'][-1].content
        print(f"Processing query: {query}")
        try:
            # Extract details using the structured model
            output = self.structured_llm.invoke(query)
            # Return the structured response
            return {"output": output}
        except Exception as e:
            print(f"Error during extraction: {e}")
            return {"output": None}

    def extract(self, query: str) -> Optional[BaseModel]:
        """
        Public method to extract structured information.

        :param query: Input query for structured output extraction
        :return: Structured model object or None
        """
        from langchain_core.messages import SystemMessage

        result = self.graph.invoke({
            "messages": [SystemMessage(content=query)]
        })
        # Return the structured model response, if available
        result = result.get('output')
        return result


if __name__ == '__main__':

    # Example Pydantic model (e.g., Movie)
    class Movie(BaseModel):
        title: str
        year: int
        genre: str
        rating: Optional[float] = None
        actors: list[str] = []


    # Example usage with a generic structured extractor
    extractor = StructuredOutputExtractor(response_schema=Movie)

    query = "Tell me about the movie Inception. Provide details about its title, year, genre, rating, and main actors."

    result = extractor.extract(query)
    print(type(result))
    if result:
        print(result)
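
utils.py (next diff) instantiates this extractor with ScenesResponseSchema. A minimal sketch of that call; note that ChatGroq reads the GROQ_API_KEY environment variable (which is why env.example lists it), so this assumes that variable is set:

    # Sketch: how utils.get_scenes wires the extractor to the scene schema (see utils.py below).
    # Assumes GROQ_API_KEY is set in the environment, as suggested by env.example.
    from structured_output_extractor import StructuredOutputExtractor
    from response_schemas import ScenesResponseSchema

    extractor = StructuredOutputExtractor(response_schema=ScenesResponseSchema)
    result = extractor.extract("Split this short story into scenes: ...")  # ScenesResponseSchema or None
    if result:
        print(result.model_dump())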
utils.py
ADDED
@@ -0,0 +1,419 @@
import constants
import os
from PIL import Image
from gradio_client import Client
import moviepy.editor as mp
from moviepy.video.VideoClip import ImageClip
from moviepy.editor import AudioFileClip
from structured_output_extractor import StructuredOutputExtractor
from response_schemas import ScenesResponseSchema
from typing import List, Dict
import tempfile
import os


def get_scenes(text_script: str):

    prompt = f"""
    ROLE: Story to Scene Generator
    Tasks: For the given story
    1. Read it Completely and Understand the Complete Context
    2. Rewrite the story in tiny segments(but without even changing a word) and a descriptive image or list of image prompts to visualize each segment
    3. Make sure each image prompt matches the theme of overall scene and ultimately the story
    4. If necessary, a scene can have more than one image prompts

    Here is the Complete Story: {text_script}
    """


    extractor = StructuredOutputExtractor(response_schema=ScenesResponseSchema)
    result = extractor.extract(prompt)
    return result.model_dump()  # returns dictionary version pydantic model




# def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media"):
#     """
#     Generate video assets (images and audio) for each scene in a structured folder hierarchy.

#     Args:
#         scenes (Dict): A dictionary containing a list of scenes under the key "scenes".
#         language (str): The language code for audio generation.
#         speaker (str): The speaker for audio generation.
#         base_path (str): The base folder where all assets will be stored. Default is "media".

#     Returns:
#         Dict: A dictionary containing the paths to the generated assets.
#     """
#     try:
#         # Ensure the base folder exists
#         if not os.path.exists(base_path):
#             os.makedirs(base_path)

#         # Extract scenes from the input dictionary
#         scenes_list = scenes.get("scenes", [])
#         print(f"Total Scenes: {len(scenes_list)}")

#         # Dictionary to store asset paths
#         assets = {"scenes": []}

#         # Create a folder for the current video
#         video_folder = os.path.join(base_path, f"video_{len(os.listdir(base_path)) + 1}")
#         if not os.path.exists(video_folder):
#             os.makedirs(video_folder)

#         # Create 'images' and 'audio' folders inside the video folder
#         images_folder = os.path.join(video_folder, "images")
#         audio_folder = os.path.join(video_folder, "audio")
#         os.makedirs(images_folder, exist_ok=True)
#         os.makedirs(audio_folder, exist_ok=True)

#         for scene_count, scene in enumerate(scenes_list):
#             text: str = scene.get("text", "")
#             image_prompts: List[str] = scene.get("image_prompts", [])

#             # Create a folder for the current scene inside the 'images' folder
#             scene_images_folder = os.path.join(images_folder, f"scene_{scene_count + 1}")
#             os.makedirs(scene_images_folder, exist_ok=True)

#             # Generate audio for the scene
#             audio_path = os.path.join(audio_folder, f"scene_{scene_count + 1}.mp3")
#             audio_result = generate_audio(text, language, speaker, path=audio_path)

#             if "error" in audio_result:
#                 print(f"Error generating audio for scene {scene_count + 1}: {audio_result['error']}")
#                 continue

#             # Generate images for the scene
#             image_paths = []
#             for count, prompt in enumerate(image_prompts):
#                 image_path = os.path.join(scene_images_folder, f"scene_{scene_count + 1}_image_{count + 1}.png")
#                 image_result = generate_image(prompt=prompt, path=image_path)

#                 if "error" in image_result:
#                     print(f"Error generating image {count + 1} for scene {scene_count + 1}: {image_result['error']}")
#                 else:
#                     image_paths.append(image_path)

#             # Add the scene's asset paths to the dictionary
#             assets["scenes"].append({
#                 "scene_number": scene_count + 1,
#                 "audio_path": audio_path,
#                 "image_paths": image_paths
#             })

#             print(f"Scene: {scene_count + 1}\t No of Images in Scene {scene_count + 1}: {len(image_paths)}")

#         return assets

#     except Exception as e:
#         print(f"Error during video asset generation: {e}")
#         return {"error": str(e)}

def generate_video_assets(scenes: Dict, language: str, speaker: str, base_path: str = "media") -> str:
    try:
        # Ensure the base folder exists
        if not os.path.exists(base_path):
            os.makedirs(base_path)

        # Extract scenes from the input dictionary
        scenes_list = scenes.get("scenes", [])
        print(f"Total Scenes: {len(scenes_list)}")

        # Create a folder for the current video
        video_folder = os.path.join(base_path, f"video_{len(os.listdir(base_path)) + 1}")
        if not os.path.exists(video_folder):
            os.makedirs(video_folder)

        # Create 'images' and 'audio' folders inside the video folder
        images_folder = os.path.join(video_folder, "images")
        audio_folder = os.path.join(video_folder, "audio")
        os.makedirs(images_folder, exist_ok=True)
        os.makedirs(audio_folder, exist_ok=True)

        for scene_count, scene in enumerate(scenes_list):
            text: str = scene.get("text", "")
            image_prompts: List[str] = scene.get("image_prompts", [])

            # Create a folder for the current scene inside the 'images' folder
            scene_images_folder = os.path.join(images_folder, f"scene_{scene_count + 1}")
            os.makedirs(scene_images_folder, exist_ok=True)

            # Generate audio for the scene
            audio_path = os.path.join(audio_folder, f"scene_{scene_count + 1}.mp3")
            audio_result = generate_audio(text, language, speaker, path=audio_path)

            if "error" in audio_result:
                print(f"Error generating audio for scene {scene_count + 1}: {audio_result['error']}")
                continue

            # Generate images for the scene
            image_paths = []
            for count, prompt in enumerate(image_prompts):
                image_path = os.path.join(scene_images_folder, f"scene_{scene_count + 1}_image_{count + 1}.png")
                image_result = generate_image(prompt=prompt, path=image_path)

                if "error" in image_result:
                    print(f"Error generating image {count + 1} for scene {scene_count + 1}: {image_result['error']}")
                else:
                    image_paths.append(image_path)

            print(f"Scene: {scene_count + 1}\t No of Images in Scene {scene_count + 1}: {len(image_paths)}")

        # Return the path of the main video folder
        return video_folder

    except Exception as e:
        print(f"Error during video asset generation: {e}")
        return {"error": str(e)}


def generate_audio(text, language_code, speaker, path='test_audio.mp3'):
    try:
        # Initialize the Gradio Client with the hosted model
        client = Client("habib926653/Multilingual-TTS")

        # Make the API request
        result = client.predict(
            text=text,  # Text input for audio generation
            language_code=language_code,  # Language code (e.g., "Urdu")
            speaker=speaker,  # Selected speaker (e.g., "Asad")
            api_name="/text_to_speech_edge"
        )

        # The result is a tuple: (text, audio_file_path)
        audio_file_path = result[1]  # The generated audio file path

        # Read the audio file as bytes
        with open(audio_file_path, 'rb') as f:
            audio_bytes = f.read()

        # Save the audio bytes to the specified path
        with open(path, 'wb') as f:
            f.write(audio_bytes)

        # Return the result (which includes the file path)
        return {"audio_file": path}

    except Exception as e:
        print(f"Error during audio generation: {e}")
        return {"error": str(e)}


def generate_image(prompt, path='test_image.png'):
    try:
        # Initialize the Gradio Client with Hugging Face token
        client = Client(constants.IMAGE_GENERATION_SPACE_NAME, hf_token=constants.HF_TOKEN)

        # Make the API request
        result = client.predict(
            prompt=prompt,  # Text prompt for image generation
            width=1280,
            height=720,
            api_name="/generate_image"
        )

        image = Image.open(result)
        image.save(path)

        # Return the result (which includes the URL or file path)
        return result

    except Exception as e:
        print(f"Error during image generation: {e}")
        return {"error": str(e)}

def generate_images(image_prompts, folder_name='test_folder'):
    folder_path = tmp_folder(folder_name)
    for index, prompt in enumerate(image_prompts):
        print(index, prompt)
        image_path = generate_image(prompt=prompt, path=f"{folder_path}/{index}.png")
        yield prompt, image_path



def tmp_folder(folder_name: str) -> str:
    # Use the current working directory or any other accessible path for temp folders
    base_tmp_path = os.path.join(os.getcwd(), "tmp_dir")  # Change this to any path you prefer

    # Ensure that the base temp folder exists
    if not os.path.exists(base_tmp_path):
        os.makedirs(base_tmp_path)
        print(f"Base temporary folder '{base_tmp_path}' created.")

    # Define the path for the specific temporary folder
    folder_path = os.path.join(base_tmp_path, folder_name)

    # Create the specific temporary folder if it doesn't exist
    os.makedirs(folder_path, exist_ok=True)

    print(f"Temporary folder '{folder_name}' is ready at {folder_path}.")

    return folder_path



from moviepy.editor import *


import os
import tempfile
from moviepy.editor import AudioFileClip, ImageClip, concatenate_videoclips



def generate_video(video_folder: str, output_filename: str = "final_video.mp4"):
    audio_folder = os.path.join(video_folder, "audio")
    images_folder = os.path.join(video_folder, "images")
    final_clips = []

    # Get all scene folders
    scene_folders = sorted(os.listdir(images_folder))

    for scene in scene_folders:
        scene_path = os.path.join(images_folder, scene)
        audio_path = os.path.join(audio_folder, f"{scene}.mp3")

        if not os.path.exists(audio_path):
            print(f"Warning: Audio file {audio_path} not found. Skipping scene {scene}.")
            continue

        # Get all images for the scene
        image_files = sorted([os.path.join(scene_path, img) for img in os.listdir(scene_path) if img.endswith(('.png', '.jpg', '.jpeg'))])

        if not image_files:
            print(f"Warning: No images found in {scene_path}. Skipping scene {scene}.")
            continue

        # Load audio file
        audio_clip = mp.AudioFileClip(audio_path)

        # Calculate duration per image
        duration_per_image = audio_clip.duration / len(image_files)

        # Create image clips
        image_clips = [mp.ImageClip(img).set_duration(duration_per_image) for img in image_files]

        # Concatenate image clips
        scene_video = mp.concatenate_videoclips(image_clips, method="compose").set_audio(audio_clip)

        final_clips.append(scene_video)

    if not final_clips:
        print("Error: No valid scenes processed.")
        return None

    # Concatenate all scenes
    final_video = mp.concatenate_videoclips(final_clips, method="compose")
    output_path = os.path.join(video_folder, output_filename)
    final_video.write_videofile(output_path, fps=24, codec='libx264')

    return output_path

def generate_video_old(audio_file, images, segments):
    try:
        # Save the uploaded audio file to a temporary location
        file_extension = os.path.splitext(audio_file.name)[1]
        temp_audio_path = tempfile.NamedTemporaryFile(delete=False, suffix=f"{file_extension}")
        temp_audio_path.write(audio_file.read())
        temp_audio_path.close()

        # Load the audio file using MoviePy
        audio = AudioFileClip(temp_audio_path.name)

        # Define YouTube-like dimensions (16:9 aspect ratio)
        frame_width = 1280
        frame_height = 720

        video_clips = []
        total_segments = len(segments)

        for i, current_segment in enumerate(segments):
            start_time = current_segment["start"]
            end_time = current_segment["end"]

            # Calculate the actual duration including any gap until the next segment
            if i < total_segments - 1:
                # If there's a next segment, extend until it starts
                next_segment = segments[i + 1]
                actual_end_time = next_segment["start"]
            else:
                # For the last segment, use its end time
                actual_end_time = end_time

            # Calculate total duration including any gap
            segment_duration = actual_end_time - start_time

            print(f"\nProcessing segment {i + 1}/{total_segments}:")
            print(f"  Start time: {start_time}s")
            print(f"  Base end time: {end_time}s")
            print(f"  Actual end time: {actual_end_time}s")
            print(f"  Total duration: {segment_duration}s")
            print(f"  Text: '{current_segment['text']}'")

            # Ensure the image index is within bounds
            image_path = images[min(i, len(images) - 1)]

            # Create an ImageClip for the current segment
            image_clip = ImageClip(image_path)

            # Resize and pad the image to fit a 16:9 aspect ratio
            image_clip = image_clip.resize(height=frame_height).on_color(
                size=(frame_width, frame_height),
                color=(0, 0, 0),  # Black background
                pos="center"  # Center the image
            )

            # Set the duration and start time for the clip
            image_clip = image_clip.set_duration(segment_duration)
            image_clip = image_clip.set_start(start_time)  # Set the start time explicitly

            video_clips.append(image_clip)

        # Concatenate all the image clips to form the video
        print("Concatenating video clips...")
        video = concatenate_videoclips(video_clips, method="compose")

        # Add the audio to the video
        video = video.set_audio(audio)

        # Save the video to a temporary file
        temp_dir = tempfile.gettempdir()
        video_path = os.path.join(temp_dir, "generated_video.mp4")
        print(f"Writing video file to {video_path}...")
        video.write_videofile(video_path, fps=30, codec="libx264", audio_codec="aac")

        # Clean up the temporary audio file
        os.remove(temp_audio_path.name)
        print("Temporary audio file removed.")

        return video_path

    except Exception as e:
        print(f"Error generating video: {e}")
        return None




# Example usage:
if __name__ == "__main__":
    short_story = """
    In a quiet village, a young girl named Lily discovered a hidden garden.
    Every flower in the garden glowed with a magical light, revealing secrets of the past.
    Lily knew she had found something truly extraordinary.
    """
    generate_audio(short_story, "Urdu", "Asad")
    # scenes_response = get_scenes(short_story)
    # scenes = scenes_response.get("scenes")
    # print("total scenes: ", len(scenes))
    # for scene in scenes:
    #     print("image prompts for this scene", len(scene.get("image_prompts")))
    # print("\n\n")
    # for scene_count,scene in enumerate(scenes):
    #     image_prompts = scene.get("image_prompts")
    #     for count, prompt in enumerate(image_prompts):
    #         generate_image(prompt=prompt, path=f"scene_{scene_count+1}_image_{count+1}.png")