Spaces:

userIdc2024
/

Video-Generator-Tools

Sleeping

File size: 9,642 Bytes

import os
import logging
import streamlit as st
from functools import lru_cache

from audio_gen import audio_generation
from caption_gen import caption_generation
from image_gen import image_generation_change_background
from video_gen import video_generation
from prompt_generator import generate_segments_payload, VeoInputs

from dotenv import load_dotenv


# Load variables from a local .env file (if any) into the process environment
# before anything reads it; check_token_cached() later looks up ACCESS_TOKEN
# via os.getenv().
load_dotenv()

# --- Logger ---
# Root logging configuration for the whole process: INFO and above, with
# timestamp | level | logger name | message on every line.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
)
# Single named logger shared by every tab handler in this file.
logger = logging.getLogger("video_generator_tools")

# Page-level settings (browser tab title, wide layout). This is the first
# st.* call in the file and is kept ahead of all other Streamlit calls.
st.set_page_config(page_title="Video Generator Tools", layout="wide")

# Voice identifiers offered in the Audio Generator tab; the first entry is the
# default selection.
_VOICE_IDS = [
    "Wise_Woman",
    "Friendly_Person",
    "Inspirational_girl",
    "Deep_Voice_Man",
    "Calm_Woman",
    "Casual_Guy",
    "Lively_Girl",
    "Patient_Man",
    "Young_Knight",
    "Determined_Man",
    "Lovely_Girl",
    "Decent_Boy",
    "Imposing_Manner",
    "Elegant_Man",
    "Abbess",
    "Sweet_Girl_2",
    "Exuberant_Girl",
]

# File extensions accepted by the image uploaders.
_IMAGE_TYPES = ["png", "jpg", "jpeg", "webp"]


def _render_json_prompt_tab():
    """Render the "JSON Prompt Generator" tab and handle its form submission.

    Collects the script and style options, builds a ``VeoInputs`` and shows
    the payload returned by ``generate_segments_payload`` as JSON. Any
    exception is logged with its traceback and shown to the user.
    """
    st.subheader("JSON Prompt Generator")
    with st.form("json_prompt_form", clear_on_submit=False):
        script = st.text_area("Script", height=200, placeholder="Paste the full script...")
        style = st.text_input("Style", value="clean, lifestyle UGC")
        json_format = st.selectbox("jsonFormat", ["standard", "compact", "verbose"], index=0)
        continuation = st.toggle("continuationMode", value=True)
        voice_type = st.text_input("voiceType", value="")
        energy_level = st.text_input("energyLevel", value="")
        # The default must be a plain string: passing a list here is cast to
        # str by Streamlit and pre-fills the field with "['single']".
        setting_mode = st.text_input("settingMode", value="single")
        camera_style = st.text_input("cameraStyle", value="handheld steadicam")
        energy_arc = st.text_input("energyArc", value="")
        narrative_style = st.text_input("narrativeStyle", value="direct address")
        accent_region = st.text_input("accentRegion", value="")
        image_upload = st.file_uploader("First frame image (optional)", type=_IMAGE_TYPES)
        submitted = st.form_submit_button("Generate Segments Payload")

    if not submitted:
        return

    try:
        # getvalue() returns the whole buffer regardless of stream position.
        image_bytes = image_upload.getvalue() if image_upload else None
        logger.info("JSON Prompt: script_len=%d style='%s' jsonFormat='%s' img=%s",
                    len(script or ""), style, json_format, bool(image_bytes))

        # Empty optional text fields are sent as None rather than "".
        inputs = VeoInputs(
            script=script or "",
            style=style or "",
            jsonFormat=json_format,
            continuationMode=continuation,
            voiceType=voice_type or None,
            energyLevel=energy_level or None,
            settingMode=setting_mode,
            cameraStyle=camera_style or None,
            energyArc=energy_arc or None,
            narrativeStyle=narrative_style or None,
            accentRegion=accent_region or None,
        )

        with st.spinner("Generating segments payload..."):
            # NOTE(review): the keyword is named image_path but receives raw
            # bytes (or None) -- confirm generate_segments_payload accepts that.
            payload = generate_segments_payload(inputs, image_path=image_bytes, model="gpt-4o")
        st.success("Segments payload generated")
        st.json(payload)
        st.toast("Segments JSON ready", icon="✅")
    except Exception as e:
        logger.exception("JSON Prompt Generator failed")
        st.error(f"Error: {e}")


def _render_audio_tab():
    """Render the "Audio Generator" tab and handle its form submission.

    Requires non-blank narration text, calls ``audio_generation`` and, when a
    URL comes back, plays and prints it. Failures are logged and shown.
    """
    st.subheader("Audio Generator")
    with st.form("audio_generation_form", clear_on_submit=False):
        scripts = st.text_area("Scripts", height=180, placeholder="Enter narration text…")
        voice_id = st.selectbox("Voice ID", _VOICE_IDS, index=0)
        speed = st.slider("Speed", min_value=0.5, max_value=2.0, value=1.0, step=0.1)
        volume = st.slider("Volume", min_value=1.0, max_value=5.0, value=1.0, step=0.5)
        pitch = st.slider("Pitch (semitones)", min_value=-12, max_value=12, value=0, step=1)
        emotion = st.selectbox("Emotion", ["neutral", "confident", "warm", "excited", "serious"], index=0)
        make_audio = st.form_submit_button("Generate Audio")

    if not make_audio:
        return
    if not scripts.strip():
        st.error("Please enter scripts text.")
        return

    try:
        logger.info("Audio Gen: voice_id=%s speed=%.2f volume=%.2f pitch=%d emotion=%s text_len=%d",
                    voice_id, speed, volume, pitch, emotion, len(scripts or ""))

        with st.spinner("Generating audio..."):
            url = audio_generation(scripts, voice_id, speed, volume, pitch, emotion)

        if url:
            st.success("Audio generated")
            st.audio(url)
            st.write(url)
            st.toast("Audio ready", icon="🔊")
        else:
            st.error("No audio URL returned.")
            logger.warning("Audio Gen: empty URL")
    except Exception as e:
        logger.exception("Audio Generation failed")
        st.error(f"Error: {e}")


def _render_image_tab():
    """Render the "Image Generation" (background change) tab and handle its form.

    Requires an uploaded image, passes its bytes, the prompt and the aspect
    ratio to ``image_generation_change_background`` and shows input and output
    side by side. Failures are logged and shown.
    """
    st.subheader("Image Generation (for changing character's background)")
    with st.form("image_generation_form", clear_on_submit=False):
        img = st.file_uploader("Input image", type=_IMAGE_TYPES)
        img_prompt = st.text_input("Prompt", placeholder="New background description, style cues, lighting…")
        ar = st.selectbox("Aspect ratio", ["9:16", "16:9"], index=0)
        gen_img = st.form_submit_button("Generate Image")

    if not gen_img:
        return
    if not img:
        st.error("Please upload an image.")
        return

    try:
        # getvalue() leaves the stream position untouched, so the same upload
        # object can still be previewed below.
        raw = img.getvalue()
        logger.info("Image Gen: bytes=%d ar=%s prompt_len=%d", len(raw or b""), ar, len(img_prompt or ""))

        with st.spinner("Generating image..."):
            url = image_generation_change_background(raw, img_prompt, ar)

        if url:
            st.success("Image processed")
            cols = st.columns(2)
            with cols[0]:
                st.caption("Input")
                st.image(img, use_container_width=True)
            with cols[1]:
                st.caption("Output")
                st.image(url, use_container_width=True)
            st.write(url)
            st.toast("Image ready", icon="🖼️")
        else:
            st.error("No image URL returned.")
            logger.warning("Image Gen: empty URL")
    except Exception as e:
        logger.exception("Image Generation failed")
        st.error(f"Error: {e}")


def _render_caption_tab():
    """Render the "Generate Caption" tab and handle its form submission.

    Requires an uploaded video, passes its bytes, the caption size and the
    highlight colour to ``caption_generation`` and plays the returned URL.
    Failures are logged and shown.
    """
    st.subheader("Generate Caption")
    with st.form("caption_form", clear_on_submit=False):
        cap_video = st.file_uploader("Input video", type=["mp4", "mov", "mkv"])
        cap_size = st.slider("Caption size (characters)", min_value=10, max_value=100, value=30, step=5)
        color = st.text_input("Highlight color (hex)", value="#FFD400")
        make_caps = st.form_submit_button("Generate Captions")

    if not make_caps:
        return
    if not cap_video:
        st.error("Please upload a video.")
        return

    try:
        vbytes = cap_video.getvalue()
        logger.info("Caption Gen: video_bytes=%d cap_size=%d color=%s",
                    len(vbytes or b""), int(cap_size), color)

        with st.spinner("Generating captions..."):
            url = caption_generation(vbytes, cap_size, color)

        if url:
            st.success("Captions generated")
            st.video(url)
            st.write(url)
            st.toast("Captioned video ready", icon="🎞️")
        else:
            st.error("No captioned video URL returned.")
            logger.warning("Caption Gen: empty URL")
    except Exception as e:
        logger.exception("Caption Generation failed")
        st.error(f"Error: {e}")


def main_app():
    """Render the main UI: a title and four tool tabs.

    Each tab's widgets and submit handling live in a dedicated private
    helper, called inside that tab's container so everything it draws
    lands in the right tab.
    """
    st.title("Video Generator Tools")

    tabs = st.tabs([
        "JSON Prompt Generator",
        "Audio Generator",
        "Image Generation",
        "Generate Caption",
    ])

    # JSON Prompt Generator
    with tabs[0]:
        _render_json_prompt_tab()

    # Audio Generator
    with tabs[1]:
        _render_audio_tab()

    # Image Generation (BG Change)
    with tabs[2]:
        _render_image_tab()

    # Generate Caption
    with tabs[3]:
        _render_caption_tab()

@lru_cache(maxsize=1)
def check_token_cached(user_token: str):
    """Check a user-supplied access token against the ACCESS_TOKEN env variable.

    Args:
        user_token: Token typed by the user. A non-string value is treated as
            an empty token and therefore never matches.

    Returns:
        A ``(ok, message)`` tuple: ``(True, "")`` on a match, otherwise
        ``(False, <reason>)`` where the reason is either the "not configured"
        server error or "Invalid token.".

    Note:
        The result for the most recently checked token is cached
        (``maxsize=1``), so a change to ACCESS_TOKEN in the environment is not
        seen for that token until another token is checked or
        ``check_token_cached.cache_clear()`` is called.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    expected = os.getenv("ACCESS_TOKEN")
    if not expected:
        return False, "Server error: Access token not configured."

    supplied = user_token if isinstance(user_token, str) else ""
    # Compare in constant time so response timing does not leak how much of the
    # secret matched. Bytes are used because compare_digest() rejects non-ASCII
    # str; "surrogatepass" keeps the encoding total for any str value.
    if hmac.compare_digest(
        supplied.encode("utf-8", "surrogatepass"),
        expected.encode("utf-8", "surrogatepass"),
    ):
        return True, ""
    return False, "Invalid token."

def main():
    """Entry point: show the access-token gate until the session is unlocked.

    The "authenticated" flag in the Streamlit session state decides what is
    drawn. While it is False a password field and an "Unlock App" button are
    shown; a token accepted by check_token_cached() sets the flag and reruns
    the script, after which main_app() is rendered instead.
    """
    state = st.session_state
    if "authenticated" not in state:
        state["authenticated"] = False

    # Already unlocked: render the real application and stop here.
    if state["authenticated"]:
        main_app()
        return

    st.markdown("## Access Required")
    entered = st.text_input("Enter Access Token", type="password")
    if not st.button("Unlock App"):
        return

    granted, reason = check_token_cached(entered)
    if not granted:
        st.error(reason)
        return

    state["authenticated"] = True
    st.rerun()

# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()