Gaurav vashistha committed on
Commit
c4bca54
·
1 Parent(s): 5f1ac8d

feat: implement audio bridging, cloud persistence, and creative control

Browse files
Files changed (7) hide show
  1. Dockerfile +1 -1
  2. agent.py +72 -4
  3. config.py +6 -0
  4. requirements.txt +1 -0
  5. server.py +8 -1
  6. stitch_continuity_dashboard/code.html +16 -0
  7. utils.py +56 -0
Dockerfile CHANGED
@@ -1,7 +1,7 @@
1
  FROM python:3.10-slim
2
  WORKDIR /app
3
 
4
- RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 && rm -rf /var/lib/apt/lists/*
5
 
6
  COPY requirements.txt .
7
  RUN pip install --no-cache-dir -r requirements.txt
 
1
  FROM python:3.10-slim
2
  WORKDIR /app
3
 
4
+ RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 ffmpeg && rm -rf /var/lib/apt/lists/*
5
 
6
  COPY requirements.txt .
7
  RUN pip install --no-cache-dir -r requirements.txt
agent.py CHANGED
@@ -12,6 +12,8 @@ from google.genai import types
12
  # Import other clients
13
  from groq import Groq
14
  from gradio_client import Client, handle_file
 
 
15
 
16
  # Import refactored modules
17
  from config import Settings
@@ -26,6 +28,8 @@ class ContinuityState(TypedDict):
26
  job_id: Optional[str] # Added job_id
27
  video_a_url: str
28
  video_c_url: str
 
 
29
  user_notes: Optional[str]
30
  scene_analysis: Optional[str]
31
  veo_prompt: Optional[str]
@@ -33,6 +37,51 @@ class ContinuityState(TypedDict):
33
  video_a_local_path: Optional[str]
34
  video_c_local_path: Optional[str]
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  # --- NODE 1: ANALYST ---
37
  def analyze_videos(state: ContinuityState) -> dict:
38
  logger.info("--- 🧐 Analyst Node (Director) ---")
@@ -42,6 +91,7 @@ def analyze_videos(state: ContinuityState) -> dict:
42
 
43
  video_a_url = state['video_a_url']
44
  video_c_url = state['video_c_url']
 
45
 
46
  # 1. Prepare Files
47
  try:
@@ -87,10 +137,13 @@ def analyze_videos(state: ContinuityState) -> dict:
87
  logger.error(f"File state issue. A: {file_a.state.name}, C: {file_c.state.name}")
88
  raise Exception("Gemini files not active.")
89
 
90
- prompt_text = """
91
  You are a film director.
92
  Analyze the motion, lighting, and subject of the first video (Video A) and the second video (Video C).
93
  Write a detailed visual prompt for a 2-second video (Video B) that smoothly transitions from the end of A to the start of C.
 
 
 
94
  Target Output: A single concise descriptive paragraph for the video generation model.
95
  """
96
 
@@ -216,6 +269,19 @@ def generate_video(state: ContinuityState) -> dict:
216
 
217
  else:
218
  logger.warning("Veo operation completed with no result.")
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
  else:
221
  logger.warning("⚠️ GCP_PROJECT_ID not set. Skipping Veo.")
@@ -296,7 +362,8 @@ def analyze_only(state_or_path_a, path_c=None, job_id=None):
296
  "video_a_url": "local",
297
  "video_c_url": "local",
298
  "video_a_local_path": state_or_path_a,
299
- "video_c_local_path": path_c
 
300
  }
301
  else:
302
  state = state_or_path_a if isinstance(state_or_path_a, dict) else state_or_path_a.dict()
@@ -306,13 +373,14 @@ def analyze_only(state_or_path_a, path_c=None, job_id=None):
306
  result = analyze_videos(state)
307
  return {"prompt": result.get("scene_analysis"), "status": "success"}
308
 
309
- def generate_only(prompt, path_a, path_c, job_id=None):
310
  state = {
311
  "job_id": job_id,
312
  "video_a_url": "local",
313
  "video_c_url": "local",
314
  "video_a_local_path": path_a,
315
  "video_c_local_path": path_c,
316
- "veo_prompt": prompt
 
317
  }
318
  return generate_video(state)
 
12
  # Import other clients
13
  from groq import Groq
14
  from gradio_client import Client, handle_file
15
+ from huggingface_hub import InferenceClient
16
+ import subprocess
17
 
18
  # Import refactored modules
19
  from config import Settings
 
28
  job_id: Optional[str] # Added job_id
29
  video_a_url: str
30
  video_c_url: str
31
+ style: Optional[str]
32
+ audio_prompt: Optional[str]
33
  user_notes: Optional[str]
34
  scene_analysis: Optional[str]
35
  veo_prompt: Optional[str]
 
37
  video_a_local_path: Optional[str]
38
  video_c_local_path: Optional[str]
39
 
40
def generate_audio(prompt: str) -> Optional[str]:
    """Generate an SFX/atmosphere audio clip for *prompt* using AudioLDM.

    Runs text-to-audio through the Hugging Face Inference API and writes
    the result to a temporary ``.wav`` file.

    Returns:
        Path to the temporary ``.wav`` file, or ``None`` when generation
        fails (this stage is best-effort; the caller falls back to a
        silent video).
    """
    try:
        logger.info(f"🎵 Generating Audio for: {prompt}")
        # Use a model good for SFX/Atmosphere
        client = InferenceClient("cvssp/audioldm-12.8k-caps", token=Settings.HF_TOKEN)
        audio = client.text_to_audio(prompt)

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            # huggingface_hub's text_to_audio return type varies by
            # version: current releases return raw audio bytes, while
            # some versions return a helper object exposing .save().
            # Handle both instead of assuming .save() exists.
            if isinstance(audio, (bytes, bytearray)):
                f.write(audio)
            else:
                audio.save(f.name)
            return f.name
    except Exception as e:
        logger.error(f"Audio generation failed: {e}")
        return None
58
+
59
def merge_audio_video(video_path: str, audio_path: str) -> str:
    """Mux an audio track onto a video file using ffmpeg.

    The video stream is copied untouched (``-c:v copy``), the audio is
    encoded to AAC, and the output stops at the shorter input
    (``-shortest``).

    Args:
        video_path: Path to the source ``.mp4`` video.
        audio_path: Path to the audio file; falsy values skip the merge.

    Returns:
        Path to the merged file, or the original ``video_path`` when no
        audio was supplied or the merge fails (best-effort).
    """
    if not audio_path:
        return video_path

    try:
        # Only rewrite the trailing extension; str.replace(".mp4", ...)
        # would also corrupt an ".mp4" occurring earlier in the path.
        if video_path.endswith(".mp4"):
            output_path = video_path[:-4] + "_merged.mp4"
        else:
            output_path = video_path + "_merged.mp4"
        logger.info(f"Merging Audio/Video: {video_path} + {audio_path} -> {output_path}")

        # ffmpeg command: -i video -i audio -c:v copy -c:a aac -shortest output
        cmd = [
            "ffmpeg", "-y",
            "-i", video_path,
            "-i", audio_path,
            "-c:v", "copy",
            "-c:a", "aac",
            "-shortest",
            output_path
        ]

        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return output_path
    except subprocess.CalledProcessError as e:
        # Surface ffmpeg's own diagnostics instead of just the exit code.
        stderr_text = e.stderr.decode(errors="replace") if e.stderr else ""
        logger.error(f"FFmpeg Merge Failed: {e}. stderr: {stderr_text}")
        return video_path
    except Exception as e:
        logger.error(f"FFmpeg Merge Failed: {e}")
        return video_path
84
+
85
  # --- NODE 1: ANALYST ---
86
  def analyze_videos(state: ContinuityState) -> dict:
87
  logger.info("--- 🧐 Analyst Node (Director) ---")
 
91
 
92
  video_a_url = state['video_a_url']
93
  video_c_url = state['video_c_url']
94
+ style = state.get('style', 'Cinematic')
95
 
96
  # 1. Prepare Files
97
  try:
 
137
  logger.error(f"File state issue. A: {file_a.state.name}, C: {file_c.state.name}")
138
  raise Exception("Gemini files not active.")
139
 
140
+ prompt_text = f"""
141
  You are a film director.
142
  Analyze the motion, lighting, and subject of the first video (Video A) and the second video (Video C).
143
  Write a detailed visual prompt for a 2-second video (Video B) that smoothly transitions from the end of A to the start of C.
144
+
145
+ STYLE INSTRUCTION: The user wants the style to be "{style}". Ensure the visual description reflects this style (e.g., color grading, camera movement, atmosphere).
146
+
147
  Target Output: A single concise descriptive paragraph for the video generation model.
148
  """
149
 
 
269
 
270
  else:
271
  logger.warning("Veo operation completed with no result.")
272
+
273
+ # --- AUDIO & MERGE ---
274
+ if local_path:
275
+ update_job_status(job_id, "generating", 90, "Generating audio SFX...")
276
+ audio_path = generate_audio(prompt)
277
+
278
+ if audio_path:
279
+ update_job_status(job_id, "generating", 95, "Merging audio and video...")
280
+ final_path = merge_audio_video(local_path, audio_path)
281
+ local_path = final_path
282
+
283
+ update_job_status(job_id, "completed", 100, "Done!", video_url=local_path)
284
+ return {"generated_video_url": local_path}
285
 
286
  else:
287
  logger.warning("⚠️ GCP_PROJECT_ID not set. Skipping Veo.")
 
362
  "video_a_url": "local",
363
  "video_c_url": "local",
364
  "video_a_local_path": state_or_path_a,
365
+ "video_c_local_path": path_c,
366
+ "style": "Cinematic" # Default
367
  }
368
  else:
369
  state = state_or_path_a if isinstance(state_or_path_a, dict) else state_or_path_a.dict()
 
373
  result = analyze_videos(state)
374
  return {"prompt": result.get("scene_analysis"), "status": "success"}
375
 
376
def generate_only(prompt, path_a, path_c, job_id=None, style="Cinematic"):
    """Run only the video-generation stage for two local clips.

    Builds the minimal state the generator node expects and delegates
    to ``generate_video``.
    """
    generation_state = {
        "job_id": job_id,
        "video_a_url": "local",
        "video_c_url": "local",
        "video_a_local_path": path_a,
        "video_c_local_path": path_c,
        "veo_prompt": prompt,
        "style": style,
    }
    return generate_video(generation_state)
config.py CHANGED
@@ -10,6 +10,8 @@ class Settings:
10
  GCP_LOCATION = os.getenv("GCP_LOCATION", "us-central1")
11
  GCP_CREDENTIALS_JSON = os.getenv("GCP_CREDENTIALS_JSON")
12
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 
 
13
 
14
  @classmethod
15
  def setup_auth(cls):
@@ -26,6 +28,10 @@ class Settings:
26
  """Validates critical environment variables."""
27
  if not cls.GOOGLE_API_KEY:
28
  raise ValueError("GOOGLE_API_KEY is missing from environment variables.")
 
 
 
 
29
 
30
  # Run setup and validation immediately on import
31
  Settings.setup_auth()
 
10
  GCP_LOCATION = os.getenv("GCP_LOCATION", "us-central1")
11
  GCP_CREDENTIALS_JSON = os.getenv("GCP_CREDENTIALS_JSON")
12
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
13
+ HF_TOKEN = os.getenv("HF_TOKEN")
14
+ GCP_BUCKET_NAME = os.getenv("GCP_BUCKET_NAME")
15
 
16
  @classmethod
17
  def setup_auth(cls):
 
28
  """Validates critical environment variables."""
29
  if not cls.GOOGLE_API_KEY:
30
  raise ValueError("GOOGLE_API_KEY is missing from environment variables.")
31
+ if not cls.HF_TOKEN:
32
+ print("⚠️ HF_TOKEN is missing. Audio generation may fail.")
33
+ if not cls.GCP_BUCKET_NAME:
34
+ print("⚠️ GCP_BUCKET_NAME is missing. Cloud persistence will be disabled.")
35
 
36
  # Run setup and validation immediately on import
37
  Settings.setup_auth()
requirements.txt CHANGED
@@ -18,3 +18,4 @@ google-generativeai
18
  google-cloud-aiplatform
19
 
20
  google-cloud-storage
 
 
18
  google-cloud-aiplatform
19
 
20
  google-cloud-storage
21
+ huggingface_hub
server.py CHANGED
@@ -8,6 +8,7 @@ import shutil
8
  import uuid
9
  import json
10
  from agent import analyze_only, generate_only
 
11
 
12
  app = FastAPI(title="Continuity", description="AI Video Bridging Service")
13
 
@@ -65,6 +66,7 @@ def analyze_endpoint(
65
  def generate_endpoint(
66
  background_tasks: BackgroundTasks,
67
  prompt: str = Body(...),
 
68
  video_a_path: str = Body(...),
69
  video_c_path: str = Body(...)
70
  ):
@@ -81,7 +83,7 @@ def generate_endpoint(
81
  json.dump({"status": "queued", "progress": 0, "log": "Job queued..."}, f)
82
 
83
  # Add to background tasks
84
- background_tasks.add_task(generate_only, prompt, video_a_path, video_c_path, job_id)
85
 
86
  return {"job_id": job_id}
87
 
@@ -102,5 +104,10 @@ def get_status(job_id: str):
102
  except Exception as e:
103
  raise HTTPException(status_code=500, detail=f"Error reading status: {e}")
104
 
 
 
 
 
 
105
  if __name__ == "__main__":
106
  uvicorn.run("server:app", host="0.0.0.0", port=7860, reload=False)
 
8
  import uuid
9
  import json
10
  from agent import analyze_only, generate_only
11
+ from utils import get_history_from_gcs
12
 
13
  app = FastAPI(title="Continuity", description="AI Video Bridging Service")
14
 
 
66
  def generate_endpoint(
67
  background_tasks: BackgroundTasks,
68
  prompt: str = Body(...),
69
+ style: str = Body("Cinematic"),
70
  video_a_path: str = Body(...),
71
  video_c_path: str = Body(...)
72
  ):
 
83
  json.dump({"status": "queued", "progress": 0, "log": "Job queued..."}, f)
84
 
85
  # Add to background tasks
86
+ background_tasks.add_task(generate_only, prompt, video_a_path, video_c_path, job_id, style)
87
 
88
  return {"job_id": job_id}
89
 
 
104
  except Exception as e:
105
  raise HTTPException(status_code=500, detail=f"Error reading status: {e}")
106
 
107
+ @app.get("/history")
108
+ def get_history():
109
+ """Returns list of past generated videos from GCS."""
110
+ return get_history_from_gcs()
111
+
112
  if __name__ == "__main__":
113
  uvicorn.run("server:app", host="0.0.0.0", port=7860, reload=False)
stitch_continuity_dashboard/code.html CHANGED
@@ -275,6 +275,20 @@
275
  class="w-full bg-surface-dark/50 border border-white/10 rounded-lg p-3 text-white text-sm focus:border-primary focus:ring-1 focus:ring-primary outline-none"
276
  placeholder="AI generated transition prompt will appear here..."></textarea>
277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  <button id="generate-btn"
279
  class="w-full flex items-center justify-center gap-2 bg-gradient-to-r from-primary to-purple-600 hover:from-[#6b0bc9] hover:to-purple-700 text-white px-6 py-3 rounded-xl font-bold text-lg transition-all shadow-lg">
280
  <span class="material-symbols-outlined text-[24px]">movie_filter</span>
@@ -364,6 +378,7 @@
364
  if (generateBtn) {
365
  generateBtn.addEventListener("click", async () => {
366
  const prompt = document.getElementById("prompt-box").value;
 
367
  const btn = document.getElementById("generate-btn");
368
 
369
  if (!currentVideoAPath || !currentVideoCPath) {
@@ -386,6 +401,7 @@
386
  },
387
  body: JSON.stringify({
388
  prompt: prompt,
 
389
  video_a_path: currentVideoAPath,
390
  video_c_path: currentVideoCPath
391
  })
 
275
  class="w-full bg-surface-dark/50 border border-white/10 rounded-lg p-3 text-white text-sm focus:border-primary focus:ring-1 focus:ring-primary outline-none"
276
  placeholder="AI generated transition prompt will appear here..."></textarea>
277
 
278
+ <div class="flex flex-col gap-2">
279
+ <label for="style-select" class="text-xs font-bold text-gray-400 uppercase tracking-widest pl-1">Creative
280
+ Style</label>
281
+ <select id="style-select"
282
+ class="w-full bg-surface-dark/50 border border-white/10 rounded-lg p-3 text-white text-sm focus:border-primary focus:ring-1 focus:ring-primary outline-none">
283
+ <option value="Cinematic">Cinematic (Default)</option>
284
+ <option value="Anime">Anime</option>
285
+ <option value="Cyberpunk">Cyberpunk</option>
286
+ <option value="VHS Glitch">VHS Glitch</option>
287
+ <option value="Claymation">Claymation</option>
288
+ <option value="Noir">Noir</option>
289
+ </select>
290
+ </div>
291
+
292
  <button id="generate-btn"
293
  class="w-full flex items-center justify-center gap-2 bg-gradient-to-r from-primary to-purple-600 hover:from-[#6b0bc9] hover:to-purple-700 text-white px-6 py-3 rounded-xl font-bold text-lg transition-all shadow-lg">
294
  <span class="material-symbols-outlined text-[24px]">movie_filter</span>
 
378
  if (generateBtn) {
379
  generateBtn.addEventListener("click", async () => {
380
  const prompt = document.getElementById("prompt-box").value;
381
+ const style = document.getElementById("style-select").value;
382
  const btn = document.getElementById("generate-btn");
383
 
384
  if (!currentVideoAPath || !currentVideoCPath) {
 
401
  },
402
  body: JSON.stringify({
403
  prompt: prompt,
404
+ style: style,
405
  video_a_path: currentVideoAPath,
406
  video_c_path: currentVideoCPath
407
  })
utils.py CHANGED
@@ -4,7 +4,9 @@ import requests
4
  import tempfile
5
  import logging
6
  import json
 
7
  from google.cloud import storage
 
8
 
9
  # Configure logging for utils
10
  logger = logging.getLogger(__name__)
@@ -36,6 +38,55 @@ def download_blob(gcs_uri, destination_file_name):
36
  blob.download_to_filename(destination_file_name)
37
  logger.info(f"Downloaded storage object {gcs_uri} to local file {destination_file_name}.")
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def save_video_bytes(bytes_data, suffix=".mp4") -> str:
40
  """Saves raw video bytes to a temporary local file."""
41
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
@@ -64,6 +115,11 @@ def update_job_status(job_id, status, progress, log=None, video_url=None):
64
  logger.info(f"Moved video to {destination}")
65
  # Set public URL relative to server root
66
  final_video_url = f"/outputs/{final_filename}"
 
 
 
 
 
67
  except Exception as e:
68
  logger.error(f"Failed to move output video: {e}")
69
 
 
4
  import tempfile
5
  import logging
6
  import json
7
+ from datetime import datetime, timedelta
8
  from google.cloud import storage
9
+ from config import Settings
10
 
11
  # Configure logging for utils
12
  logger = logging.getLogger(__name__)
 
38
  blob.download_to_filename(destination_file_name)
39
  logger.info(f"Downloaded storage object {gcs_uri} to local file {destination_file_name}.")
40
 
41
def upload_to_gcs(local_path, destination_blob_name, expiration_hours: float = 1.0):
    """Upload a local file to the configured GCS bucket.

    Args:
        local_path: Path of the file to upload.
        destination_blob_name: Target object name within the bucket.
        expiration_hours: Validity window of the returned signed URL
            (defaults to 1 hour, preserving the previous behavior).

    Returns:
        A signed GET URL for the uploaded blob, or ``None`` when the
        bucket is not configured or the upload fails.
    """
    bucket_name = Settings.GCP_BUCKET_NAME
    if not bucket_name:
        logger.warning("GCP_BUCKET_NAME not set. Skipping upload.")
        return None

    try:
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)

        blob.upload_from_filename(local_path)

        # Signed URL lets the frontend fetch the clip without making
        # the bucket public.
        url = blob.generate_signed_url(expiration=timedelta(hours=expiration_hours), method='GET')
        logger.info(f"Uploaded {local_path} to {destination_blob_name}. URL: {url}")
        return url
    except Exception as e:
        logger.error(f"Failed to upload to GCS: {e}")
        return None
62
+
63
def get_history_from_gcs(limit: int = 20):
    """List the most recent generated videos stored in the GCS bucket.

    Args:
        limit: Maximum number of entries to return (default 20,
            preserving the previous behavior).

    Returns:
        A list of dicts with ``name``, a 1-hour signed ``url``, and the
        ISO-8601 ``created`` timestamp, newest first. Returns ``[]``
        when the bucket is not configured or listing fails.
    """
    bucket_name = Settings.GCP_BUCKET_NAME
    if not bucket_name:
        return []

    try:
        storage_client = storage.Client()
        blobs = storage_client.list_blobs(bucket_name)

        # Filter to videos BEFORE truncating: slicing first would let
        # non-mp4 blobs (e.g. audio files) eat into the limit and
        # return fewer videos than actually exist.
        videos = [b for b in blobs if b.name.endswith(".mp4")]
        # Newest first.
        videos.sort(key=lambda b: b.time_created, reverse=True)

        history = []
        for blob in videos[:limit]:
            url = blob.generate_signed_url(expiration=timedelta(hours=1), method='GET')
            history.append({
                "name": blob.name,
                "url": url,
                "created": blob.time_created.isoformat()
            })
        return history
    except Exception as e:
        logger.error(f"Failed to list GCS history: {e}")
        return []
89
+
90
  def save_video_bytes(bytes_data, suffix=".mp4") -> str:
91
  """Saves raw video bytes to a temporary local file."""
92
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
 
115
  logger.info(f"Moved video to {destination}")
116
  # Set public URL relative to server root
117
  final_video_url = f"/outputs/{final_filename}"
118
+
119
+ # --- AUTO BACKUP TO CLOUD ---
120
+ if Settings.GCP_BUCKET_NAME:
121
+ logger.info(f"Backing up {final_filename} to GCS...")
122
+ upload_to_gcs(destination, final_filename)
123
  except Exception as e:
124
  logger.error(f"Failed to move output video: {e}")
125