Gaurav vashistha committed on
Commit
c4bca54
·
1 Parent(s): 5f1ac8d

feat: implement audio bridging, cloud persistence, and creative control

Browse files
Files changed (7) hide show
  1. Dockerfile +1 -1
  2. agent.py +72 -4
  3. config.py +6 -0
  4. requirements.txt +1 -0
  5. server.py +8 -1
  6. stitch_continuity_dashboard/code.html +16 -0
  7. utils.py +56 -0
Dockerfile CHANGED
@@ -1,7 +1,7 @@
1
  FROM python:3.10-slim
2
  WORKDIR /app
3
 
4
- RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 && rm -rf /var/lib/apt/lists/*
5
 
6
  COPY requirements.txt .
7
  RUN pip install --no-cache-dir -r requirements.txt
 
1
  FROM python:3.10-slim
2
  WORKDIR /app
3
 
4
+ RUN apt-get update && apt-get install -y libgl1 libglib2.0-0 ffmpeg && rm -rf /var/lib/apt/lists/*
5
 
6
  COPY requirements.txt .
7
  RUN pip install --no-cache-dir -r requirements.txt
agent.py CHANGED
@@ -12,6 +12,8 @@ from google.genai import types
12
  # Import other clients
13
  from groq import Groq
14
  from gradio_client import Client, handle_file
 
 
15
 
16
  # Import refactored modules
17
  from config import Settings
@@ -26,6 +28,8 @@ class ContinuityState(TypedDict):
26
  job_id: Optional[str] # Added job_id
27
  video_a_url: str
28
  video_c_url: str
 
 
29
  user_notes: Optional[str]
30
  scene_analysis: Optional[str]
31
  veo_prompt: Optional[str]
@@ -33,6 +37,51 @@ class ContinuityState(TypedDict):
33
  video_a_local_path: Optional[str]
34
  video_c_local_path: Optional[str]
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  # --- NODE 1: ANALYST ---
37
  def analyze_videos(state: ContinuityState) -> dict:
38
  logger.info("--- 🧐 Analyst Node (Director) ---")
@@ -42,6 +91,7 @@ def analyze_videos(state: ContinuityState) -> dict:
42
 
43
  video_a_url = state['video_a_url']
44
  video_c_url = state['video_c_url']
 
45
 
46
  # 1. Prepare Files
47
  try:
@@ -87,10 +137,13 @@ def analyze_videos(state: ContinuityState) -> dict:
87
  logger.error(f"File state issue. A: {file_a.state.name}, C: {file_c.state.name}")
88
  raise Exception("Gemini files not active.")
89
 
90
- prompt_text = """
91
  You are a film director.
92
  Analyze the motion, lighting, and subject of the first video (Video A) and the second video (Video C).
93
  Write a detailed visual prompt for a 2-second video (Video B) that smoothly transitions from the end of A to the start of C.
 
 
 
94
  Target Output: A single concise descriptive paragraph for the video generation model.
95
  """
96
 
@@ -216,6 +269,19 @@ def generate_video(state: ContinuityState) -> dict:
216
 
217
  else:
218
  logger.warning("Veo operation completed with no result.")
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
  else:
221
  logger.warning("⚠️ GCP_PROJECT_ID not set. Skipping Veo.")
@@ -296,7 +362,8 @@ def analyze_only(state_or_path_a, path_c=None, job_id=None):
296
  "video_a_url": "local",
297
  "video_c_url": "local",
298
  "video_a_local_path": state_or_path_a,
299
- "video_c_local_path": path_c
 
300
  }
301
  else:
302
  state = state_or_path_a if isinstance(state_or_path_a, dict) else state_or_path_a.dict()
@@ -306,13 +373,14 @@ def analyze_only(state_or_path_a, path_c=None, job_id=None):
306
  result = analyze_videos(state)
307
  return {"prompt": result.get("scene_analysis"), "status": "success"}
308
 
309
- def generate_only(prompt, path_a, path_c, job_id=None):
310
  state = {
311
  "job_id": job_id,
312
  "video_a_url": "local",
313
  "video_c_url": "local",
314
  "video_a_local_path": path_a,
315
  "video_c_local_path": path_c,
316
- "veo_prompt": prompt
 
317
  }
318
  return generate_video(state)
 
12
  # Import other clients
13
  from groq import Groq
14
  from gradio_client import Client, handle_file
15
+ from huggingface_hub import InferenceClient
16
+ import subprocess
17
 
18
  # Import refactored modules
19
  from config import Settings
 
28
  job_id: Optional[str] # Added job_id
29
  video_a_url: str
30
  video_c_url: str
31
+ style: Optional[str]
32
+ audio_prompt: Optional[str]
33
  user_notes: Optional[str]
34
  scene_analysis: Optional[str]
35
  veo_prompt: Optional[str]
 
37
  video_a_local_path: Optional[str]
38
  video_c_local_path: Optional[str]
39
 
40
def generate_audio(prompt: str) -> Optional[str]:
    """Generate an SFX/atmosphere audio clip for *prompt* using AudioLDM.

    Runs text-to-audio through the Hugging Face Inference API and writes
    the result to a temporary ``.wav`` file.

    Returns:
        Path to the temporary ``.wav`` file, or ``None`` when generation
        fails (this stage is best-effort; the caller falls back to a
        silent video).
    """
    try:
        logger.info(f"🎵 Generating Audio for: {prompt}")
        # Use a model good for SFX/Atmosphere
        client = InferenceClient("cvssp/audioldm-12.8k-caps", token=Settings.HF_TOKEN)
        audio = client.text_to_audio(prompt)

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            # huggingface_hub's text_to_audio return type varies by
            # version: current releases return raw audio bytes, while
            # some versions return a helper object exposing .save().
            # Handle both instead of assuming .save() exists.
            if isinstance(audio, (bytes, bytearray)):
                f.write(audio)
            else:
                audio.save(f.name)
            return f.name
    except Exception as e:
        logger.error(f"Audio generation failed: {e}")
        return None
58
+
59
def merge_audio_video(video_path: str, audio_path: str) -> str:
    """Mux an audio track onto a video file using ffmpeg.

    The video stream is copied untouched (``-c:v copy``), the audio is
    encoded to AAC, and the output stops at the shorter input
    (``-shortest``).

    Args:
        video_path: Path to the source ``.mp4`` video.
        audio_path: Path to the audio file; falsy values skip the merge.

    Returns:
        Path to the merged file, or the original ``video_path`` when no
        audio was supplied or the merge fails (best-effort).
    """
    if not audio_path:
        return video_path

    try:
        # Only rewrite the trailing extension; str.replace(".mp4", ...)
        # would also corrupt an ".mp4" occurring earlier in the path.
        if video_path.endswith(".mp4"):
            output_path = video_path[:-4] + "_merged.mp4"
        else:
            output_path = video_path + "_merged.mp4"
        logger.info(f"Merging Audio/Video: {video_path} + {audio_path} -> {output_path}")

        # ffmpeg command: -i video -i audio -c:v copy -c:a aac -shortest output
        cmd = [
            "ffmpeg", "-y",
            "-i", video_path,
            "-i", audio_path,
            "-c:v", "copy",
            "-c:a", "aac",
            "-shortest",
            output_path
        ]

        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return output_path
    except subprocess.CalledProcessError as e:
        # Surface ffmpeg's own diagnostics instead of just the exit code.
        stderr_text = e.stderr.decode(errors="replace") if e.stderr else ""
        logger.error(f"FFmpeg Merge Failed: {e}. stderr: {stderr_text}")
        return video_path
    except Exception as e:
        logger.error(f"FFmpeg Merge Failed: {e}")
        return video_path
84
+
85
  # --- NODE 1: ANALYST ---
86
  def analyze_videos(state: ContinuityState) -> dict:
87
  logger.info("--- 🧐 Analyst Node (Director) ---")
 
91
 
92
  video_a_url = state['video_a_url']
93
  video_c_url = state['video_c_url']
94
+ style = state.get('style', 'Cinematic')
95
 
96
  # 1. Prepare Files
97
  try:
 
137
  logger.error(f"File state issue. A: {file_a.state.name}, C: {file_c.state.name}")
138
  raise Exception("Gemini files not active.")
139
 
140
+ prompt_text = f"""
141
  You are a film director.
142
  Analyze the motion, lighting, and subject of the first video (Video A) and the second video (Video C).
143
  Write a detailed visual prompt for a 2-second video (Video B) that smoothly transitions from the end of A to the start of C.
144
+
145
+ STYLE INSTRUCTION: The user wants the style to be "{style}". Ensure the visual description reflects this style (e.g., color grading, camera movement, atmosphere).
146
+
147
  Target Output: A single concise descriptive paragraph for the video generation model.
148
  """
149
 
 
269
 
270
  else:
271
  logger.warning("Veo operation completed with no result.")
272
+
273
+ # --- AUDIO & MERGE ---
274
+ if local_path:
275
+ update_job_status(job_id, "generating", 90, "Generating audio SFX...")
276
+ audio_path = generate_audio(prompt)
277
+
278
+ if audio_path:
279
+ update_job_status(job_id, "generating", 95, "Merging audio and video...")
280
+ final_path = merge_audio_video(local_path, audio_path)
281
+ local_path = final_path
282
+
283
+ update_job_status(job_id, "completed", 100, "Done!", video_url=local_path)
284
+ return {"generated_video_url": local_path}
285
 
286
  else:
287
  logger.warning("⚠️ GCP_PROJECT_ID not set. Skipping Veo.")
 
362
  "video_a_url": "local",
363
  "video_c_url": "local",
364
  "video_a_local_path": state_or_path_a,
365
+ "video_c_local_path": path_c,
366
+ "style": "Cinematic" # Default
367
  }
368
  else:
369
  state = state_or_path_a if isinstance(state_or_path_a, dict) else state_or_path_a.dict()
 
373
  result = analyze_videos(state)
374
  return {"prompt": result.get("scene_analysis"), "status": "success"}
375
 
376
def generate_only(prompt, path_a, path_c, job_id=None, style="Cinematic"):
    """Run only the video-generation stage for two local clips.

    Builds the minimal state the generator node expects and delegates
    to ``generate_video``.
    """
    generation_state = {
        "job_id": job_id,
        "video_a_url": "local",
        "video_c_url": "local",
        "video_a_local_path": path_a,
        "video_c_local_path": path_c,
        "veo_prompt": prompt,
        "style": style,
    }
    return generate_video(generation_state)
config.py CHANGED
@@ -10,6 +10,8 @@ class Settings:
10
  GCP_LOCATION = os.getenv("GCP_LOCATION", "us-central1")
11
  GCP_CREDENTIALS_JSON = os.getenv("GCP_CREDENTIALS_JSON")
12
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 
 
13
 
14
  @classmethod
15
  def setup_auth(cls):
@@ -26,6 +28,10 @@ class Settings:
26
  """Validates critical environment variables."""
27
  if not cls.GOOGLE_API_KEY:
28
  raise ValueError("GOOGLE_API_KEY is missing from environment variables.")
 
 
 
 
29
 
30
  # Run setup and validation immediately on import
31
  Settings.setup_auth()
 
10
  GCP_LOCATION = os.getenv("GCP_LOCATION", "us-central1")
11
  GCP_CREDENTIALS_JSON = os.getenv("GCP_CREDENTIALS_JSON")
12
  GROQ_API_KEY = os.getenv("GROQ_API_KEY")
13
+ HF_TOKEN = os.getenv("HF_TOKEN")
14
+ GCP_BUCKET_NAME = os.getenv("GCP_BUCKET_NAME")
15
 
16
  @classmethod
17
  def setup_auth(cls):
 
28
  """Validates critical environment variables."""
29
  if not cls.GOOGLE_API_KEY:
30
  raise ValueError("GOOGLE_API_KEY is missing from environment variables.")
31
+ if not cls.HF_TOKEN:
32
+ print("⚠️ HF_TOKEN is missing. Audio generation may fail.")
33
+ if not cls.GCP_BUCKET_NAME:
34
+ print("⚠️ GCP_BUCKET_NAME is missing. Cloud persistence will be disabled.")
35
 
36
  # Run setup and validation immediately on import
37
  Settings.setup_auth()
requirements.txt CHANGED
@@ -18,3 +18,4 @@ google-generativeai
18
  google-cloud-aiplatform
19
 
20
  google-cloud-storage
 
 
18
  google-cloud-aiplatform
19
 
20
  google-cloud-storage
21
+ huggingface_hub
server.py CHANGED
@@ -8,6 +8,7 @@ import shutil
8
  import uuid
9
  import json
10
  from agent import analyze_only, generate_only
 
11
 
12
  app = FastAPI(title="Continuity", description="AI Video Bridging Service")
13
 
@@ -65,6 +66,7 @@ def analyze_endpoint(
65
  def generate_endpoint(
66
  background_tasks: BackgroundTasks,
67
  prompt: str = Body(...),
 
68
  video_a_path: str = Body(...),
69
  video_c_path: str = Body(...)
70
  ):
@@ -81,7 +83,7 @@ def generate_endpoint(
81
  json.dump({"status": "queued", "progress": 0, "log": "Job queued..."}, f)
82
 
83
  # Add to background tasks
84
- background_tasks.add_task(generate_only, prompt, video_a_path, video_c_path, job_id)
85
 
86
  return {"job_id": job_id}
87
 
@@ -102,5 +104,10 @@ def get_status(job_id: str):
102
  except Exception as e:
103
  raise HTTPException(status_code=500, detail=f"Error reading status: {e}")
104
 
 
 
 
 
 
105
  if __name__ == "__main__":
106
  uvicorn.run("server:app", host="0.0.0.0", port=7860, reload=False)
 
8
  import uuid
9
  import json
10
  from agent import analyze_only, generate_only
11
+ from utils import get_history_from_gcs
12
 
13
  app = FastAPI(title="Continuity", description="AI Video Bridging Service")
14
 
 
66
  def generate_endpoint(
67
  background_tasks: BackgroundTasks,
68
  prompt: str = Body(...),
69
+ style: str = Body("Cinematic"),
70
  video_a_path: str = Body(...),
71
  video_c_path: str = Body(...)
72
  ):
 
83
  json.dump({"status": "queued", "progress": 0, "log": "Job queued..."}, f)
84
 
85
  # Add to background tasks
86
+ background_tasks.add_task(generate_only, prompt, video_a_path, video_c_path, job_id, style)
87
 
88
  return {"job_id": job_id}
89
 
 
104
  except Exception as e:
105
  raise HTTPException(status_code=500, detail=f"Error reading status: {e}")
106
 
107
+ @app.get("/history")
108
+ def get_history():
109
+ """Returns list of past generated videos from GCS."""
110
+ return get_history_from_gcs()
111
+
112
  if __name__ == "__main__":
113
  uvicorn.run("server:app", host="0.0.0.0", port=7860, reload=False)
stitch_continuity_dashboard/code.html CHANGED
@@ -275,6 +275,20 @@
275
  class="w-full bg-surface-dark/50 border border-white/10 rounded-lg p-3 text-white text-sm focus:border-primary focus:ring-1 focus:ring-primary outline-none"
276
  placeholder="AI generated transition prompt will appear here..."></textarea>
277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
  <button id="generate-btn"
279
  class="w-full flex items-center justify-center gap-2 bg-gradient-to-r from-primary to-purple-600 hover:from-[#6b0bc9] hover:to-purple-700 text-white px-6 py-3 rounded-xl font-bold text-lg transition-all shadow-lg">
280
  <span class="material-symbols-outlined text-[24px]">movie_filter</span>
@@ -364,6 +378,7 @@
364
  if (generateBtn) {
365
  generateBtn.addEventListener("click", async () => {
366
  const prompt = document.getElementById("prompt-box").value;
 
367
  const btn = document.getElementById("generate-btn");
368
 
369
  if (!currentVideoAPath || !currentVideoCPath) {
@@ -386,6 +401,7 @@
386
  },
387
  body: JSON.stringify({
388
  prompt: prompt,
 
389
  video_a_path: currentVideoAPath,
390
  video_c_path: currentVideoCPath
391
  })
 
275
  class="w-full bg-surface-dark/50 border border-white/10 rounded-lg p-3 text-white text-sm focus:border-primary focus:ring-1 focus:ring-primary outline-none"
276
  placeholder="AI generated transition prompt will appear here..."></textarea>
277
 
278
+ <div class="flex flex-col gap-2">
279
+ <label for="style-select" class="text-xs font-bold text-gray-400 uppercase tracking-widest pl-1">Creative
280
+ Style</label>
281
+ <select id="style-select"
282
+ class="w-full bg-surface-dark/50 border border-white/10 rounded-lg p-3 text-white text-sm focus:border-primary focus:ring-1 focus:ring-primary outline-none">
283
+ <option value="Cinematic">Cinematic (Default)</option>
284
+ <option value="Anime">Anime</option>
285
+ <option value="Cyberpunk">Cyberpunk</option>
286
+ <option value="VHS Glitch">VHS Glitch</option>
287
+ <option value="Claymation">Claymation</option>
288
+ <option value="Noir">Noir</option>
289
+ </select>
290
+ </div>
291
+
292
  <button id="generate-btn"
293
  class="w-full flex items-center justify-center gap-2 bg-gradient-to-r from-primary to-purple-600 hover:from-[#6b0bc9] hover:to-purple-700 text-white px-6 py-3 rounded-xl font-bold text-lg transition-all shadow-lg">
294
  <span class="material-symbols-outlined text-[24px]">movie_filter</span>
 
378
  if (generateBtn) {
379
  generateBtn.addEventListener("click", async () => {
380
  const prompt = document.getElementById("prompt-box").value;
381
+ const style = document.getElementById("style-select").value;
382
  const btn = document.getElementById("generate-btn");
383
 
384
  if (!currentVideoAPath || !currentVideoCPath) {
 
401
  },
402
  body: JSON.stringify({
403
  prompt: prompt,
404
+ style: style,
405
  video_a_path: currentVideoAPath,
406
  video_c_path: currentVideoCPath
407
  })
utils.py CHANGED
@@ -4,7 +4,9 @@ import requests
4
  import tempfile
5
  import logging
6
  import json
 
7
  from google.cloud import storage
 
8
 
9
  # Configure logging for utils
10
  logger = logging.getLogger(__name__)
@@ -36,6 +38,55 @@ def download_blob(gcs_uri, destination_file_name):
36
  blob.download_to_filename(destination_file_name)
37
  logger.info(f"Downloaded storage object {gcs_uri} to local file {destination_file_name}.")
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def save_video_bytes(bytes_data, suffix=".mp4") -> str:
40
  """Saves raw video bytes to a temporary local file."""
41
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
@@ -64,6 +115,11 @@ def update_job_status(job_id, status, progress, log=None, video_url=None):
64
  logger.info(f"Moved video to {destination}")
65
  # Set public URL relative to server root
66
  final_video_url = f"/outputs/{final_filename}"
 
 
 
 
 
67
  except Exception as e:
68
  logger.error(f"Failed to move output video: {e}")
69
 
 
4
  import tempfile
5
  import logging
6
  import json
7
+ from datetime import datetime, timedelta
8
  from google.cloud import storage
9
+ from config import Settings
10
 
11
  # Configure logging for utils
12
  logger = logging.getLogger(__name__)
 
38
  blob.download_to_filename(destination_file_name)
39
  logger.info(f"Downloaded storage object {gcs_uri} to local file {destination_file_name}.")
40
 
41
def upload_to_gcs(local_path, destination_blob_name, expiration_hours: float = 1.0):
    """Upload a local file to the configured GCS bucket.

    Args:
        local_path: Path of the file to upload.
        destination_blob_name: Target object name within the bucket.
        expiration_hours: Validity window of the returned signed URL
            (defaults to 1 hour, preserving the previous behavior).

    Returns:
        A signed GET URL for the uploaded blob, or ``None`` when the
        bucket is not configured or the upload fails.
    """
    bucket_name = Settings.GCP_BUCKET_NAME
    if not bucket_name:
        logger.warning("GCP_BUCKET_NAME not set. Skipping upload.")
        return None

    try:
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)

        blob.upload_from_filename(local_path)

        # Signed URL lets the frontend fetch the clip without making
        # the bucket public.
        url = blob.generate_signed_url(expiration=timedelta(hours=expiration_hours), method='GET')
        logger.info(f"Uploaded {local_path} to {destination_blob_name}. URL: {url}")
        return url
    except Exception as e:
        logger.error(f"Failed to upload to GCS: {e}")
        return None
62
+
63
def get_history_from_gcs(limit: int = 20):
    """List the most recent generated videos stored in the GCS bucket.

    Args:
        limit: Maximum number of entries to return (default 20,
            preserving the previous behavior).

    Returns:
        A list of dicts with ``name``, a 1-hour signed ``url``, and the
        ISO-8601 ``created`` timestamp, newest first. Returns ``[]``
        when the bucket is not configured or listing fails.
    """
    bucket_name = Settings.GCP_BUCKET_NAME
    if not bucket_name:
        return []

    try:
        storage_client = storage.Client()
        blobs = storage_client.list_blobs(bucket_name)

        # Filter to videos BEFORE truncating: slicing first would let
        # non-mp4 blobs (e.g. audio files) eat into the limit and
        # return fewer videos than actually exist.
        videos = [b for b in blobs if b.name.endswith(".mp4")]
        # Newest first.
        videos.sort(key=lambda b: b.time_created, reverse=True)

        history = []
        for blob in videos[:limit]:
            url = blob.generate_signed_url(expiration=timedelta(hours=1), method='GET')
            history.append({
                "name": blob.name,
                "url": url,
                "created": blob.time_created.isoformat()
            })
        return history
    except Exception as e:
        logger.error(f"Failed to list GCS history: {e}")
        return []
89
+
90
  def save_video_bytes(bytes_data, suffix=".mp4") -> str:
91
  """Saves raw video bytes to a temporary local file."""
92
  with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
 
115
  logger.info(f"Moved video to {destination}")
116
  # Set public URL relative to server root
117
  final_video_url = f"/outputs/{final_filename}"
118
+
119
+ # --- AUTO BACKUP TO CLOUD ---
120
+ if Settings.GCP_BUCKET_NAME:
121
+ logger.info(f"Backing up {final_filename} to GCS...")
122
+ upload_to_gcs(destination, final_filename)
123
  except Exception as e:
124
  logger.error(f"Failed to move output video: {e}")
125