Spaces:

Bhishaj
/

Continuity

Sleeping

App Files Files Community

Gaurav vashistha committed on Jan 17

Commit

ef27e6d

1 Parent(s): a8c659a

feat: integrate Veo and GCS download

Browse files

Files changed (6) hide show

agent.py +151 -83
check_genai.py +5 -0
check_genai_help.py +7 -0
check_genai_models.py +7 -0
requirements.txt +4 -0
server.py +9 -14

agent.py CHANGED Viewed

@@ -4,13 +4,27 @@ import shutil
 import requests
 import tempfile
 import logging
 from typing import TypedDict, Optional
 from langgraph.graph import StateGraph, END
 from google import genai
 from groq import Groq
 from gradio_client import Client, handle_file
 from dotenv import load_dotenv
 # Load environment variables
 load_dotenv()
@@ -32,6 +46,9 @@ class ContinuityState(TypedDict):
 # --- HELPER FUNCTIONS ---
 def download_to_temp(url):
     logger.info(f"Downloading: {url}")
     resp = requests.get(url, stream=True)
     resp.raise_for_status()
     suffix = os.path.splitext(url.split("/")[-1])[1] or ".mp4"
@@ -39,46 +56,63 @@ def download_to_temp(url):
         shutil.copyfileobj(resp.raw, f)
         return f.name
 # --- NODE 1: ANALYST ---
 def analyze_videos(state: ContinuityState) -> dict:
     logger.info("--- 🧐 Analyst Node (Director) ---")
     video_a_url = state['video_a_url']
     video_c_url = state['video_c_url']
     # 1. Prepare Files
     try:
         path_a = state.get('video_a_local_path')
         if not path_a:
-             path_a = download_to_temp(video_a_url)
         path_c = state.get('video_c_local_path')
         if not path_c:
-             path_c = download_to_temp(video_c_url)
     except Exception as e:
         logger.error(f"Download failed: {e}")
         return {"scene_analysis": "Error downloading", "veo_prompt": "Smooth cinematic transition"}
     # 2. Try Gemini 2.0 (With Retry)
     client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
     transition_prompt = None
     retries = 3
     for attempt in range(retries):
         try:
             logger.info(f"Uploading videos to Gemini... (Attempt {attempt+1})")
             file_a = client.files.upload(file=path_a)
             file_c = client.files.upload(file=path_c)
             prompt_text = """
             You are a film director.
             Analyze the motion, lighting, and subject of the first video (Video A) and the second video (Video C).
             Write a detailed visual prompt for a 2-second video (Video B) that smoothly transitions from the end of A to the start of C.
             Target Output: A single concise descriptive paragraph for the video generation model.
             """
             logger.info("Generating transition prompt...")
-            # Using 2.0 Flash as per your logs (or 1.5-flash if preferred)
             response = client.models.generate_content(
                 model="gemini-2.0-flash-exp",
                 contents=[prompt_text, file_a, file_c]
@@ -86,7 +120,6 @@ def analyze_videos(state: ContinuityState) -> dict:
             transition_prompt = response.text
             logger.info(f"Generated Prompt: {transition_prompt}")
             break # Success
         except Exception as e:
             if "429" in str(e) or "RESOURCE_EXHAUSTED" in str(e):
                 wait = 30 * (attempt + 1)
@@ -101,14 +134,10 @@ def analyze_videos(state: ContinuityState) -> dict:
         logger.info("Switching to Llama 3.2 (Groq) Fallback...")
         try:
             groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
-            # We can't easily send videos, so we generate a prompt based on general best practices
-            fallback_prompt = "Create a smooth, cinematic visual transition that bridges two scenes with matching lighting and motion blur."
             completion = groq_client.chat.completions.create(
                 model="llama-3.2-11b-vision-preview",
-                messages=[
-                    {"role": "user", "content": f"Refine this into a video generation prompt: {fallback_prompt}"}
-                ]
             )
             transition_prompt = completion.choices[0].message.content
         except Exception as e:
@@ -125,94 +154,107 @@ def analyze_videos(state: ContinuityState) -> dict:
 # --- NODE 2: GENERATOR ---
 def generate_video(state: ContinuityState) -> dict:
     logger.info("--- 🎥 Generator Node ---")
     prompt = state.get('veo_prompt', "")
     path_a = state.get('video_a_local_path')
     path_c = state.get('video_c_local_path')
     if not path_a or not path_c:
         return {"generated_video_url": "Error: Missing local video paths"}
     try:
-        # Extract Frames (simplified for brevity, ensuring libraries are imported)
         import cv2
         from PIL import Image
-        def get_frame(video_path, location="last"):
             cap = cv2.VideoCapture(video_path)
-            total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-            if location == "last": cap.set(cv2.CAP_PROP_POS_FRAMES, total_frames - 1)
-            else: cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
             ret, frame = cap.read()
             cap.release()
-            if ret: return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-            raise ValueError(f"Could not extract frame from {video_path}")
-        logger.info("Extracting frames...")
-        img_start = get_frame(path_a, "last")
-        img_end = get_frame(path_c, "first")
         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f_start:
             img_start.save(f_start, format="PNG")
             start_path = f_start.name
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f_end:
-            img_end.save(f_end, format="PNG")
-            end_path = f_end.name
-        # --- ATTEMPT 1: WAN 2.2 ---
-        try:
-            logger.info("Initializing Wan Client...")
-            client = Client("multimodalart/wan-2-2-first-last-frame")
-            logger.info(f"Generating with Wan 2.2... Prompt: {prompt[:30]}...")
-            result = client.predict(
-                start_image_pil=handle_file(start_path),
-                end_image_pil=handle_file(end_path),
-                prompt=prompt,
-                negative_prompt="blurry, distorted, low quality, static",
-                duration_seconds=2.1,
-                steps=20,
-                guidance_scale=5.0,
-                guidance_scale_2=5.0,
-                seed=42,
-                randomize_seed=True,
-                api_name="/generate_video"
-            )
-            # Handle Wan output format
-            video_out = result[0]
-            if isinstance(video_out, dict) and 'video' in video_out:
-                 return {"generated_video_url": video_out['video']}
-            elif isinstance(video_out, str) and os.path.exists(video_out):
-                 return {"generated_video_url": video_out}
-        except Exception as e:
-            logger.warning(f"⚠️ Wan 2.2 Failed: {e}")
-        # --- ATTEMPT 2: SVD FALLBACK ---
-        logger.info("🔄 Switching to SVD Fallback...")
-        try:
-            # FIXED REPO ID
-            client = Client("multimodalart/stable-video-diffusion")
-            # SVD uses one image, we'll use the start frame
-            result = client.predict(
-                handle_file(start_path),
-                0.0, 0.0, 1, 25, # resized_width, resized_height, motion_bucket_id, fps
-                api_name="/predict"
-            )
-            logger.info(f"✅ SVD Generated: {result}")
-            return {"generated_video_url": result} # SVD usually returns path string
-        except Exception as e:
-            logger.error(f"❌ All Generators Failed. Error: {e}")
-            return {"generated_video_url": f"Error: {str(e)}"}
     except Exception as e:
-        logger.error(f"Error in Generator Setup: {e}")
         return {"generated_video_url": f"Error: {str(e)}"}
 # Graph Construction
 workflow = StateGraph(ContinuityState)
 workflow.add_node("analyst", analyze_videos)
@@ -220,4 +262,30 @@ workflow.add_node("generator", generate_video)
 workflow.set_entry_point("analyst")
 workflow.add_edge("analyst", "generator")
 workflow.add_edge("generator", END)
-app = workflow.compile()

 import requests
 import tempfile
 import logging
+import json
 from typing import TypedDict, Optional
 from langgraph.graph import StateGraph, END
+# Unified SDK for both Analyst (Gemini) and Generator (Veo)
 from google import genai
+from google.genai import types
+from google.cloud import storage # Required for downloading Veo output
 from groq import Groq
 from gradio_client import Client, handle_file
 from dotenv import load_dotenv
# --- AUTH SETUP FOR HUGGING FACE ---
# When the service-account key is supplied through the GCP_CREDENTIALS_JSON
# environment variable, write it to disk and point
# GOOGLE_APPLICATION_CREDENTIALS at that file.
if "GCP_CREDENTIALS_JSON" in os.environ:
    # logger is not defined yet, using print
    print("🔐 Found GCP Credentials Secret. Setting up auth...")
    # Absolute path so the variable stays valid if the working directory changes.
    creds_path = os.path.abspath("gcp_credentials.json")
    # The file holds a private key: create it readable/writable by the owner only.
    creds_fd = os.open(creds_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(creds_fd, "w") as f:
        f.write(os.environ["GCP_CREDENTIALS_JSON"])
    # os.open() only applies the mode when it creates the file; tighten a
    # pre-existing file as well.
    os.chmod(creds_path, 0o600)
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = creds_path
 # Load environment variables
 load_dotenv()
 # --- HELPER FUNCTIONS ---
 def download_to_temp(url):
     logger.info(f"Downloading: {url}")
+    if os.path.exists(url):
+        return url
     resp = requests.get(url, stream=True)
     resp.raise_for_status()
     suffix = os.path.splitext(url.split("/")[-1])[1] or ".mp4"
         shutil.copyfileobj(resp.raw, f)
         return f.name
def download_blob(gcs_uri: str, destination_file_name: str) -> None:
    """Download one Cloud Storage object to a local file.

    Args:
        gcs_uri: Object location in the form ``gs://bucket-name/path/to/object``.
        destination_file_name: Local path the object's contents are written to.

    Raises:
        ValueError: If ``gcs_uri`` does not start with ``gs://`` or is missing
            either the bucket name or the object path.
    """
    scheme = "gs://"
    if not gcs_uri.startswith(scheme):
        raise ValueError(f"Invalid GCS URI: {gcs_uri}")
    # Split once on the first "/" only: everything after it is the object name,
    # which may itself contain slashes.
    bucket_name, _, source_blob_name = gcs_uri[len(scheme):].partition("/")
    if not bucket_name or not source_blob_name:
        # Covers "gs://bucket", "gs://bucket/" and "gs:///object", which would
        # otherwise surface as an IndexError or an opaque storage-client error.
        raise ValueError(f"Invalid GCS URI: {gcs_uri}")
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(source_blob_name)
    blob.download_to_filename(destination_file_name)
    logger.info(f"Downloaded storage object {gcs_uri} to local file {destination_file_name}.")
 # --- NODE 1: ANALYST ---
 def analyze_videos(state: ContinuityState) -> dict:
     logger.info("--- 🧐 Analyst Node (Director) ---")
     video_a_url = state['video_a_url']
     video_c_url = state['video_c_url']
     # 1. Prepare Files
     try:
         path_a = state.get('video_a_local_path')
         if not path_a:
+            path_a = download_to_temp(video_a_url)
         path_c = state.get('video_c_local_path')
         if not path_c:
+            path_c = download_to_temp(video_c_url)
     except Exception as e:
         logger.error(f"Download failed: {e}")
         return {"scene_analysis": "Error downloading", "veo_prompt": "Smooth cinematic transition"}
     # 2. Try Gemini 2.0 (With Retry)
+    # Standard Client for Gemini (API Key based)
     client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])
     transition_prompt = None
     retries = 3
     for attempt in range(retries):
         try:
             logger.info(f"Uploading videos to Gemini... (Attempt {attempt+1})")
             file_a = client.files.upload(file=path_a)
             file_c = client.files.upload(file=path_c)
             prompt_text = """
             You are a film director.
             Analyze the motion, lighting, and subject of the first video (Video A) and the second video (Video C).
             Write a detailed visual prompt for a 2-second video (Video B) that smoothly transitions from the end of A to the start of C.
             Target Output: A single concise descriptive paragraph for the video generation model.
             """
             logger.info("Generating transition prompt...")
+            # Using 2.0 Flash Exp or falling back to 1.5 Flash if needed
             response = client.models.generate_content(
                 model="gemini-2.0-flash-exp",
                 contents=[prompt_text, file_a, file_c]
             transition_prompt = response.text
             logger.info(f"Generated Prompt: {transition_prompt}")
             break # Success
         except Exception as e:
             if "429" in str(e) or "RESOURCE_EXHAUSTED" in str(e):
                 wait = 30 * (attempt + 1)
         logger.info("Switching to Llama 3.2 (Groq) Fallback...")
         try:
             groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
+            fallback_prompt = "Create a smooth, cinematic visual transition that bridges two scenes."
             completion = groq_client.chat.completions.create(
                 model="llama-3.2-11b-vision-preview",
+                messages=[{"role": "user", "content": f"Refine this into a video prompt: {fallback_prompt}"}]
             )
             transition_prompt = completion.choices[0].message.content
         except Exception as e:
 # --- NODE 2: GENERATOR ---
 def generate_video(state: ContinuityState) -> dict:
     """Produce the bridging clip: try Google Veo first, then fall back to SVD.

     Reads ``veo_prompt``, ``video_a_local_path`` and ``video_c_local_path``
     from ``state``.

     Returns:
         A dict with a single ``generated_video_url`` key. On success the value
         is whatever the backend produced (a local .mp4 path for Veo, the raw
         ``predict`` result for SVD); on failure it is a string starting with
         ``"Error"``.
     """
     # Local import: the polling loop needs ``time``, which is not among the
     # imports visible at the top of this module.
     import time
     # Upper bound on how long the Veo operation is polled before giving up
     # and falling back to SVD, and the pause between polls.
     veo_timeout_seconds = 600
     veo_poll_seconds = 10
     logger.info("--- 🎥 Generator Node ---")
     prompt = state.get('veo_prompt', "")
     path_a = state.get('video_a_local_path')
     path_c = state.get('video_c_local_path')
     if not path_a or not path_c:
         return {"generated_video_url": "Error: Missing local video paths"}
     # --- ATTEMPT 1: GOOGLE VEO (VIA UNIFIED GENAI SDK) ---
     try:
         logger.info("⚡ Initializing Google Veo (Unified SDK)...")
         project_id = os.getenv("GCP_PROJECT_ID")
         location = os.getenv("GCP_LOCATION", "us-central1")
         if project_id:
             # Vertex AI flavour of the genai client (project/location based).
             veo_client = genai.Client(
                 vertexai=True,
                 project=project_id,
                 location=location,
             )
             logger.info(f"Generating with Veo... Prompt: {prompt[:30]}...")
             # Submit the long-running generation operation.
             operation = veo_client.models.generate_videos(
                 model='veo-2.0-generate-001',
                 prompt=prompt,
                 config=types.GenerateVideosConfig(
                     number_of_videos=1,
                 ),
             )
             logger.info(f"Waiting for Veo operation {operation.name}...")
             deadline = time.monotonic() + veo_timeout_seconds
             while not operation.done:
                 if time.monotonic() >= deadline:
                     # Raised inside the try so it is logged below and SVD takes over.
                     raise TimeoutError(
                         f"Veo operation {operation.name} did not finish within {veo_timeout_seconds}s"
                     )
                 time.sleep(veo_poll_seconds)
                 # operations.get() refreshes an operation *object*; passing the
                 # bare name string fails inside the SDK.
                 operation = veo_client.operations.get(operation)
                 logger.info("...still generating...")
             operation_error = getattr(operation, "error", None)
             if operation_error:
                 logger.warning(f"Veo operation reported an error: {operation_error}")
             veo_result = operation.result
             if veo_result and veo_result.generated_videos:
                 video = veo_result.generated_videos[0].video
                 gcs_uri = getattr(video, 'uri', None)
                 video_bytes = getattr(video, 'video_bytes', None)
                 if gcs_uri:
                     logger.info(f"Veo output saved to GCS: {gcs_uri}")
                     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
                         local_path = f.name
                     try:
                         download_blob(gcs_uri, local_path)
                     except Exception:
                         # Do not leave an empty placeholder file behind.
                         try:
                             os.remove(local_path)
                         except OSError:
                             pass
                         raise
                     logger.info(f"✅ Veo Video Downloaded: {local_path}")
                     return {"generated_video_url": local_path}
                 if video_bytes:
                     # Without an output bucket configured the video may come
                     # back inline instead of as a GCS URI.
                     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as f:
                         f.write(video_bytes)
                         local_path = f.name
                     logger.info(f"✅ Veo Video Downloaded: {local_path}")
                     return {"generated_video_url": local_path}
                 logger.warning("Veo operation completed but no URI found.")
             else:
                 logger.warning("Veo operation completed with no result.")
         else:
             logger.warning("⚠️ GCP_PROJECT_ID not set. Skipping Veo.")
     except Exception as e:
         # Any Veo failure is non-fatal: fall through to the SVD fallback.
         logger.warning(f"⚠️ Veo Failed: {e}")
     # --- ATTEMPT 2: SVD FALLBACK (Free) ---
     logger.info("🔄 Switching to SVD Fallback...")
     start_path = None
     try:
         import cv2
         from PIL import Image

         def get_frame(video_path):
             """Return the first decodable frame as an RGB PIL image, or None."""
             cap = cv2.VideoCapture(video_path)
             ret, frame = cap.read()
             cap.release()
             if ret:
                 return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
             return None

         # SVD is conditioned on a single image; the first frame read from video A is used.
         img_start = get_frame(path_a)
         if img_start is None:
             raise ValueError("Could not read start frame for SVD")
         with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as f_start:
             # Record the path before saving so cleanup also runs if save() fails.
             start_path = f_start.name
             img_start.save(f_start, format="PNG")
         svd_client = Client("multimodalart/stable-video-diffusion")
         result = svd_client.predict(
             handle_file(start_path),
             0.0, 0.0, 1, 25,
             api_name="/predict",
         )
         logger.info(f"✅ SVD Generated: {result}")
         return {"generated_video_url": result}
     except Exception as e:
         logger.error(f"❌ All Generators Failed. Error: {e}")
         return {"generated_video_url": f"Error: {str(e)}"}
     finally:
         # The start-frame PNG is only an upload input; remove it once done.
         if start_path and os.path.exists(start_path):
             try:
                 os.remove(start_path)
             except OSError:
                 pass
 # Graph Construction
 workflow = StateGraph(ContinuityState)
 workflow.add_node("analyst", analyze_videos)
 workflow.set_entry_point("analyst")
 workflow.add_edge("analyst", "generator")
 workflow.add_edge("generator", END)
+app = workflow.compile()
+# --- SERVER COMPATIBILITY WRAPPERS ---
def analyze_only(state_or_path_a, path_c=None):
    """Run only the analyst node, accepting either two paths or a state object.

    Args:
        state_or_path_a: Either the local path of video A (server call format,
            used together with ``path_c``), a state dict, or another object
            exposing ``.dict()`` (presumably a pydantic-style model; verify
            against callers).
        path_c: Local path of video C; required when the first argument is a
            path string.

    Returns:
        ``{"prompt": <scene_analysis from analyze_videos>, "status": "success"}``.

    Raises:
        ValueError: If a path string is given without ``path_c``.
    """
    if isinstance(state_or_path_a, str):
        if not path_c:
            # Previously this fell through to ``str.dict()`` and died with an
            # unrelated-looking AttributeError.
            raise ValueError("analyze_only needs path_c when the first argument is a video path")
        # Direct server call format (path_a, path_c): the URLs are placeholders
        # because the local paths are already known.
        state = {
            "video_a_url": "local",
            "video_c_url": "local",
            "video_a_local_path": state_or_path_a,
            "video_c_local_path": path_c,
        }
    elif isinstance(state_or_path_a, dict):
        state = state_or_path_a
    else:
        state = state_or_path_a.dict()
    result = analyze_videos(state)
    # NOTE(review): status is always "success", even when analyze_videos hands
    # back its download-failure placeholder in scene_analysis. Confirm whether
    # callers should see status "error" in that case.
    return {"prompt": result.get("scene_analysis"), "status": "success"}
def generate_only(prompt, path_a, path_c):
    """Run only the generator node for two local videos and a ready prompt.

    Builds the minimal state ``generate_video`` reads (the URL fields are
    placeholders because the local paths are supplied directly) and returns
    its result unchanged.
    """
    generator_state = dict(
        video_a_url="local",
        video_c_url="local",
        video_a_local_path=path_a,
        video_c_local_path=path_c,
        veo_prompt=prompt,
    )
    return generate_video(generator_state)

check_genai.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from google import genai
# Diagnostic: list the attributes available on the genai Client class.
try:
    client_attributes = dir(genai.Client)
    print("Client methods:", client_attributes)
except Exception as err:
    # Best-effort probe: report the problem instead of crashing.
    print(err)

check_genai_help.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from google import genai
+import os
# Diagnostic: show the built-in documentation for models.generate_videos.
try:
    client = genai.Client(api_key="TEST")
    # help() writes the documentation itself and returns None, so wrapping it
    # in print() only appended a stray "None" line.
    help(client.models.generate_videos)
except Exception as e:
    # Best-effort probe: report the problem instead of crashing.
    print(e)

check_genai_models.py ADDED Viewed

	@@ -0,0 +1,7 @@

+from google import genai
+import os
# Diagnostic: list the attributes available on a client's `models` namespace.
try:
    probe_client = genai.Client(api_key="TEST")
    model_attributes = dir(probe_client.models)
    print("models methods:", model_attributes)
except Exception as err:
    # Best-effort probe: report the problem instead of crashing.
    print(err)

requirements.txt CHANGED Viewed

@@ -14,3 +14,7 @@ groq
 numpy
 gradio
 google-generativeai

 numpy
 gradio
 google-generativeai
+google-cloud-aiplatform
+google-cloud-storage

server.py CHANGED Viewed

@@ -6,7 +6,8 @@ import uvicorn
 import os
 import shutil
 import uuid
-from continuity_agent.agent import analyze_only, generate_only
 app = FastAPI(title="Continuity", description="AI Video Bridging Service")
@@ -24,6 +25,7 @@ app.mount("/outputs", StaticFiles(directory=OUTPUT_DIR), name="outputs")
 @app.get("/")
 async def read_root():
     return FileResponse("stitch_continuity_dashboard/code.html")
 @app.post("/analyze")
@@ -38,18 +40,18 @@ async def analyze_endpoint(
         path_a = os.path.join(OUTPUT_DIR, f"{request_id}_a{ext_a}")
         path_c = os.path.join(OUTPUT_DIR, f"{request_id}_c{ext_c}")
         with open(path_a, "wb") as buffer:
             shutil.copyfileobj(video_a.file, buffer)
         with open(path_c, "wb") as buffer:
             shutil.copyfileobj(video_c.file, buffer)
-        # Call Agent
-        result = analyze_only(os.path.abspath(path_a), os.path.abspath(path_c))
         if result.get("status") == "error":
              raise HTTPException(status_code=500, detail=result.get("detail"))
         return {
             "prompt": result["prompt"],
             "video_a_path": os.path.abspath(path_a),
@@ -68,7 +70,7 @@ async def generate_endpoint(
     try:
         if not os.path.exists(video_a_path) or not os.path.exists(video_c_path):
              raise HTTPException(status_code=400, detail="Video files not found on server.")
         # Call Agent
         result = generate_only(prompt, video_a_path, video_c_path)
         gen_path = result.get("generated_video_url")
@@ -76,16 +78,12 @@ async def generate_endpoint(
         if not gen_path or "Error" in gen_path:
             raise HTTPException(status_code=500, detail=f"Generation failed: {gen_path}")
-        # Move final file to output dir if it's not already there (SVD might return temp path)
         final_filename = f"{uuid.uuid4()}_bridge.mp4"
         final_output_path = os.path.join(OUTPUT_DIR, final_filename)
-        # If gen_path is a URL (some providers), we might need to handle differently
-        # But our agent functions return local paths (SVD) or temp paths (Wan)
         if os.path.exists(gen_path):
              shutil.move(gen_path, final_output_path)
         else:
-             # Assume it's an error message or invalid
              raise HTTPException(status_code=500, detail="Generated file missing.")
         return {"video_url": f"/outputs/{final_filename}"}
@@ -93,9 +91,6 @@ async def generate_endpoint(
     except Exception as e:
         print(f"Server Error (Generate): {e}")
         raise HTTPException(status_code=500, detail=str(e))
-    except Exception as e:
-        print(f"Server Error: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     uvicorn.run("server:app", host="0.0.0.0", port=7860, reload=False)

 import os
 import shutil
 import uuid
+# FIXED IMPORT: Importing from root agent.py instead of continuity_agent
+from agent import analyze_only, generate_only
 app = FastAPI(title="Continuity", description="AI Video Bridging Service")
 @app.get("/")
 async def read_root():
+    # Serve the dashboard HTML
     return FileResponse("stitch_continuity_dashboard/code.html")
 @app.post("/analyze")
         path_a = os.path.join(OUTPUT_DIR, f"{request_id}_a{ext_a}")
         path_c = os.path.join(OUTPUT_DIR, f"{request_id}_c{ext_c}")
         with open(path_a, "wb") as buffer:
             shutil.copyfileobj(video_a.file, buffer)
         with open(path_c, "wb") as buffer:
             shutil.copyfileobj(video_c.file, buffer)
+        # Call Agent with local paths
+        result = analyze_only(os.path.abspath(path_a), os.path.abspath(path_c))
         if result.get("status") == "error":
              raise HTTPException(status_code=500, detail=result.get("detail"))
         return {
             "prompt": result["prompt"],
             "video_a_path": os.path.abspath(path_a),
     try:
         if not os.path.exists(video_a_path) or not os.path.exists(video_c_path):
              raise HTTPException(status_code=400, detail="Video files not found on server.")
         # Call Agent
         result = generate_only(prompt, video_a_path, video_c_path)
         gen_path = result.get("generated_video_url")
         if not gen_path or "Error" in gen_path:
             raise HTTPException(status_code=500, detail=f"Generation failed: {gen_path}")
         final_filename = f"{uuid.uuid4()}_bridge.mp4"
         final_output_path = os.path.join(OUTPUT_DIR, final_filename)
         if os.path.exists(gen_path):
              shutil.move(gen_path, final_output_path)
         else:
              raise HTTPException(status_code=500, detail="Generated file missing.")
         return {"video_url": f"/outputs/{final_filename}"}
     except Exception as e:
         print(f"Server Error (Generate): {e}")
         raise HTTPException(status_code=500, detail=str(e))
 if __name__ == "__main__":
     uvicorn.run("server:app", host="0.0.0.0", port=7860, reload=False)