Spaces:

MCP-1st-Birthday
/

vidzly

Paused

App Files Files Community

tthhanh committed on Nov 28, 2025

Commit

5a459dd

1 Parent(s): ff7ae34

chore: black reformatting

Browse files

Files changed (6) hide show

src/app/tools/langchain_tools.py +24 -16
src/app/tools/tool_schemas.py +10 -17
src/app/tools/video_clipper.py +5 -3
src/app/tools/video_composer.py +19 -13
src/app/tools/video_script_generator.py +17 -9
src/app/workflow.py +17 -9

src/app/tools/langchain_tools.py CHANGED Viewed

@@ -36,7 +36,9 @@ _VIDEO_PATH_REGISTRY: List[str] = []
 def register_video_paths(paths: List[str]) -> None:
     """Register valid video paths for path resolution."""
     global _VIDEO_PATH_REGISTRY
-    _VIDEO_PATH_REGISTRY = [os.path.abspath(p) for p in paths if p and os.path.exists(p)]
 def _resolve_video_path(video_path: str) -> Optional[str]:
@@ -48,16 +50,16 @@ def _resolve_video_path(video_path: str) -> Optional[str]:
     """
     # Clean the path
     video_path = video_path.strip()
     # Try direct path first
     if os.path.exists(video_path):
         return os.path.abspath(video_path)
     # Try absolute path conversion
     abs_path = os.path.abspath(video_path)
     if os.path.exists(abs_path):
         return abs_path
     # Try to find matching path in registry by filename
     if _VIDEO_PATH_REGISTRY:
         filename = os.path.basename(video_path)
@@ -65,7 +67,7 @@ def _resolve_video_path(video_path: str) -> Optional[str]:
             if os.path.basename(registered_path) == filename:
                 if os.path.exists(registered_path):
                     return registered_path
         # Try fuzzy matching - check if the path is similar to any registered path
         # This handles cases where the path got corrupted (e.g., missing characters)
         for registered_path in _VIDEO_PATH_REGISTRY:
@@ -74,7 +76,7 @@ def _resolve_video_path(video_path: str) -> Optional[str]:
             if filename in registered_path or registered_path.endswith(filename):
                 if os.path.exists(registered_path):
                     return registered_path
     return None
@@ -107,7 +109,7 @@ def video_summarizer_tool(video_path: str, fps: float = 2.0) -> str:
     else:
         # If resolution failed, try the original path anyway
         result_json = video_summarizer(video_path, fps=fps)
     # Validate and ensure the result matches VideoSummary schema
     try:
         parsed = json.loads(result_json)
@@ -147,8 +149,10 @@ def video_script_generator_tool(
     Returns:
         JSON string containing detailed script with scene information and composition details matching VideoScript schema
     """
-    result_json = video_script_generator(video_summaries, user_description, target_duration)
     # Validate and ensure the result matches VideoScript schema
     try:
         parsed = json.loads(result_json)
@@ -197,7 +201,7 @@ def music_selector_tool(
         looping=looping,
         prompt_influence=prompt_influence,
     )
     # Return as JSON string matching MusicSelectorResult schema
     result = MusicSelectorResult(audio_path=audio_path)
     return result.model_dump_json()
@@ -226,11 +230,15 @@ def frame_extractor_tool(
     # Try to resolve the path in case it got corrupted
     resolved_path = _resolve_video_path(video_path)
     if resolved_path:
-        frame_path = frame_extractor(resolved_path, thumbnail_timeframe=thumbnail_timeframe)
     else:
         # If resolution failed, try the original path anyway
-        frame_path = frame_extractor(video_path, thumbnail_timeframe=thumbnail_timeframe)
     # Return as JSON string matching FrameExtractorResult schema
     result = FrameExtractorResult(frame_path=frame_path)
     return result.model_dump_json()
@@ -255,7 +263,7 @@ def thumbnail_generator_tool(image_path: str, summary: str) -> str:
         JSON string with thumbnail_path field matching ThumbnailGeneratorResult schema
     """
     thumbnail_path = thumbnail_generator(image_path, summary)
     # Return as JSON string matching ThumbnailGeneratorResult schema
     result = ThumbnailGeneratorResult(thumbnail_path=thumbnail_path)
     return result.model_dump_json()
@@ -293,7 +301,7 @@ def video_composer_tool(
     else:
         # Comma-separated paths
         clips_list = [path.strip() for path in video_clips.split(",") if path.strip()]
     # Resolve all video clip paths in case they got corrupted
     resolved_clips = []
     for clip_path in clips_list:
@@ -310,7 +318,7 @@ def video_composer_tool(
         music_path=music_path,
         thumbnail_image=thumbnail_image,
     )
     # Return as JSON string matching VideoComposerResult schema
     result = VideoComposerResult(video_path=video_path)
     return result.model_dump_json()

 def register_video_paths(paths: List[str]) -> None:
     """Register valid video paths for path resolution."""
     global _VIDEO_PATH_REGISTRY
+    _VIDEO_PATH_REGISTRY = [
+        os.path.abspath(p) for p in paths if p and os.path.exists(p)
+    ]
 def _resolve_video_path(video_path: str) -> Optional[str]:
     """
     # Clean the path
     video_path = video_path.strip()
     # Try direct path first
     if os.path.exists(video_path):
         return os.path.abspath(video_path)
     # Try absolute path conversion
     abs_path = os.path.abspath(video_path)
     if os.path.exists(abs_path):
         return abs_path
     # Try to find matching path in registry by filename
     if _VIDEO_PATH_REGISTRY:
         filename = os.path.basename(video_path)
             if os.path.basename(registered_path) == filename:
                 if os.path.exists(registered_path):
                     return registered_path
         # Try fuzzy matching - check if the path is similar to any registered path
         # This handles cases where the path got corrupted (e.g., missing characters)
         for registered_path in _VIDEO_PATH_REGISTRY:
             if filename in registered_path or registered_path.endswith(filename):
                 if os.path.exists(registered_path):
                     return registered_path
     return None
     else:
         # If resolution failed, try the original path anyway
         result_json = video_summarizer(video_path, fps=fps)
     # Validate and ensure the result matches VideoSummary schema
     try:
         parsed = json.loads(result_json)
     Returns:
         JSON string containing detailed script with scene information and composition details matching VideoScript schema
     """
+    result_json = video_script_generator(
+        video_summaries, user_description, target_duration
+    )
     # Validate and ensure the result matches VideoScript schema
     try:
         parsed = json.loads(result_json)
         looping=looping,
         prompt_influence=prompt_influence,
     )
     # Return as JSON string matching MusicSelectorResult schema
     result = MusicSelectorResult(audio_path=audio_path)
     return result.model_dump_json()
     # Try to resolve the path in case it got corrupted
     resolved_path = _resolve_video_path(video_path)
     if resolved_path:
+        frame_path = frame_extractor(
+            resolved_path, thumbnail_timeframe=thumbnail_timeframe
+        )
     else:
         # If resolution failed, try the original path anyway
+        frame_path = frame_extractor(
+            video_path, thumbnail_timeframe=thumbnail_timeframe
+        )
     # Return as JSON string matching FrameExtractorResult schema
     result = FrameExtractorResult(frame_path=frame_path)
     return result.model_dump_json()
         JSON string with thumbnail_path field matching ThumbnailGeneratorResult schema
     """
     thumbnail_path = thumbnail_generator(image_path, summary)
     # Return as JSON string matching ThumbnailGeneratorResult schema
     result = ThumbnailGeneratorResult(thumbnail_path=thumbnail_path)
     return result.model_dump_json()
     else:
         # Comma-separated paths
         clips_list = [path.strip() for path in video_clips.split(",") if path.strip()]
     # Resolve all video clip paths in case they got corrupted
     resolved_clips = []
     for clip_path in clips_list:
         music_path=music_path,
         thumbnail_image=thumbnail_image,
     )
     # Return as JSON string matching VideoComposerResult schema
     result = VideoComposerResult(video_path=video_path)
     return result.model_dump_json()

src/app/tools/tool_schemas.py CHANGED Viewed

@@ -41,7 +41,9 @@ class VideoSummary(BaseModel):
 class VideoScript(BaseModel):
     """Schema for video script generator tool output."""
-    total_duration: float = Field(..., description="Total duration of the script in seconds")
     scenes: List[dict] = Field(..., description="List of scene objects")
     music: Optional[dict] = Field(None, description="Music configuration")
     pacing: Optional[str] = Field(None, description="Pacing description")
@@ -79,9 +81,7 @@ class MusicSelectorResult(BaseModel):
     class Config:
         json_schema_extra = {
-            "example": {
-                "audio_path": "/tmp/sound_effect_energetic_30s_1234567890.mp3"
-            }
         }
@@ -92,22 +92,20 @@ class FrameExtractorResult(BaseModel):
     class Config:
         json_schema_extra = {
-            "example": {
-                "frame_path": "/path/to/video_frame_ai_13s.png"
-            }
         }
 class ThumbnailGeneratorResult(BaseModel):
     """Schema for thumbnail generator tool output."""
-    thumbnail_path: str = Field(..., description="Path to the generated thumbnail image")
     class Config:
         json_schema_extra = {
-            "example": {
-                "thumbnail_path": "/tmp/thumbnail_1234567890.png"
-            }
         }
@@ -117,9 +115,4 @@ class VideoComposerResult(BaseModel):
     video_path: str = Field(..., description="Path to the final composed video file")
     class Config:
-        json_schema_extra = {
-            "example": {
-                "video_path": "/tmp/composed_video_12345.mp4"
-            }
-        }

 class VideoScript(BaseModel):
     """Schema for video script generator tool output."""
+    total_duration: float = Field(
+        ..., description="Total duration of the script in seconds"
+    )
     scenes: List[dict] = Field(..., description="List of scene objects")
     music: Optional[dict] = Field(None, description="Music configuration")
     pacing: Optional[str] = Field(None, description="Pacing description")
     class Config:
         json_schema_extra = {
+            "example": {"audio_path": "/tmp/sound_effect_energetic_30s_1234567890.mp3"}
         }
     class Config:
         json_schema_extra = {
+            "example": {"frame_path": "/path/to/video_frame_ai_13s.png"}
         }
 class ThumbnailGeneratorResult(BaseModel):
     """Schema for thumbnail generator tool output."""
+    thumbnail_path: str = Field(
+        ..., description="Path to the generated thumbnail image"
+    )
     class Config:
         json_schema_extra = {
+            "example": {"thumbnail_path": "/tmp/thumbnail_1234567890.png"}
         }
     video_path: str = Field(..., description="Path to the final composed video file")
     class Config:
+        json_schema_extra = {"example": {"video_path": "/tmp/composed_video_12345.mp4"}}

src/app/tools/video_clipper.py CHANGED Viewed

@@ -88,16 +88,18 @@ def video_clipper(
         # Clean up
         clipped_video.close()
         video.close()
         # Verify the clipped video duration by reloading it
         # This helps catch any frame reading issues early
         verify_clip = VideoFileClip(output_path)
         actual_duration = verify_clip.duration
         verify_clip.close()
         # Log if there's a significant duration mismatch
         if abs(actual_duration - expected_duration) > 0.5:
-            print(f"Warning: Clipped video expected {expected_duration:.2f}s but actual duration is {actual_duration:.2f}s")
         # Return absolute path
         return os.path.abspath(output_path)

         # Clean up
         clipped_video.close()
         video.close()
         # Verify the clipped video duration by reloading it
         # This helps catch any frame reading issues early
         verify_clip = VideoFileClip(output_path)
         actual_duration = verify_clip.duration
         verify_clip.close()
         # Log if there's a significant duration mismatch
         if abs(actual_duration - expected_duration) > 0.5:
+            print(
+                f"Warning: Clipped video expected {expected_duration:.2f}s but actual duration is {actual_duration:.2f}s"
+            )
         # Return absolute path
         return os.path.abspath(output_path)

src/app/tools/video_composer.py CHANGED Viewed

@@ -304,25 +304,27 @@ def video_composer(
         video_clips_loaded = []
         expected_total_duration = 0.0
         actual_total_duration = 0.0
         for i, (clip_path, scene) in enumerate(zip(clip_paths, scenes)):
             if not os.path.exists(clip_path):
                 raise FileNotFoundError(f"Video clip not found: {clip_path}")
             clip = VideoFileClip(clip_path)
             actual_duration = clip.duration
             expected_duration = scene.get("duration", actual_duration)
             # Use actual duration for calculations, not expected
             actual_total_duration += actual_duration
             expected_total_duration += expected_duration
             # Log duration mismatch if significant
             if abs(actual_duration - expected_duration) > 0.5:
-                print(f"Warning: Scene {i+1} expected duration {expected_duration:.2f}s but actual clip duration is {actual_duration:.2f}s")
             video_clips_loaded.append(clip)
         print(f"Total expected duration from script: {expected_total_duration:.2f}s")
         print(f"Total actual duration from clips: {actual_total_duration:.2f}s")
@@ -387,26 +389,30 @@ def video_composer(
         else:
             # Use concatenate_videoclips for simple sequential composition
             final_video = concatenate_videoclips(processed_clips, method="compose")
         # Validate final video duration
         actual_final_duration = final_video.duration
         target_duration = script_data.get("total_duration", expected_total_duration)
         # Log duration information
         print(f"Final composed video duration: {actual_final_duration:.2f}s")
         print(f"Target duration from script: {target_duration:.2f}s")
         if abs(actual_final_duration - target_duration) > 1.0:
-            print(f"Warning: Final video duration ({actual_final_duration:.2f}s) is shorter than target duration ({target_duration:.2f}s)")
             print(f"Expected total from scenes: {expected_total_duration:.2f}s")
             print(f"Actual total from clips: {actual_total_duration:.2f}s")
             # If the actual duration is significantly shorter, it might be due to:
             # 1. Frame reading issues in clipped videos
             # 2. Crossfade overlaps reducing duration
             # 3. Clips being truncated during extraction
             if actual_final_duration < actual_total_duration * 0.8:
-                print(f"Warning: Final video is significantly shorter than sum of clip durations. This may indicate frame reading issues.")
         # Add thumbnail image to first frame if provided
         if thumbnail_path and os.path.exists(thumbnail_path):

         video_clips_loaded = []
         expected_total_duration = 0.0
         actual_total_duration = 0.0
         for i, (clip_path, scene) in enumerate(zip(clip_paths, scenes)):
             if not os.path.exists(clip_path):
                 raise FileNotFoundError(f"Video clip not found: {clip_path}")
             clip = VideoFileClip(clip_path)
             actual_duration = clip.duration
             expected_duration = scene.get("duration", actual_duration)
             # Use actual duration for calculations, not expected
             actual_total_duration += actual_duration
             expected_total_duration += expected_duration
             # Log duration mismatch if significant
             if abs(actual_duration - expected_duration) > 0.5:
+                print(
+                    f"Warning: Scene {i+1} expected duration {expected_duration:.2f}s but actual clip duration is {actual_duration:.2f}s"
+                )
             video_clips_loaded.append(clip)
         print(f"Total expected duration from script: {expected_total_duration:.2f}s")
         print(f"Total actual duration from clips: {actual_total_duration:.2f}s")
         else:
             # Use concatenate_videoclips for simple sequential composition
             final_video = concatenate_videoclips(processed_clips, method="compose")
         # Validate final video duration
         actual_final_duration = final_video.duration
         target_duration = script_data.get("total_duration", expected_total_duration)
         # Log duration information
         print(f"Final composed video duration: {actual_final_duration:.2f}s")
         print(f"Target duration from script: {target_duration:.2f}s")
         if abs(actual_final_duration - target_duration) > 1.0:
+            print(
+                f"Warning: Final video duration ({actual_final_duration:.2f}s) is shorter than target duration ({target_duration:.2f}s)"
+            )
             print(f"Expected total from scenes: {expected_total_duration:.2f}s")
             print(f"Actual total from clips: {actual_total_duration:.2f}s")
             # If the actual duration is significantly shorter, it might be due to:
             # 1. Frame reading issues in clipped videos
             # 2. Crossfade overlaps reducing duration
             # 3. Clips being truncated during extraction
             if actual_final_duration < actual_total_duration * 0.8:
+                print(
+                    f"Warning: Final video is significantly shorter than sum of clip durations. This may indicate frame reading issues."
+                )
         # Add thumbnail image to first frame if provided
         if thumbnail_path and os.path.exists(thumbnail_path):

src/app/tools/video_script_generator.py CHANGED Viewed

@@ -206,7 +206,10 @@ def video_script_generator(
                     # Check if it's wrapped in a tool response format
                     if len(summary) == 1:
                         key = list(summary.keys())[0]
-                        if "_tool_response" in key.lower() or "_response" in key.lower():
                             # Extract the actual data from the wrapper
                             summaries_list.append(summary[key])
                         else:
@@ -382,13 +385,13 @@ Rules:
         video_durations = {}
         for i, summary in enumerate(summaries_list):
             video_durations[i] = summary.get("duration", 0.0)
         num_videos = len(summaries_list)
         # Validate and fix each scene
         for scene in script["scenes"]:
             source_video_idx = scene.get("source_video")
             # Validate and fix source_video index if it's an integer
             if isinstance(source_video_idx, int):
                 # Clamp index to valid range (0 to num_videos - 1)
@@ -401,7 +404,7 @@ Rules:
             elif source_video_idx is None:
                 # If source_video is missing, default to first video
                 scene["source_video"] = 0
             # Now validate timestamps if we have a valid video index
             # Use the clamped value from scene (in case it was updated)
             validated_idx = scene.get("source_video")
@@ -427,8 +430,10 @@ Rules:
                         scene["duration"] = video_duration - scene["start_time"]
                 else:
                     # Clamp start_time to be within bounds
-                    scene["start_time"] = max(0.0, min(start_time, video_duration - 0.1))
                     # Calculate or validate end_time
                     if end_time is None:
                         if scene_duration:
@@ -437,10 +442,13 @@ Rules:
                             calculated_end_time = video_duration
                     else:
                         calculated_end_time = end_time
                     # Clamp end_time to be within bounds
-                    scene["end_time"] = max(scene["start_time"] + 0.1, min(calculated_end_time, video_duration))
                     # Update duration to match
                     scene["duration"] = scene["end_time"] - scene["start_time"]

                     # Check if it's wrapped in a tool response format
                     if len(summary) == 1:
                         key = list(summary.keys())[0]
+                        if (
+                            "_tool_response" in key.lower()
+                            or "_response" in key.lower()
+                        ):
                             # Extract the actual data from the wrapper
                             summaries_list.append(summary[key])
                         else:
         video_durations = {}
         for i, summary in enumerate(summaries_list):
             video_durations[i] = summary.get("duration", 0.0)
         num_videos = len(summaries_list)
         # Validate and fix each scene
         for scene in script["scenes"]:
             source_video_idx = scene.get("source_video")
             # Validate and fix source_video index if it's an integer
             if isinstance(source_video_idx, int):
                 # Clamp index to valid range (0 to num_videos - 1)
             elif source_video_idx is None:
                 # If source_video is missing, default to first video
                 scene["source_video"] = 0
             # Now validate timestamps if we have a valid video index
             # Use the clamped value from scene (in case it was updated)
             validated_idx = scene.get("source_video")
                         scene["duration"] = video_duration - scene["start_time"]
                 else:
                     # Clamp start_time to be within bounds
+                    scene["start_time"] = max(
+                        0.0, min(start_time, video_duration - 0.1)
+                    )
                     # Calculate or validate end_time
                     if end_time is None:
                         if scene_duration:
                             calculated_end_time = video_duration
                     else:
                         calculated_end_time = end_time
                     # Clamp end_time to be within bounds
+                    scene["end_time"] = max(
+                        scene["start_time"] + 0.1,
+                        min(calculated_end_time, video_duration),
+                    )
                     # Update duration to match
                     scene["duration"] = scene["end_time"] - scene["start_time"]

src/app/workflow.py CHANGED Viewed

@@ -74,7 +74,7 @@ def agent_workflow(
     This workflow parallelizes operations where possible:
     - Video analysis: All videos are analyzed concurrently
     - Music generation and frame extraction: Run in parallel
     This is a generator function that yields progress updates as the workflow progresses.
     Each yield contains: (final_path, summary_json, script_json, thumbnail_path, status)
@@ -118,7 +118,7 @@ def agent_workflow(
         yield final_path, summary_json, script_json, thumbnail_path, status
         summaries = []
         def analyze_video(video_path, index):
             """Helper function to analyze a single video."""
             try:
@@ -137,20 +137,22 @@ def agent_workflow(
                 executor.submit(analyze_video, video_path, i): (i, video_path)
                 for i, video_path in enumerate(video_paths)
             }
             # Process results as they complete
             results = [None] * len(video_paths)
             for future in as_completed(future_to_video):
                 index, summary_dict, error = future.result()
                 if error:
-                    status += f"  ⚠️ Warning: Video {index+1}/{len(video_paths)} - {error}\n"
                 elif summary_dict:
                     results[index] = summary_dict
                     status += f"  ✅ Completed video {index+1}/{len(video_paths)}\n"
                 else:
                     status += f"  ⚠️ Warning: Video {index+1}/{len(video_paths)} - No summary generated\n"
                 yield final_path, summary_json, script_json, thumbnail_path, status
         # Collect successful summaries in order
@@ -186,14 +188,20 @@ def agent_workflow(
             else:
                 # Fallback: extract mood from first video summary
                 if summaries and summaries[0].get("mood_tags"):
-                    music_mood = summaries[0]["mood_tags"][0] if summaries[0]["mood_tags"] else "energetic"
                 else:
                     music_mood = "energetic"
         except:
             music_mood = "energetic"
         # Step 3 & 4: Generate music and extract frame in parallel
-        status += "\n🎵 Step 3 & 4: Generating music and extracting frame (in parallel)...\n"
         yield final_path, summary_json, script_json, thumbnail_path, status
         music_path = None
@@ -250,7 +258,7 @@ def agent_workflow(
                     elif result:
                         frame_path = result
                         status += "✅ Frame extracted.\n"
                 yield final_path, summary_json, script_json, thumbnail_path, status
         # Step 5: Generate thumbnail

     This workflow parallelizes operations where possible:
     - Video analysis: All videos are analyzed concurrently
     - Music generation and frame extraction: Run in parallel
     This is a generator function that yields progress updates as the workflow progresses.
     Each yield contains: (final_path, summary_json, script_json, thumbnail_path, status)
         yield final_path, summary_json, script_json, thumbnail_path, status
         summaries = []
         def analyze_video(video_path, index):
             """Helper function to analyze a single video."""
             try:
                 executor.submit(analyze_video, video_path, i): (i, video_path)
                 for i, video_path in enumerate(video_paths)
             }
             # Process results as they complete
             results = [None] * len(video_paths)
             for future in as_completed(future_to_video):
                 index, summary_dict, error = future.result()
                 if error:
+                    status += (
+                        f"  ⚠️ Warning: Video {index+1}/{len(video_paths)} - {error}\n"
+                    )
                 elif summary_dict:
                     results[index] = summary_dict
                     status += f"  ✅ Completed video {index+1}/{len(video_paths)}\n"
                 else:
                     status += f"  ⚠️ Warning: Video {index+1}/{len(video_paths)} - No summary generated\n"
                 yield final_path, summary_json, script_json, thumbnail_path, status
         # Collect successful summaries in order
             else:
                 # Fallback: extract mood from first video summary
                 if summaries and summaries[0].get("mood_tags"):
+                    music_mood = (
+                        summaries[0]["mood_tags"][0]
+                        if summaries[0]["mood_tags"]
+                        else "energetic"
+                    )
                 else:
                     music_mood = "energetic"
         except:
             music_mood = "energetic"
         # Step 3 & 4: Generate music and extract frame in parallel
+        status += (
+            "\n🎵 Step 3 & 4: Generating music and extracting frame (in parallel)...\n"
+        )
         yield final_path, summary_json, script_json, thumbnail_path, status
         music_path = None
                     elif result:
                         frame_path = result
                         status += "✅ Frame extracted.\n"
                 yield final_path, summary_json, script_json, thumbnail_path, status
         # Step 5: Generate thumbnail