Spaces:

userIdc2024
/

Video_AdGenesis_App

Sleeping

App Files Files Community

sushilideaclan01 committed 17 days ago

Commit

d51b30c

1 Parent(s): 82a1419

improve the flow

Browse files

Files changed (5) hide show

api/prompt_generation.py +110 -80
frontend/src/components/GenerationComplete.tsx +28 -7
frontend/src/components/GenerationForm.tsx +143 -337
frontend/src/components/GenerationProgress.tsx +16 -18
frontend/src/context/GenerationContext.tsx +27 -15

api/prompt_generation.py CHANGED Viewed

@@ -361,6 +361,29 @@ async def refine_prompt_for_continuity(
     try:
         # Read the image
         image_bytes = await lastFrame.read()
         encoded_image = base64.b64encode(image_bytes).decode('utf-8')
         # Parse the segment prompt
@@ -379,109 +402,116 @@ async def refine_prompt_for_continuity(
         audio_context = ""
         if transcribedDialogue.strip():
             audio_context = f"""
-═══════════════════════════════════════════════════════════
-AUDIO CONTINUITY CONTEXT (WHAT WAS ACTUALLY SPOKEN)
-═══════════════════════════════════════════════════════════
-Previous segment's dialogue (from Whisper transcription):
-\"{transcribedDialogue.strip()}\"
-Expected dialogue was:
-\"{expectedDialogue.strip() if expectedDialogue.strip() else 'Not provided'}\"
-IMPORTANT: The next segment should continue naturally from what was ACTUALLY said.
-If there are differences between expected and transcribed dialogue, use the TRANSCRIBED version
-as the ground truth for continuity (it's what the viewer actually heard).
 """
-        # Build the refinement prompt
-        refinement_instructions = f"""
-You are a video continuity expert. Your task is to UPDATE the provided segment prompt to ensure PERFECT VISUAL AND AUDIO CONTINUITY with the previous video segment.
-═══════════════════════════════════════════════════════════
-VISUAL CONTINUITY (from attached image)
-═══════════════════════════════════════════════════════════
-Analyze the image carefully - this is the ACTUAL last frame from the previous video.
-1. Update the character_description to match the ACTUAL person in the image:
-   - Physical appearance (EXACT age, hair color/style, facial features, skin tone)
-   - Clothing (EXACTLY what they're wearing - color, style, pattern)
-   - Current state (their actual expression and posture at this moment)
-   - Voice matching (adjust to match their appearance)
-2. Update the scene_continuity to match the ACTUAL environment:
-   - Environment (describe what you see - bedroom, office, outdoor, etc.)
-   - Camera position (maintain the SAME angle/framing)
-   - Lighting state (match the EXACT lighting conditions in the image)
-   - Props and background elements (describe what's actually visible)
-   - Spatial relationships (match the actual layout)
-{audio_context}
-═══════════════════════════════════════════════════════════
-ORIGINAL PROMPT TO UPDATE
-═══════════════════════════════════════════════════════════
-{json.dumps(segment_data, indent=2)}
-═══════════════════════════════════════════════════════════
-CRITICAL RULES
-═══════════════════════════════════════════════════════════
-- Be EXTREMELY specific about what you see in the image
-- If the image shows a young woman with red hair, describe EXACTLY that
-- If it's a sunset beach scene, describe EXACTLY that setting
-- If they're wearing a beige blazer, describe EXACTLY that clothing
-- Match colors, styles, and details PRECISELY to what's visible
-- Maintain the SAME camera angle and distance
-- Keep the action_timeline.dialogue EXACTLY as provided (this is the NEXT segment's dialogue)
-- Update segment_info.continuity_markers to reflect the visual state
-- Adjust synchronized_actions to fit the actual character appearance
-🚨 CRITICAL: NO BLUR TRANSITIONS AT SEGMENT START 🚨
-- The video MUST start immediately at 0:00 with a SHARP, CLEAR, IN-FOCUS frame
-- NO fade-in, NO blur transition, NO gradual focus effect at the start
-- The first frame (0:00) must be as clear and sharp as any other frame
-- camera_movement MUST describe movement that starts from a clear, sharp state
-The goal is SEAMLESS video extension with ZERO visual or audio discontinuity.
-Return ONLY the updated JSON segment object with the same structure. No explanation, just the corrected JSON.
-"""
         print(f"🔄 Refining prompt for visual continuity...")
         messages = [
             {
                 "role": "user",
                 "content": [
                     {"type": "text", "text": refinement_instructions},
                     {
                         "type": "image_url",
-                        "image_url": {"url": f"data:image/jpeg;base64,{encoded_image}"}
                     }
                 ]
             }
         ]
         content = None
-        for attempt in range(2):  # initial + 1 retry
-            response = client.chat.completions.create(
-                model="gpt-4o",
-                messages=messages,
-                response_format={"type": "json_object"},
-                temperature=0.3,
-            )
-            content = response.choices[0].message.content
-            if content and content.strip():
-                break
-            print(f"⚠️ GPT-4o returned empty response (attempt {attempt + 1}/2)")
-            if attempt == 0:
-                await asyncio.sleep(1.0)
         if not content or not content.strip():
-            print(f"⚠️ Using original segment after empty GPT response")
             return JSONResponse(content={
                 "refined_prompt": segment_data,
                 "original_prompt": segment_data,
@@ -491,7 +521,7 @@ Return ONLY the updated JSON segment object with the same structure. No explanat
         try:
             refined_prompt = json.loads(content)
         except json.JSONDecodeError as je:
-            print(f"⚠️ Failed to parse GPT response as JSON, using original: {str(je)}")
             return JSONResponse(content={
                 "refined_prompt": segment_data,
                 "original_prompt": segment_data,

     try:
         # Read the image
         image_bytes = await lastFrame.read()
+        # Detect MIME type from image bytes (PNG starts with \x89PNG, JPEG with \xff\xd8)
+        mime_type = "image/png" if image_bytes[:4] == b'\x89PNG' else "image/jpeg"
+        # Compress image to reduce token usage (resize to max 512px, convert to JPEG)
+        try:
+            from PIL import Image
+            import io
+            img = Image.open(io.BytesIO(image_bytes))
+            # Resize to max 512px on longest side (saves ~75% tokens vs full resolution)
+            max_dim = 512
+            if max(img.size) > max_dim:
+                ratio = max_dim / max(img.size)
+                new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio))
+                img = img.resize(new_size, Image.LANCZOS)
+            # Convert to JPEG for smaller size
+            buf = io.BytesIO()
+            img.convert('RGB').save(buf, format='JPEG', quality=85)
+            image_bytes = buf.getvalue()
+            mime_type = "image/jpeg"
+        except Exception as resize_err:
+            print(f"⚠️ Image resize skipped: {resize_err}")
         encoded_image = base64.b64encode(image_bytes).decode('utf-8')
         # Parse the segment prompt
         audio_context = ""
         if transcribedDialogue.strip():
             audio_context = f"""
+AUDIO CONTEXT: Previous segment said: "{transcribedDialogue.strip()}"
+Expected: "{expectedDialogue.strip() if expectedDialogue.strip() else 'N/A'}"
+Continue naturally from what was ACTUALLY said (use transcription as ground truth).
 """
+        # Build compact refinement prompt (shorter = more reliable responses)
+        segment_json = json.dumps(segment_data, indent=2)
+        refinement_instructions = f"""Update this video segment prompt for VISUAL CONTINUITY with the attached image (last frame from previous video).
+WHAT TO UPDATE:
+1. character_description: Match the ACTUAL person in the image (appearance, clothing, expression, posture)
+2. scene_continuity: Match environment, camera angle, lighting, props exactly as shown
+3. continuity_markers: Reflect current visual state
+4. synchronized_actions: Fit actual character appearance
+{audio_context}
+SEGMENT TO UPDATE:
+{segment_json}
+RULES:
+- Be specific about what you SEE in the image
+- Keep action_timeline.dialogue EXACTLY as-is (this is the NEXT segment's script)
+- NO blur/fade transitions at start - video starts sharp and clear at 0:00
+- Maintain same camera angle and distance as image
+Return the updated JSON segment object with the same structure."""
         print(f"🔄 Refining prompt for visual continuity...")
+        system_msg = "You are a video continuity expert. Analyze the provided image and update the JSON segment prompt to ensure seamless visual continuity. Always respond with a single valid JSON object matching the input structure."
         messages = [
+            {"role": "system", "content": system_msg},
             {
                 "role": "user",
                 "content": [
                     {"type": "text", "text": refinement_instructions},
                     {
                         "type": "image_url",
+                        "image_url": {
+                            "url": f"data:{mime_type};base64,{encoded_image}",
+                            "detail": "low"  # Use low detail to reduce token usage
+                        }
                     }
                 ]
             }
         ]
+        # Try GPT-4o first, then gpt-4o-mini as fallback
+        models_to_try = ["gpt-4o", "gpt-4o-mini"]
         content = None
+        for model_name in models_to_try:
+            try:
+                response = client.chat.completions.create(
+                    model=model_name,
+                    messages=messages,
+                    response_format={"type": "json_object"},
+                    max_tokens=4096,
+                    temperature=0.3,
+                )
+                choice = response.choices[0]
+                content = choice.message.content
+                finish_reason = choice.finish_reason
+                if content and content.strip():
+                    if model_name != "gpt-4o":
+                        print(f"✅ Got response from fallback model {model_name}")
+                    break
+                print(f"⚠️ {model_name} returned empty (finish_reason={finish_reason})")
+                if finish_reason == "content_filter":
+                    print(f"   🛡️ Blocked by content filter, trying next model...")
+                content = None  # Reset for next model
+            except Exception as model_err:
+                print(f"⚠️ {model_name} failed: {model_err}")
+                content = None
+                continue
+        # Last resort: try without json_object mode (some content triggers filter only in JSON mode)
+        if not content or not content.strip():
+            print(f"⚠️ All models failed with JSON mode, trying without response_format...")
+            try:
+                messages_plain = [
+                    {"role": "system", "content": system_msg + " Return ONLY raw JSON, no markdown, no explanation."},
+                    messages[1]  # Same user message with image
+                ]
+                response = client.chat.completions.create(
+                    model="gpt-4o-mini",
+                    messages=messages_plain,
+                    max_tokens=4096,
+                    temperature=0.3,
+                )
+                raw_content = response.choices[0].message.content
+                if raw_content and raw_content.strip():
+                    # Extract JSON from response (might have markdown fences)
+                    cleaned = raw_content.strip()
+                    if cleaned.startswith("```"):
+                        # Remove markdown code fences
+                        lines = cleaned.split('\n')
+                        lines = [l for l in lines if not l.strip().startswith("```")]
+                        cleaned = '\n'.join(lines)
+                    content = cleaned
+                    print(f"✅ Got response without JSON mode")
+            except Exception as plain_err:
+                print(f"⚠️ Plain mode also failed: {plain_err}")
         if not content or not content.strip():
+            print(f"⚠️ All refinement attempts failed, using original segment")
             return JSONResponse(content={
                 "refined_prompt": segment_data,
                 "original_prompt": segment_data,
         try:
             refined_prompt = json.loads(content)
         except json.JSONDecodeError as je:
+            print(f"⚠️ Failed to parse response as JSON: {str(je)[:100]}, using original")
             return JSONResponse(content={
                 "refined_prompt": segment_data,
                 "original_prompt": segment_data,

frontend/src/components/GenerationComplete.tsx CHANGED Viewed

@@ -134,8 +134,13 @@ export const GenerationComplete: React.FC = () => {
                   {partialCompletionError}
                 </p>
                 <p className="text-void-400 text-xs">
-                  The successfully generated videos are displayed below. You can still export and use them.
                 </p>
               </div>
             </div>
           </motion.div>
@@ -271,8 +276,15 @@ export const GenerationComplete: React.FC = () => {
                   <VideoIcon size={24} className={accentColor === 'coral' ? 'text-coral-400' : 'text-electric-400'} />
                 </div>
                 <div>
-                  <h3 className="font-bold text-lg text-void-100">Final Exported Video</h3>
-                  <p className="text-sm text-void-400">All segments merged into one video</p>
                 </div>
               </div>
@@ -348,12 +360,17 @@ export const GenerationComplete: React.FC = () => {
               {isMerging ? (
                 <>
                   <div className="w-5 h-5 border-2 border-white/30 border-t-white rounded-full animate-spin" />
-                  <span>Merging...</span>
                 </>
               ) : (
                 <>
                   <VideoIcon size={20} />
-                  <span>Export Final Video</span>
                 </>
               )}
             </button>
@@ -402,9 +419,13 @@ export const GenerationComplete: React.FC = () => {
           className="text-center text-void-500 text-sm mt-8"
         >
           {mergedVideoUrl
-            ? 'Your final video is ready! Download it or re-merge with different settings.'
             : generatedVideos.length >= 2
-              ? '"Export Final Video" will merge all segments into a single video file with Whisper-optimized trim points.'
               : 'Videos are ready to use in your video editor or social media.'
           }
         </motion.p>

                   {partialCompletionError}
                 </p>
                 <p className="text-void-400 text-xs">
+                  The successfully generated videos are displayed below. You can still merge and export them.
                 </p>
+                {generatedVideos.length >= 2 && (
+                  <p className="text-green-400 text-xs mt-2 font-medium">
+                    {generatedVideos.length} segments available for merge and export.
+                  </p>
+                )}
               </div>
             </div>
           </motion.div>
                   <VideoIcon size={24} className={accentColor === 'coral' ? 'text-coral-400' : 'text-electric-400'} />
                 </div>
                 <div>
+                  <h3 className="font-bold text-lg text-void-100">
+                    {partialCompletionError ? 'Merged Partial Video' : 'Final Exported Video'}
+                  </h3>
+                  <p className="text-sm text-void-400">
+                    {partialCompletionError
+                      ? `${generatedVideos.length} available segments merged into one video`
+                      : 'All segments merged into one video'
+                    }
+                  </p>
                 </div>
               </div>
               {isMerging ? (
                 <>
                   <div className="w-5 h-5 border-2 border-white/30 border-t-white rounded-full animate-spin" />
+                  <span>Merging {generatedVideos.length} segments...</span>
                 </>
               ) : (
                 <>
                   <VideoIcon size={20} />
+                  <span>
+                    {partialCompletionError
+                      ? `Merge ${generatedVideos.length} Available Segments`
+                      : 'Export Final Video'
+                    }
+                  </span>
                 </>
               )}
             </button>
           className="text-center text-void-500 text-sm mt-8"
         >
           {mergedVideoUrl
+            ? partialCompletionError
+              ? `Your partial video (${generatedVideos.length} segments) is ready! Download it or start a new generation.`
+              : 'Your final video is ready! Download it or re-merge with different settings.'
             : generatedVideos.length >= 2
+              ? partialCompletionError
+                ? `You can merge the ${generatedVideos.length} successfully generated segments into a single video.`
+                : '"Export Final Video" will merge all segments into a single video file with Whisper-optimized trim points.'
               : 'Videos are ready to use in your video editor or social media.'
           }
         </motion.p>

frontend/src/components/GenerationForm.tsx CHANGED Viewed

@@ -297,7 +297,8 @@ export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack
     setError(null);
     let segmentsToUse: VeoSegment[] = [];
-    let currentImageFile = imageFile; // Declare at function scope for catch block access
     try {
@@ -380,8 +381,8 @@ export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack
         }
       }
-      // Track current reference image (starts with original)
-      let currentImageFile = imageFile;
       // RESUME SUPPORT: Start from where we left off if retrying
       const startIndex = attemptCount > 0 ? generatedVideos.length : 0;
@@ -402,11 +403,15 @@ export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack
       // Step 2: Generate videos segment by segment with frame continuity
       for (let i = startIndex; i < segmentsToUse.length; i++) {
-        const segment = segmentsToUse[i];
         const isLastSegment = i === segmentsToUse.length - 1;
         updateProgress(
-          `Generating video ${i + 1} of ${segmentsToUse.length}...${i > 0 ? ' (using last frame from previous)' : ''}`,
           i,
           segmentsToUse.length
         );
@@ -536,13 +541,49 @@ export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack
           trimPoint, // Store trim point for merge
         };
         addVideo(generatedVideo);
         updateProgress(`Completed video ${i + 1} of ${segmentsToUse.length}`, i + 1, segmentsToUse.length);
       }
       // All done!
       clearDraft(); // Clear draft on successful generation
-      clearDraft(); // Clear draft on successful generation
       setStep('completed');
       updateProgress('All videos generated successfully!');
@@ -558,312 +599,35 @@ export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack
         return;
       }
-      {
-      // Check if this is a safety error that can be auto-fixed
-      if (isUnsafeSegmentError(errorMessage) && attemptCount < AUTO_FIX_MAX_ATTEMPTS) {
-        const segmentIndex = generatedVideos.length;
-        console.log(`🛡️ Safety error detected for segment ${segmentIndex + 1}, attempting auto-fix...`);
-        updateProgress(`Detected safety issue in segment ${segmentIndex + 1}, auto-fixing...`);
-        try {
-          // Call AI to fix the unsafe segment
-          const fixResult = await fixUnsafePrompt({
-            segment: segmentsToUse[segmentIndex],
-            error_message: errorMessage,
-            attempt_count: attemptCount
-          });
-          if (fixResult.success && fixResult.fixed_segment) {
-            console.log(`✅ Auto-fix successful: ${fixResult.changes_made}`);
-            updateProgress(`Fixed segment ${segmentIndex + 1}: ${fixResult.changes_made}`);
-            // Update the segment with the fixed version IN THE LOCAL ARRAY
-            segmentsToUse[segmentIndex] = fixResult.fixed_segment;
-            // Update context state (async, but we don't wait for it)
-            updateSegments(segmentsToUse);
-            // IMPORTANT: Continue generating from current position with fixed segment
-            // Don't restart the whole function - just continue from current index
-            console.log(`🔄 Retrying segment ${segmentIndex + 1} with fixed prompt...`);
-            await new Promise(resolve => setTimeout(resolve, 1000));
-            // Continue the loop from current segment (i stays the same, so it will retry)
-            // We do this by NOT incrementing i and continuing
-            // Actually, we're in the catch block, so we need to resume the generation
-            // The best approach is to just retry the current segment inline
-            // Reset to retry current segment
-            updateProgress(`Retrying segment ${segmentIndex + 1} with fixed content...`, segmentIndex, segmentsToUse.length);
-            // Re-run the segment generation with fixed prompt
-            const segment = segmentsToUse[segmentIndex];
-            const isLastSegment = segmentIndex === segmentsToUse.length - 1;
-            // Upload current reference image (should still be set from before)
-            updateProgress(`Uploading reference image for segment ${segmentIndex + 1}...`);
-            const uploadResult = await uploadImage(currentImageFile, { reference: true });
-            const hostedImageUrl = uploadResult.url;
-            // Generate video with fixed prompt
-            updateProgress(`Submitting FIXED segment ${segmentIndex + 1} to KIE Veo 3.1...`);
-            const generateResult = await klingGenerate({
-              prompt: segment,
-              imageUrls: [hostedImageUrl],
-              model: 'veo3_fast',
-              aspectRatio: formState.aspectRatio,
-              generationType: 'FIRST_AND_LAST_FRAMES_2_VIDEO',
-              seeds: formState.seedValue,
-              voiceType: formState.voiceType,
-            });
-            // Wait for completion
-            updateProgress(`Processing FIXED video ${segmentIndex + 1}... (this may take 1-2 minutes)`);
-            const videoUrl = await waitForKlingVideo(generateResult.taskId);
-            // Download video
-            updateProgress(`Downloading video ${segmentIndex + 1}...`);
-            const videoBlob = await downloadVideo(videoUrl);
-            const blobUrl = URL.createObjectURL(videoBlob);
-            // Get video duration
-            const videoFile = new File([videoBlob], `segment-${segmentIndex + 1}.mp4`, { type: 'video/mp4' });
-            const duration = await getVideoDuration(videoFile);
-            const thumbnails = await generateThumbnails(videoFile);
-            // Extract frame for next segment if not last
-            let trimPoint = duration;
-            if (!isLastSegment) {
-              updateProgress(`Analyzing video ${segmentIndex + 1} with Whisper...`);
-              try {
-                const dialogue = segment.action_timeline?.dialogue || '';
-                const whisperResult = await whisperAnalyzeAndExtract({
-                  video_url: videoUrl,
-                  dialogue: dialogue,
-                  buffer_time: 0.3,
-                  model_size: 'base'
-                });
-                if (whisperResult.success && whisperResult.frame_base64) {
-                  const dataUrl = whisperResult.frame_base64;
-                  const mime = dataUrl.startsWith('data:image/png') ? 'image/png' : 'image/jpeg';
-                  const ext = mime === 'image/png' ? 'png' : 'jpg';
-                  const base64Data = dataUrl.split(',')[1] || dataUrl;
-                  const byteCharacters = atob(base64Data);
-                  const byteNumbers = new Array(byteCharacters.length);
-                  for (let j = 0; j < byteCharacters.length; j++) {
-                    byteNumbers[j] = byteCharacters.charCodeAt(j);
-                  }
-                  const byteArray = new Uint8Array(byteNumbers);
-                  const frameBlob = new Blob([byteArray], { type: mime });
-                  currentImageFile = new File([frameBlob], `whisper-frame-${segmentIndex + 1}.${ext}`, { type: mime });
-                  if (whisperResult.trim_point) {
-                    trimPoint = whisperResult.trim_point;
-                  }
-                  const transcribedText = whisperResult.transcribed_text || '';
-                  const nextSegment = segmentsToUse[segmentIndex + 1];
-                  if (nextSegment && currentImageFile && transcribedText) {
-                    updateProgress(`Refining segment ${segmentIndex + 2} prompt with visual and audio context...`);
-                    try {
-                      const { refinePromptWithContext } = await import('@/utils/api');
-                      const refined = await refinePromptWithContext(
-                        nextSegment,
-                        currentImageFile,
-                        transcribedText,
-                        dialogue
-                      );
-                      segmentsToUse[segmentIndex + 1] = refined.refined_prompt as typeof nextSegment;
-                      console.log(`✅ Refined segment ${segmentIndex + 2} prompt for consistency`);
-                    } catch (refineError) {
-                      console.warn(`⚠️ Prompt refinement failed, using original:`, refineError);
-                    }
-                  }
-                }
-              } catch {
-                const lastFrameFile = await extractLastFrame(videoBlob);
-                currentImageFile = lastFrameFile;
-              }
-            }
-            // Add to generated videos
-            const generatedVideo: GeneratedVideo = {
-              id: `video-${Date.now()}-${segmentIndex}`,
-              url: videoUrl,
-              blobUrl,
-              segment,
-              duration,
-              thumbnails,
-              trimPoint,
-            };
-            addVideo(generatedVideo);
-            updateProgress(`Completed FIXED video ${segmentIndex + 1} of ${segmentsToUse.length}`, segmentIndex + 1, segmentsToUse.length);
-            // Continue with remaining segments (DON'T restart the whole function!)
-            for (let i = segmentIndex + 1; i < segmentsToUse.length; i++) {
-              const segment = segmentsToUse[i];
-              const isLastSegment = i === segmentsToUse.length - 1;
-              updateProgress(
-                `Generating video ${i + 1} of ${segmentsToUse.length}...${i > 0 ? ' (using last frame from previous)' : ''}`,
-                i,
-                segmentsToUse.length
-              );
-              // Upload current reference image
-              updateProgress(`Uploading reference image for segment ${i + 1}...`);
-              const uploadResult = await uploadImage(currentImageFile, { reference: true });
-              const hostedImageUrl = uploadResult.url;
-              console.log(`🖼️ Segment ${i + 1} using image: ${i === 0 ? 'original' : 'last frame from previous'}`);
-              // Generate video with current reference image
-              updateProgress(`Submitting segment ${i + 1} to KIE Veo 3.1...`);
-              const generateResult = await klingGenerate({
-                prompt: segment,
-                imageUrls: [hostedImageUrl],
-                model: 'veo3_fast',
-                aspectRatio: formState.aspectRatio,
-                generationType: 'FIRST_AND_LAST_FRAMES_2_VIDEO',
-                seeds: formState.seedValue,
-                voiceType: formState.voiceType,
-              });
-              // Wait for completion
-              updateProgress(`Processing video ${i + 1}... (this may take 1-2 minutes)`);
-              const videoUrl = await waitForKlingVideo(generateResult.taskId);
-              // Download video
-              updateProgress(`Downloading video ${i + 1}...`);
-              const videoBlob = await downloadVideo(videoUrl);
-              const blobUrl = URL.createObjectURL(videoBlob);
-              // Get video duration
-              const videoFile = new File([videoBlob], `segment-${i + 1}.mp4`, { type: 'video/mp4' });
-              const duration = await getVideoDuration(videoFile);
-              const thumbnails = await generateThumbnails(videoFile);
-              // Use Whisper to find optimal trim point, extract frame, and get transcription
-              let trimPoint = duration;
-              let transcribedText = '';
-              if (!isLastSegment) {
-                updateProgress(`Analyzing video ${i + 1} with Whisper for optimal continuity...`);
-                try {
-                  const dialogue = segment.action_timeline?.dialogue || '';
-                  const whisperResult = await whisperAnalyzeAndExtract({
-                    video_url: videoUrl,
-                    dialogue: dialogue,
-                    buffer_time: 0.3,
-                    model_size: 'base'
-                  });
-                  if (whisperResult.success && whisperResult.frame_base64) {
-                    const dataUrl = whisperResult.frame_base64;
-                    const mime = dataUrl.startsWith('data:image/png') ? 'image/png' : 'image/jpeg';
-                    const ext = mime === 'image/png' ? 'png' : 'jpg';
-                    const base64Data = dataUrl.split(',')[1] || dataUrl;
-                    const byteCharacters = atob(base64Data);
-                    const byteNumbers = new Array(byteCharacters.length);
-                    for (let j = 0; j < byteCharacters.length; j++) {
-                      byteNumbers[j] = byteCharacters.charCodeAt(j);
-                    }
-                    const byteArray = new Uint8Array(byteNumbers);
-                    const frameBlob = new Blob([byteArray], { type: mime });
-                    currentImageFile = new File([frameBlob], `whisper-frame-${i + 1}.${ext}`, { type: mime });
-                    if (whisperResult.trim_point) {
-                      trimPoint = whisperResult.trim_point;
-                    }
-                    if (whisperResult.transcribed_text) {
-                      transcribedText = whisperResult.transcribed_text;
-                      console.log(`📝 Whisper transcription: "${transcribedText.substring(0, 100)}..."`);
-                    }
-                    console.log(`✅ Whisper: Last word at ${whisperResult.last_word_timestamp?.toFixed(2)}s, frame at ${whisperResult.frame_timestamp?.toFixed(2)}s, trim at ${trimPoint.toFixed(2)}s`);
-                    // REFINE NEXT SEGMENT PROMPT with frame + transcription
-                    const nextSegment = segmentsToUse[i + 1];
-                    if (nextSegment && currentImageFile) {
-                      updateProgress(`Refining segment ${i + 2} prompt with visual and audio context...`);
-                      try {
-                        const { refinePromptWithContext } = await import('@/utils/api');
-                        const refined = await refinePromptWithContext(
-                          nextSegment,
-                          currentImageFile,
-                          transcribedText,
-                          dialogue
-                        );
-                        segmentsToUse[i + 1] = refined.refined_prompt as typeof nextSegment;
-                        console.log(`✅ Refined segment ${i + 2} prompt for consistency`);
-                      } catch (refineError) {
-                        console.warn(`⚠️ Prompt refinement failed, using original:`, refineError);
-                      }
-                    }
-                  } else {
-                    console.log(`⚠️ Whisper failed (${whisperResult.error}), falling back to last frame extraction`);
-                    const lastFrameFile = await extractLastFrame(videoBlob);
-                    currentImageFile = lastFrameFile;
-                  }
-                } catch (frameError) {
-                  console.error(`⚠️ Whisper analysis failed, using fallback:`, frameError);
-                  try {
-                    const lastFrameFile = await extractLastFrame(videoBlob);
-                    currentImageFile = lastFrameFile;
-                  } catch {
-                    // Continue with current image if all extraction fails
-                  }
-                }
-              }
-              // Add to generated videos with trim metadata
-              const generatedVideo: GeneratedVideo = {
-                id: `video-${Date.now()}-${i}`,
-                url: videoUrl,
-                blobUrl,
-                segment,
-                duration,
-                thumbnails,
-                trimPoint,
-              };
-              addVideo(generatedVideo);
-              updateProgress(`Completed video ${i + 1} of ${segmentsToUse.length}`, i + 1, segmentsToUse.length);
-            }
-            // All done after fixing and continuing!
-            clearDraft();
-            setStep('completed');
-            updateProgress('All videos generated successfully!');
-            return; // Exit successfully - don't continue to normal retry logic
-          } else {
-            console.warn(`⚠️ Auto-fix failed: ${fixResult.error}, falling back to manual retry`);
-          }
-        } catch (fixError) {
-          console.error('❌ Auto-fix error:', fixError);
         }
       }
-      const outcome: FlowRetryOutcome = await handleFlowRetry({
-        attemptCount,
-        errorMessage,
-        isCancelled: false,
-        generatedCount: generatedVideos.length,
-        totalCount: segmentsToUse.length,
-        setError,
-        setStep,
-        setPartialCompletionError,
-      });
-      if (outcome === 'retry') {
-        console.log('🔄 First attempt failed, auto-retrying...');
-        updateProgress('Generation failed, automatically retrying...');
-        return handleKlingFrameContinuityFlow(1);
-      }
-      }
     } finally {
       setIsGenerating(false);
     }
@@ -881,6 +645,7 @@ export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack
     setError(null);
     let payload: { segments: VeoSegment[] } = { segments: [] }; // Declare at function scope
     try {
       // Step 1: Get segments - skip prompt generation if segments already exist (retry mode)
@@ -1036,6 +801,7 @@ export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack
           duration,
           thumbnails,
         });
         updateProgress(`Completed video ${i + 1} of ${payload.segments.length}`, i + 1, payload.segments.length);
@@ -1052,20 +818,39 @@ export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack
     } catch (err) {
       console.error('Generation error:', err);
       const errorMessage = err instanceof Error ? err.message : 'Generation failed';
-      const outcome = await handleFlowRetry({
-        attemptCount,
-        errorMessage,
-        isCancelled: errorMessage.includes('cancelled') || isCancelling,
-        generatedCount: generatedVideos.length,
-        totalCount: payload.segments.length,
-        setError,
-        setStep,
-        setPartialCompletionError,
-      });
-      if (outcome === 'retry') {
-        console.log('🔄 First attempt failed, auto-retrying...');
-        updateProgress('Generation failed, automatically retrying...');
-        return handleKlingExtendFlow(1);
       }
     } finally {
       setIsGenerating(false);
@@ -1091,6 +876,7 @@ export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack
     setError(null);
     let segmentsToUse: VeoSegment[] = []; // Declare at function scope
     try {
       // Step 1: Get segments - skip prompt generation if segments already exist (retry mode)
@@ -1317,6 +1103,7 @@ export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack
           trimPoint, // Store trim point for merge
         };
         addVideo(generatedVideo);
         updateProgress(`Completed video ${i + 1} of ${segmentsToUse.length}`, i + 1, segmentsToUse.length);
       }
@@ -1328,20 +1115,39 @@ export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack
     } catch (err) {
       console.error('Replicate generation error:', err);
       const errorMessage = err instanceof Error ? err.message : 'Replicate generation failed';
-      const outcome = await handleFlowRetry({
-        attemptCount,
-        errorMessage,
-        isCancelled: errorMessage.includes('cancelled') || isCancelling,
-        generatedCount: state.generatedVideos.length,
-        totalCount: segmentsToUse.length,
-        setError,
-        setStep,
-        setPartialCompletionError,
-      });
-      if (outcome === 'retry') {
-        console.log('🔄 First attempt failed, auto-retrying...');
-        updateProgress('Generation failed, automatically retrying...');
-        return handleReplicateGeneration(1);
       }
     } finally {
       setIsGenerating(false);

     setError(null);
     let segmentsToUse: VeoSegment[] = [];
+    let currentImageFile: File = imageFile; // Track reference image across segments and catch block
+    let completedVideoCount = 0; // Track locally to avoid stale React state closure
     try {
         }
       }
+      // Reset reference image to original for this run
+      currentImageFile = imageFile;
       // RESUME SUPPORT: Start from where we left off if retrying
       const startIndex = attemptCount > 0 ? generatedVideos.length : 0;
       // Step 2: Generate videos segment by segment with frame continuity
       for (let i = startIndex; i < segmentsToUse.length; i++) {
         const isLastSegment = i === segmentsToUse.length - 1;
+        // Per-segment safety retry loop with hard cap (prevents infinite retries)
+        for (let safetyAttempt = 0; safetyAttempt <= AUTO_FIX_MAX_ATTEMPTS; safetyAttempt++) {
+        try {
+        const segment = segmentsToUse[i];
         updateProgress(
+          `Generating video ${i + 1} of ${segmentsToUse.length}...${i > 0 ? ' (using last frame from previous)' : ''}${safetyAttempt > 0 ? ` (safety retry ${safetyAttempt}/${AUTO_FIX_MAX_ATTEMPTS})` : ''}`,
           i,
           segmentsToUse.length
         );
           trimPoint, // Store trim point for merge
         };
         addVideo(generatedVideo);
+        completedVideoCount++;
         updateProgress(`Completed video ${i + 1} of ${segmentsToUse.length}`, i + 1, segmentsToUse.length);
+        break; // Segment succeeded - exit safety retry loop, move to next segment
+        } catch (segErr) {
+          const segErrMsg = segErr instanceof Error ? segErr.message : String(segErr);
+          // Safety error with retries remaining: auto-fix the prompt and retry this segment
+          if (isUnsafeSegmentError(segErrMsg) && safetyAttempt < AUTO_FIX_MAX_ATTEMPTS) {
+            console.log(`🛡️ Safety error on segment ${i + 1}, auto-fix attempt ${safetyAttempt + 1}/${AUTO_FIX_MAX_ATTEMPTS}...`);
+            updateProgress(`Safety issue in segment ${i + 1}, auto-fixing (attempt ${safetyAttempt + 1}/${AUTO_FIX_MAX_ATTEMPTS})...`);
+            try {
+              const fixResult = await fixUnsafePrompt({
+                segment: segmentsToUse[i],
+                error_message: segErrMsg,
+                attempt_count: safetyAttempt + 1
+              });
+              if (fixResult.success && fixResult.fixed_segment) {
+                console.log(`✅ Auto-fix successful: ${fixResult.changes_made}`);
+                updateProgress(`Auto-fix successful: ${fixResult.changes_made}`);
+                segmentsToUse[i] = fixResult.fixed_segment;
+                updateSegments([...segmentsToUse]);
+                await new Promise(resolve => setTimeout(resolve, 1000));
+                continue; // Retry this segment with the fixed prompt
+              } else {
+                console.warn(`⚠️ Auto-fix returned no usable fix: ${fixResult.error}`);
+              }
+            } catch (fixErr) {
+              console.error('❌ Auto-fix API error:', fixErr);
+            }
+          }
+          // Non-safety error, fix failed, or max safety retries exhausted → propagate to outer handler
+          throw segErr;
+        }
+        } // end per-segment safety retry loop
       }
       // All done!
       clearDraft(); // Clear draft on successful generation
       setStep('completed');
       updateProgress('All videos generated successfully!');
         return;
       }
+      // If some videos were generated, show partial completion immediately
+      // (don't retry from scratch -- that would waste the already-generated videos)
+      if (completedVideoCount > 0) {
+        console.log(`⚠️ Partial completion: ${completedVideoCount}/${segmentsToUse.length} segments generated`);
+        setPartialCompletionError(
+          `Generation stopped at segment ${completedVideoCount + 1} of ${segmentsToUse.length}. ` +
+          `${completedVideoCount} video${completedVideoCount > 1 ? 's' : ''} generated successfully. ` +
+          `Reason: ${errorMessage}`
+        );
+        setStep('completed');
+      } else {
+        // No videos generated at all -- use normal retry logic
+        const outcome: FlowRetryOutcome = await handleFlowRetry({
+          attemptCount,
+          errorMessage,
+          isCancelled: false,
+          generatedCount: 0,
+          totalCount: segmentsToUse.length,
+          setError,
+          setStep,
+          setPartialCompletionError,
+        });
+        if (outcome === 'retry') {
+          console.log('🔄 No videos generated, auto-retrying...');
+          updateProgress('Generation failed, automatically retrying...');
+          return handleKlingFrameContinuityFlow(1);
         }
       }
     } finally {
       setIsGenerating(false);
     }
     setError(null);
     let payload: { segments: VeoSegment[] } = { segments: [] }; // Declare at function scope
+    let completedVideoCount = 0; // Track locally to avoid stale React state closure
     try {
       // Step 1: Get segments - skip prompt generation if segments already exist (retry mode)
           duration,
           thumbnails,
         });
+        completedVideoCount++;
         updateProgress(`Completed video ${i + 1} of ${payload.segments.length}`, i + 1, payload.segments.length);
     } catch (err) {
       console.error('Generation error:', err);
       const errorMessage = err instanceof Error ? err.message : 'Generation failed';
+      const isUserCancel = (err as Error & { name?: string })?.name === 'AbortError' || errorMessage.includes('cancelled') || isCancelling;
+      if (isUserCancel) {
+        setStep('error');
+        setIsGenerating(false);
+        return;
+      }
+      // If some videos were generated, show partial completion immediately
+      if (completedVideoCount > 0) {
+        console.log(`⚠️ Partial completion: ${completedVideoCount}/${payload.segments.length} segments generated`);
+        setPartialCompletionError(
+          `Generation stopped at segment ${completedVideoCount + 1} of ${payload.segments.length}. ` +
+          `${completedVideoCount} video${completedVideoCount > 1 ? 's' : ''} generated successfully. ` +
+          `Reason: ${errorMessage}`
+        );
+        setStep('completed');
+      } else {
+        const outcome = await handleFlowRetry({
+          attemptCount,
+          errorMessage,
+          isCancelled: false,
+          generatedCount: 0,
+          totalCount: payload.segments.length,
+          setError,
+          setStep,
+          setPartialCompletionError,
+        });
+        if (outcome === 'retry') {
+          console.log('🔄 No videos generated, auto-retrying...');
+          updateProgress('Generation failed, automatically retrying...');
+          return handleKlingExtendFlow(1);
+        }
       }
     } finally {
       setIsGenerating(false);
     setError(null);
     let segmentsToUse: VeoSegment[] = []; // Declare at function scope
+    let completedVideoCount = 0; // Track locally to avoid stale React state closure
     try {
       // Step 1: Get segments - skip prompt generation if segments already exist (retry mode)
           trimPoint, // Store trim point for merge
         };
         addVideo(generatedVideo);
+        completedVideoCount++;
         updateProgress(`Completed video ${i + 1} of ${segmentsToUse.length}`, i + 1, segmentsToUse.length);
       }
     } catch (err) {
       console.error('Replicate generation error:', err);
       const errorMessage = err instanceof Error ? err.message : 'Replicate generation failed';
+      const isUserCancel = (err as Error & { name?: string })?.name === 'AbortError' || errorMessage.includes('cancelled') || isCancelling;
+      if (isUserCancel) {
+        setStep('error');
+        setIsGenerating(false);
+        return;
+      }
+      // If some videos were generated, show partial completion immediately
+      if (completedVideoCount > 0) {
+        console.log(`⚠️ Partial completion: ${completedVideoCount}/${segmentsToUse.length} segments generated`);
+        setPartialCompletionError(
+          `Generation stopped at segment ${completedVideoCount + 1} of ${segmentsToUse.length}. ` +
+          `${completedVideoCount} video${completedVideoCount > 1 ? 's' : ''} generated successfully. ` +
+          `Reason: ${errorMessage}`
+        );
+        setStep('completed');
+      } else {
+        const outcome = await handleFlowRetry({
+          attemptCount,
+          errorMessage,
+          isCancelled: false,
+          generatedCount: 0,
+          totalCount: segmentsToUse.length,
+          setError,
+          setStep,
+          setPartialCompletionError,
+        });
+        if (outcome === 'retry') {
+          console.log('🔄 No videos generated, auto-retrying...');
+          updateProgress('Generation failed, automatically retrying...');
+          return handleReplicateGeneration(1);
+        }
       }
     } finally {
       setIsGenerating(false);

frontend/src/components/GenerationProgress.tsx CHANGED Viewed

@@ -71,7 +71,7 @@ const XIcon = () => (
 export const GenerationProgress: React.FC = () => {
   const { state, cancelGeneration } = useGeneration();
-  const { progress, provider, generatedVideos, segments, isCancelling, activeTaskIds, step } = state;
   // Show enhanced UX during prompt generation (streaming)
   // Use estimated count if segments not yet loaded, or actual count if available
@@ -217,23 +217,21 @@ export const GenerationProgress: React.FC = () => {
                 </div>
               )}
             </div>
-            {activeTaskIds.length > 0 && (
-              <button
-                onClick={cancelGeneration}
-                disabled={isCancelling}
-                className={`
-                  px-4 py-2 rounded-lg font-medium text-sm transition-all
-                  flex items-center gap-2
-                  ${isCancelling
-                    ? 'bg-void-700 text-void-400 cursor-not-allowed'
-                    : 'bg-red-500/20 hover:bg-red-500/30 text-red-400 hover:text-red-300 border border-red-500/30'
-                  }
-                `}
-              >
-                <XIcon />
-                {isCancelling ? 'Cancelling...' : 'Cancel Generation'}
-              </button>
-            )}
           </div>
         </div>

 export const GenerationProgress: React.FC = () => {
   const { state, cancelGeneration } = useGeneration();
+  const { progress, provider, generatedVideos, segments, isCancelling, step } = state;
   // Show enhanced UX during prompt generation (streaming)
   // Use estimated count if segments not yet loaded, or actual count if available
                 </div>
               )}
             </div>
+            <button
+              onClick={cancelGeneration}
+              disabled={isCancelling}
+              className={`
+                px-4 py-2 rounded-lg font-medium text-sm transition-all
+                flex items-center gap-2
+                ${isCancelling
+                  ? 'bg-void-700 text-void-400 cursor-not-allowed'
+                  : 'bg-red-500/20 hover:bg-red-500/30 text-red-400 hover:text-red-300 border border-red-500/30'
+                }
+              `}
+            >
+              <XIcon />
+              {isCancelling ? 'Cancelling...' : 'Cancel Generation'}
+            </button>
           </div>
         </div>

frontend/src/context/GenerationContext.tsx CHANGED Viewed

@@ -209,23 +209,35 @@ export function GenerationProvider({ children }: { children: ReactNode }) {
         }
         if (currentStep === 'generating_video' || currentStep === 'processing') {
           const { klingCancel } = await import('@/utils/api');
           const currentTaskIds = [...s.activeTaskIds];
-          const cancelPromises = currentTaskIds.map(taskId =>
-            klingCancel(taskId).catch(err => {
-              console.warn(`Failed to cancel task ${taskId}:`, err);
-            })
-          );
-          await Promise.all(cancelPromises);
-          currentTaskIds.forEach(id => {
-            dispatch({ type: 'REMOVE_TASK_ID', payload: id });
-          });
-          dispatch({ type: 'SET_TASK_ID', payload: null });
-          const msg = videoCount > 0
-            ? `Generation cancelled. ${videoCount} video segment${videoCount === 1 ? '' : 's'} generated.`
-            : 'Generation cancelled by user.';
-          dispatch({ type: 'SET_ERROR', payload: msg });
-          dispatch({ type: 'SET_STEP', payload: 'error' });
           return;
         }
       } catch (error) {

         }
         if (currentStep === 'generating_video' || currentStep === 'processing') {
+          // Cancel any active API tasks
           const { klingCancel } = await import('@/utils/api');
           const currentTaskIds = [...s.activeTaskIds];
+          if (currentTaskIds.length > 0) {
+            const cancelPromises = currentTaskIds.map(taskId =>
+              klingCancel(taskId).catch(err => {
+                console.warn(`Failed to cancel task ${taskId}:`, err);
+              })
+            );
+            await Promise.all(cancelPromises);
+            currentTaskIds.forEach(id => {
+              dispatch({ type: 'REMOVE_TASK_ID', payload: id });
+            });
+            dispatch({ type: 'SET_TASK_ID', payload: null });
+          }
+          // If some videos were generated, show partial completion so user can merge them
+          if (videoCount > 0) {
+            const totalCount = s.segments.length || s.progress.total;
+            dispatch({
+              type: 'SET_PARTIAL_COMPLETION_ERROR',
+              payload: `Generation cancelled by user at segment ${videoCount + 1} of ${totalCount}. ` +
+                `${videoCount} video${videoCount > 1 ? 's' : ''} generated successfully.`
+            });
+            dispatch({ type: 'SET_STEP', payload: 'completed' });
+          } else {
+            dispatch({ type: 'SET_ERROR', payload: 'Generation cancelled by user.' });
+            dispatch({ type: 'SET_STEP', payload: 'error' });
+          }
           return;
         }
       } catch (error) {