import React, { useState, useCallback, useEffect, useRef } from 'react';
import { motion } from 'framer-motion';
import { useGeneration } from '@/context/GenerationContext';
import type { GenerationInputs, VideoProvider, GeneratedVideo, VeoSegment } from '@/types';
import {
SparklesIcon,
ArrowLeftIcon,
ImageIcon
} from './Icons';
import {
generatePrompts,
uploadImage,
klingGenerate,
klingExtend,
waitForKlingVideo,
generateVideoWithRetry,
downloadVideo,
getVideoDuration,
generateThumbnails,
replicateGenerate,
waitForReplicateVideo,
whisperAnalyzeAndExtract
} from '@/utils/api';
interface GenerationFormProps {
provider: VideoProvider;
onBack: () => void;
}
const voiceTypes = ['Deep', 'Warm', 'Crisp', 'None'];
const energyLevels = ['Low', 'Medium', 'High'];
const cameraStyles = ['Standard', 'Handheld', 'Steadicam', 'FPV Drone'];
const narrativeStyles = ['Standard', 'Documentary', 'Action', 'Introspective'];
const aspectRatios = ['9:16', '16:9', '1:1'];
// Generation modes
type GenerationMode = 'extend' | 'frame-continuity';
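// 'frame-continuity': extract a hand-off frame from each finished clip and reuse it
// as the reference image for the next segment (Whisper-assisted, see below).
// 'extend': chain clips through KIE's native extend endpoint instead.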
export const GenerationForm: React.FC<GenerationFormProps> = ({ provider, onBack }) => {
const { startGeneration, updateProgress, addVideo, setStep, setError, setRetryState, updateSegments, addTaskId, removeTaskId, state } = useGeneration();
const { retryState, generatedVideos, segments, isCancelling } = state;
// Draft storage key
const draftKey = `video-gen-draft-${provider}`;
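// Drafts persist as { formState, imagePreview, generationMode, savedAt } JSON,
// keyed per provider so Kling and Replicate drafts don't overwrite each other.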
// Load draft on mount - initialize state from localStorage
const loadDraft = useCallback(() => {
try {
const savedDraft = localStorage.getItem(draftKey);
if (savedDraft) {
const draft = JSON.parse(savedDraft);
return draft;
}
} catch (error) {
console.warn('Failed to load draft:', error);
}
return null;
}, [draftKey]);
const draft = loadDraft();
const [draftRestored, setDraftRestored] = useState(!!draft);
const [formState, setFormState] = useState<GenerationInputs>(
draft?.formState || {
script: '',
style: '',
voiceType: 'Deep',
energyLevel: 'Medium',
cameraStyle: 'Standard',
narrativeStyle: 'Standard',
seedValue: 12005,
aspectRatio: '9:16',
model: provider === 'kling' ? 'veo3_fast' : 'google/veo-3',
}
);
const [imageFile, setImageFile] = useState<File | null>(null);
const [imagePreview, setImagePreview] = useState<string | null>(draft?.imagePreview || null);
const [isDragging, setIsDragging] = useState(false);
const [isGenerating, setIsGenerating] = useState(false);
// Generation mode selection
const [generationMode, setGenerationMode] = useState<GenerationMode>(draft?.generationMode || 'frame-continuity');
// Retry editing state
const [retryDialogue, setRetryDialogue] = useState('');
const [retryEnvironment, setRetryEnvironment] = useState('');
const [retryAction, setRetryAction] = useState('');
// Initialize retry fields when error occurs
useEffect(() => {
if (retryState && segments[retryState.failedSegmentIndex]) {
const seg = segments[retryState.failedSegmentIndex];
setRetryDialogue(seg.action_timeline?.dialogue || '');
setRetryEnvironment(seg.scene_continuity?.environment || '');
setRetryAction(seg.character_description?.current_state || '');
}
}, [retryState, segments]);
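// Apply the edited fields back onto the failed segment, clear the error state,
// and re-enter the generation flow matching the current provider/mode.
// Note: the frame-continuity and Replicate flows currently restart from the
// first segment, while the extend flow resumes from the last completed video.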
const handleRetrySubmit = () => {
if (!retryState) return;
const idx = retryState.failedSegmentIndex;
const updatedSegments = [...segments];
// Update the segment with edited values
if (updatedSegments[idx]) {
updatedSegments[idx] = {
...updatedSegments[idx],
action_timeline: {
...updatedSegments[idx].action_timeline,
dialogue: retryDialogue
},
scene_continuity: {
...updatedSegments[idx].scene_continuity,
environment: retryEnvironment
},
character_description: {
...updatedSegments[idx].character_description,
current_state: retryAction
}
};
updateSegments(updatedSegments);
}
// Clear error and resume
setRetryState(null);
setStep('generating_video');
setIsGenerating(true);
// Resume generation based on provider
if (provider === 'kling') {
if (generationMode === 'frame-continuity') {
handleKlingFrameContinuityFlow();
} else {
handleKlingExtendFlow();
}
} else {
handleReplicateGeneration();
}
};
const handleCancelRetry = () => {
setRetryState(null);
setIsGenerating(false);
};
// Show notification if draft was restored
useEffect(() => {
if (draftRestored) {
console.log('📝 Draft restored from localStorage');
// Auto-hide notification after 5 seconds
const timer = setTimeout(() => setDraftRestored(false), 5000);
return () => clearTimeout(timer);
}
}, [draftRestored]);
// Save draft whenever formState, imagePreview, or generationMode changes
// Skip saving on initial mount to avoid overwriting with default values
const isInitialMount = useRef(true);
useEffect(() => {
if (isInitialMount.current) {
isInitialMount.current = false;
return;
}
try {
const draft = {
formState,
imagePreview,
generationMode,
savedAt: new Date().toISOString(),
};
localStorage.setItem(draftKey, JSON.stringify(draft));
} catch (error) {
console.warn('Failed to save draft:', error);
}
}, [formState, imagePreview, generationMode, draftKey]);
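// Mirror isCancelling into a ref so the long-running async generation flows can
// observe the latest value; the `isCancelling` destructured at render time goes
// stale inside those closures.
const isCancellingRef = useRef(isCancelling);
useEffect(() => {
isCancellingRef.current = isCancelling;
}, [isCancelling]);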
// Clear draft function
const clearDraft = useCallback(() => {
try {
localStorage.removeItem(draftKey);
setDraftRestored(false);
console.log('🗑️ Draft cleared');
} catch (error) {
console.warn('Failed to clear draft:', error);
}
}, [draftKey]);
// Estimate segment count: roughly 17 words fit one 8-second clip, capped at 10 segments
const wordCount = formState.script.trim().split(/\s+/).filter(w => w).length;
const estimatedSegments = wordCount > 0 ? Math.max(1, Math.min(Math.ceil(wordCount / 17), 10)) : 0;
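// e.g. a 120-word script → Math.ceil(120 / 17) = 8 estimated segments (~64s of video)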
// Handle input changes
const handleChange = (e: React.ChangeEvent<HTMLInputElement | HTMLTextAreaElement | HTMLSelectElement>) => {
const { name, value } = e.target;
// Input values arrive as strings; coerce numeric fields back to numbers
setFormState(prev => ({ ...prev, [name]: name === 'seedValue' ? Number(value) : value }));
};
// Handle image upload
const handleImageUpload = useCallback((file: File) => {
if (file.type.startsWith('image/')) {
setImageFile(file);
const reader = new FileReader();
reader.onloadend = () => setImagePreview(reader.result as string);
reader.readAsDataURL(file);
}
}, []);
// Drag and drop handlers
const handleDragOver = (e: React.DragEvent) => {
e.preventDefault();
setIsDragging(true);
};
const handleDragLeave = () => setIsDragging(false);
const handleDrop = (e: React.DragEvent) => {
e.preventDefault();
setIsDragging(false);
const file = e.dataTransfer.files[0];
if (file) handleImageUpload(file);
};
// Extract last frame from video blob
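// Seeks to ~0.1s before the end, draws that frame to an offscreen canvas, and
// returns it as a JPEG File suitable for re-upload as the next reference image.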
const extractLastFrame = async (videoBlob: Blob): Promise<File> => {
return new Promise((resolve, reject) => {
const video = document.createElement('video');
video.preload = 'metadata';
video.muted = true;
video.src = URL.createObjectURL(videoBlob);
video.onloadedmetadata = () => {
// Seek to just before the end; setting currentTime fires onseeked below
video.currentTime = Math.max(0, video.duration - 0.1);
};
video.onseeked = () => {
// Create canvas and draw current frame
const canvas = document.createElement('canvas');
canvas.width = video.videoWidth;
canvas.height = video.videoHeight;
const ctx = canvas.getContext('2d');
if (!ctx) {
URL.revokeObjectURL(video.src);
reject(new Error('Could not get canvas context'));
return;
}
ctx.drawImage(video, 0, 0);
// Convert to blob then to file
canvas.toBlob((blob) => {
URL.revokeObjectURL(video.src);
if (!blob) {
reject(new Error('Could not extract frame'));
return;
}
const file = new File([blob], `frame-${Date.now()}.jpg`, { type: 'image/jpeg' });
resolve(file);
}, 'image/jpeg', 0.95);
};
video.onerror = () => {
URL.revokeObjectURL(video.src);
reject(new Error('Failed to load video for frame extraction'));
};
});
};
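// Decode a base64 JPEG frame (with or without a data-URL prefix) returned by the
// Whisper endpoint into a File that can be uploaded as the next reference image.
// Shared by the KIE and Replicate frame-continuity flows below.
const base64ToJpegFile = (base64: string, fileName: string): File => {
const base64Data = base64.split(',')[1] || base64;
const byteCharacters = atob(base64Data);
const byteArray = new Uint8Array(byteCharacters.length);
for (let j = 0; j < byteCharacters.length; j++) {
byteArray[j] = byteCharacters.charCodeAt(j);
}
return new File([new Blob([byteArray], { type: 'image/jpeg' })], fileName, { type: 'image/jpeg' });
};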
// ============================================
// KIE GENERATION - FRAME CONTINUITY FLOW
// ============================================
// This mirrors the Replicate flow from standalone_video_creator.py:
// 1. Generate the first video with the original reference image
// 2. Run Whisper analysis on the generated video to find the last spoken word
//    and extract a hand-off frame just after it
// 3. Use that frame as the reference image for the next segment
// 4. Repeat for all segments
const handleKlingFrameContinuityFlow = async () => {
if (!imageFile || !formState.script.trim()) return;
setIsGenerating(true);
setError(null);
// Declared before the try so the catch block can read how many videos completed
const generatedVideos: GeneratedVideo[] = [];
try {
// Step 1: Generate prompts using GPT-4o
updateProgress('Analyzing script with GPT-4o...');
const formData = new FormData();
formData.append('script', formState.script);
formData.append('style', formState.style || 'clean, lifestyle UGC');
formData.append('jsonFormat', 'standard');
formData.append('continuationMode', 'true');
formData.append('voiceType', formState.voiceType || '');
formData.append('energyLevel', formState.energyLevel || '');
formData.append('settingMode', 'single');
formData.append('cameraStyle', formState.cameraStyle || '');
formData.append('narrativeStyle', formState.narrativeStyle || '');
formData.append('image', imageFile);
const payload = await generatePrompts(formData);
if (!payload?.segments?.length) {
throw new Error('No segments generated from script');
}
const segments = payload.segments;
updateProgress(`Generated ${segments.length} segments. Starting video generation...`);
startGeneration(segments);
// Track current reference image (starts with original)
let currentImageFile = imageFile;
// Step 2: Generate videos segment by segment with frame continuity
for (let i = 0; i < segments.length; i++) {
const segment = segments[i];
const isLastSegment = i === segments.length - 1;
updateProgress(
`Generating video ${i + 1} of ${segments.length}...${i > 0 ? ' (using last frame from previous)' : ''}`,
i,
segments.length
);
// Upload current reference image
updateProgress(`Uploading reference image for segment ${i + 1}...`);
const uploadResult = await uploadImage(currentImageFile);
const hostedImageUrl = uploadResult.url;
console.log(`🖼️ Segment ${i + 1} using image: ${i === 0 ? 'original' : 'last frame from previous'}`);
// Generate video with current reference image
updateProgress(`Submitting segment ${i + 1} to KIE Veo 3.1...`);
const generateResult = await klingGenerate({
prompt: segment,
imageUrls: [hostedImageUrl],
model: 'veo3_fast',
aspectRatio: formState.aspectRatio,
generationType: 'FIRST_AND_LAST_FRAMES_2_VIDEO',
seeds: formState.seedValue,
voiceType: formState.voiceType,
});
// Wait for completion
updateProgress(`Processing video ${i + 1}... (this may take 1-2 minutes)`);
const videoUrl = await waitForKlingVideo(generateResult.taskId);
// Download video
updateProgress(`Downloading video ${i + 1}...`);
const videoBlob = await downloadVideo(videoUrl);
const blobUrl = URL.createObjectURL(videoBlob);
// Get video duration
const videoFile = new File([videoBlob], `segment-${i + 1}.mp4`, { type: 'video/mp4' });
const duration = await getVideoDuration(videoFile);
const thumbnails = await generateThumbnails(videoFile);
// Use Whisper to find optimal trim point, extract frame, and get transcription
let trimPoint = duration; // Default to full duration
let transcribedText = ''; // What Whisper actually heard
if (!isLastSegment) {
updateProgress(`Analyzing video ${i + 1} with Whisper for optimal continuity...`);
try {
// Get dialogue from segment for Whisper analysis
const dialogue = segment.action_timeline?.dialogue || '';
const whisperResult = await whisperAnalyzeAndExtract({
video_url: videoUrl,
dialogue: dialogue,
buffer_time: 0.3,
model_size: 'base'
});
if (whisperResult.success && whisperResult.frame_base64) {
// Convert the base64 frame to a File to use as the next segment's reference
currentImageFile = base64ToJpegFile(whisperResult.frame_base64, `whisper-frame-${i + 1}.jpg`);
// Store trim point for later merge
if (whisperResult.trim_point) {
trimPoint = whisperResult.trim_point;
}
// Store transcribed text for prompt refinement
if (whisperResult.transcribed_text) {
transcribedText = whisperResult.transcribed_text;
console.log(`📝 Whisper transcription: "${transcribedText.substring(0, 100)}..."`);
}
console.log(`✅ Whisper: Last word at ${whisperResult.last_word_timestamp?.toFixed(2)}s, frame at ${whisperResult.frame_timestamp?.toFixed(2)}s, trim at ${trimPoint.toFixed(2)}s`);
// REFINE NEXT SEGMENT PROMPT with frame + transcription
const nextSegment = segments[i + 1];
if (nextSegment && currentImageFile) {
updateProgress(`Refining segment ${i + 2} prompt with visual and audio context...`);
try {
const { refinePromptWithContext } = await import('@/utils/api');
const refined = await refinePromptWithContext(
nextSegment,
currentImageFile,
transcribedText,
dialogue
);
// Update the next segment with refined prompt
segments[i + 1] = refined.refined_prompt as typeof nextSegment;
console.log(`✅ Refined segment ${i + 2} prompt for consistency`);
} catch (refineError) {
console.warn(`⚠️ Prompt refinement failed, using original:`, refineError);
}
}
} else {
// Fallback to simple last frame extraction
console.log(`⚠️ Whisper failed (${whisperResult.error}), falling back to last frame extraction`);
const lastFrameFile = await extractLastFrame(videoBlob);
currentImageFile = lastFrameFile;
}
} catch (frameError) {
console.error(`⚠️ Whisper analysis failed, using fallback:`, frameError);
try {
const lastFrameFile = await extractLastFrame(videoBlob);
currentImageFile = lastFrameFile;
} catch {
// Continue with current image if all extraction fails
}
}
}
// Add to generated videos with trim metadata
const generatedVideo: GeneratedVideo = {
id: `video-${Date.now()}-${i}`,
url: videoUrl,
blobUrl,
segment,
duration,
thumbnails,
trimPoint, // Store trim point for merge
};
generatedVideos.push(generatedVideo);
addVideo(generatedVideo);
updateProgress(`Completed video ${i + 1} of ${segments.length}`, i + 1, segments.length);
}
// All done!
clearDraft(); // Clear draft on successful generation
setStep('completed');
updateProgress('All videos generated successfully!');
} catch (err) {
console.error('Generation error:', err);
const errorMessage = err instanceof Error ? err.message : 'Generation failed';
// Enable retry mode
setRetryState({
failedSegmentIndex: generatedVideos.length, // Current segment that failed
error: errorMessage
});
setStep('configuring'); // Go back to form, but with retry overlay
} finally {
setIsGenerating(false);
}
};
// ============================================
// KIE GENERATION - EXTEND API FLOW
// ============================================
// Original flow using KIE's extend API
const handleKlingExtendFlow = async () => {
if (!imageFile || !formState.script.trim()) return;
setIsGenerating(true);
setError(null);
try {
// Step 1: Generate prompts using GPT-4o
updateProgress('Analyzing script with GPT-4o...');
const formData = new FormData();
formData.append('script', formState.script);
formData.append('style', formState.style || 'clean, lifestyle UGC');
formData.append('jsonFormat', 'standard');
formData.append('continuationMode', 'true');
formData.append('voiceType', formState.voiceType || '');
formData.append('energyLevel', formState.energyLevel || '');
formData.append('settingMode', 'single');
formData.append('cameraStyle', formState.cameraStyle || '');
formData.append('narrativeStyle', formState.narrativeStyle || '');
formData.append('image', imageFile);
// Use existing segments if retrying, otherwise generate new ones
let payload: { segments: VeoSegment[] };
if (retryState && segments.length > 0) {
// Retry mode: use existing segments (they may have been edited)
payload = { segments };
updateProgress(`Using existing ${segments.length} segments for retry...`);
} else {
// Normal mode: generate new segments
payload = await generatePrompts(formData);
if (!payload?.segments?.length) {
throw new Error('No segments generated from script');
}
updateProgress(`Generated ${payload.segments.length} segments. Starting video generation...`);
startGeneration(payload.segments);
}
// Step 2: Upload reference image once
updateProgress('Uploading reference image...');
const uploadResult = await uploadImage(imageFile);
const hostedImageUrl = uploadResult.url;
// Step 3: Generate videos (resume from where we left off if retrying)
const startIndex = generatedVideos.length;
let currentTaskId: string | null = null;
let currentImageUrl = hostedImageUrl; // Start with original image
// If resuming, extract last frame from previous video for continuity
if (startIndex > 0 && generatedVideos[startIndex - 1]?.blobUrl) {
updateProgress(`Extracting last frame from segment ${startIndex} for continuity...`);
try {
const lastVideoBlob = await fetch(generatedVideos[startIndex - 1].blobUrl!).then(r => r.blob());
const lastFrameFile = await extractLastFrame(lastVideoBlob);
const frameUploadResult = await uploadImage(lastFrameFile);
currentImageUrl = frameUploadResult.url;
updateProgress(`Using frame from segment ${startIndex} for segment ${startIndex + 1}...`);
} catch (frameError) {
console.warn('Failed to extract frame, using original image:', frameError);
// Continue with original image
}
}
for (let i = startIndex; i < payload.segments.length; i++) {
const segment = payload.segments[i];
updateProgress(`Generating video ${i + 1} of ${payload.segments.length}...`, i, payload.segments.length);
// Generate video with automatic retry (retries once on failure)
updateProgress(`Processing video ${i + 1}... (this may take 1-2 minutes)`);
// Check for cancellation via the ref (the state value destructured at render is stale here)
if (isCancellingRef.current) {
throw new Error('Generation cancelled by user');
}
const videoUrl = await generateVideoWithRetry(async () => {
if (i === 0 || (i === startIndex && startIndex > 0)) {
// First segment OR resuming after failure: use generate API with current image
const generateResult = await klingGenerate({
prompt: segment,
imageUrls: [currentImageUrl],
model: 'veo3_fast',
aspectRatio: formState.aspectRatio,
generationType: 'FIRST_AND_LAST_FRAMES_2_VIDEO',
seeds: formState.seedValue,
voiceType: formState.voiceType,
});
currentTaskId = generateResult.taskId;
addTaskId(currentTaskId);
return generateResult;
} else {
// Subsequent segments: use extend API
const extendResult = await klingExtend(
currentTaskId!,
segment,
formState.seedValue,
formState.voiceType
);
currentTaskId = extendResult.taskId;
addTaskId(currentTaskId);
return extendResult;
}
}, 300000, (attempt) => {
updateProgress(`Retrying video ${i + 1}... (attempt ${attempt}/2)`);
});
// Download and save
updateProgress(`Downloading video ${i + 1}...`);
const videoBlob = await downloadVideo(videoUrl);
const blobUrl = URL.createObjectURL(videoBlob);
const videoFile = new File([videoBlob], `segment-${i + 1}.mp4`, { type: 'video/mp4' });
const duration = await getVideoDuration(videoFile);
const thumbnails = await generateThumbnails(videoFile);
addVideo({
id: `video-${Date.now()}-${i}`,
url: videoUrl,
blobUrl,
segment,
duration,
thumbnails,
});
updateProgress(`Completed video ${i + 1} of ${payload.segments.length}`, i + 1, payload.segments.length);
// Remove task ID after completion
if (currentTaskId) {
removeTaskId(currentTaskId);
}
}
clearDraft(); // Clear draft on successful generation
setStep('completed');
updateProgress('All videos generated successfully!');
} catch (err) {
console.error('Generation error:', err);
const errorMessage = err instanceof Error ? err.message : 'Generation failed';
// If cancelled, don't show retry option
if (errorMessage.includes('cancelled') || isCancellingRef.current) {
setError('Generation cancelled by user');
setStep('error');
} else {
// Enable retry mode
setRetryState({
failedSegmentIndex: generatedVideos.length, // Current segment that failed
error: errorMessage
});
setStep('configuring'); // Go back to form, but with retry overlay
}
} finally {
setIsGenerating(false);
// Clean up any remaining task IDs (best effort; `state` here reflects render time)
state.activeTaskIds.forEach(taskId => removeTaskId(taskId));
}
};
// ============================================
// REPLICATE GENERATION - FRAME CONTINUITY FLOW
// ============================================
// This mirrors the approach from standalone_video_creator.py:
// 1. Generate prompts using GPT-4o
// 2. For each segment, generate video with current reference image
// 3. Extract last frame from generated video
// 4. Use that frame as reference for next segment
// 5. Result: strong visual continuity across all segments
const handleReplicateGeneration = async () => {
if (!formState.script.trim()) return;
setIsGenerating(true);
setError(null);
// Declared before the try so the catch block can read how many videos completed
const generatedVideos: GeneratedVideo[] = [];
try {
// Step 1: Generate prompts using GPT-4o
// Note: Replicate can work without an image, but for consistency we encourage one
updateProgress('Analyzing script with GPT-4o...');
const formData = new FormData();
formData.append('script', formState.script);
formData.append('style', formState.style || 'clean, lifestyle UGC');
formData.append('jsonFormat', 'standard');
formData.append('continuationMode', 'true');
formData.append('voiceType', formState.voiceType || '');
formData.append('energyLevel', formState.energyLevel || '');
formData.append('settingMode', 'single');
formData.append('cameraStyle', formState.cameraStyle || '');
formData.append('narrativeStyle', formState.narrativeStyle || '');
// If image provided, include it for GPT-4o analysis
if (imageFile) {
formData.append('image', imageFile);
} else {
// Create a placeholder image for GPT-4o (it needs one for analysis)
// In production, you might want to handle this differently
const placeholderBlob = new Blob(['placeholder'], { type: 'image/jpeg' });
formData.append('image', placeholderBlob, 'placeholder.jpg');
}
const payload = await generatePrompts(formData);
if (!payload?.segments?.length) {
throw new Error('No segments generated from script');
}
const segments = payload.segments;
updateProgress(`Generated ${segments.length} segments. Starting Replicate generation...`);
startGeneration(segments);
// Track current reference image (starts with original if provided)
let currentImageFile = imageFile;
// Step 2: Generate videos segment by segment with frame continuity
for (let i = 0; i < segments.length; i++) {
const segment = segments[i];
const isLastSegment = i === segments.length - 1;
updateProgress(
`Generating video ${i + 1} of ${segments.length} with Replicate...${i > 0 ? ' (using last frame)' : ''}`,
i,
segments.length
);
// Convert structured segment to text prompt for Replicate
// Replicate models typically expect text prompts
const textPrompt = convertSegmentToTextPrompt(segment);
console.log(`🎬 Segment ${i + 1} prompt:`, textPrompt.substring(0, 100) + '...');
// Upload current reference image if available
let imageUrl: string | undefined;
if (currentImageFile) {
updateProgress(`Uploading reference image for segment ${i + 1}...`);
const uploadResult = await uploadImage(currentImageFile);
imageUrl = uploadResult.url;
console.log(`🖼️ Segment ${i + 1} using image: ${i === 0 ? 'original' : 'last frame from previous'}`);
}
// Generate video with Replicate
updateProgress(`Submitting segment ${i + 1} to Replicate...`);
const generateResult = await replicateGenerate({
prompt: textPrompt,
imageUrl: imageUrl,
model: formState.model || 'google/veo-3',
aspectRatio: formState.aspectRatio,
});
// Wait for completion (polling)
updateProgress(`Processing video ${i + 1}... (this may take 2-5 minutes)`);
const videoUrl = await waitForReplicateVideo(generateResult.id);
// Download video
updateProgress(`Downloading video ${i + 1}...`);
const videoBlob = await downloadVideo(videoUrl);
const blobUrl = URL.createObjectURL(videoBlob);
// Get video duration and thumbnails
const videoFile = new File([videoBlob], `segment-${i + 1}.mp4`, { type: 'video/mp4' });
const duration = await getVideoDuration(videoFile);
const thumbnails = await generateThumbnails(videoFile);
// Use Whisper to find optimal trim point, extract frame, and get transcription
// This is more accurate than extracting the very last frame
let trimPoint = duration; // Default to full duration
let transcribedText = ''; // What Whisper actually heard
if (!isLastSegment) {
updateProgress(`Analyzing video ${i + 1} with Whisper for optimal continuity...`);
try {
// Get dialogue from segment for Whisper analysis
const dialogue = segment.action_timeline?.dialogue || textPrompt;
const whisperResult = await whisperAnalyzeAndExtract({
video_url: videoUrl,
dialogue: dialogue,
buffer_time: 0.3,
model_size: 'base'
});
if (whisperResult.success && whisperResult.frame_base64) {
// Convert the base64 frame to a File to use as the next segment's reference
currentImageFile = base64ToJpegFile(whisperResult.frame_base64, `whisper-frame-${i + 1}.jpg`);
// Store trim point for later merge
if (whisperResult.trim_point) {
trimPoint = whisperResult.trim_point;
}
// Store transcribed text for prompt refinement
if (whisperResult.transcribed_text) {
transcribedText = whisperResult.transcribed_text;
console.log(`📝 Whisper transcription: "${transcribedText.substring(0, 100)}..."`);
}
console.log(`✅ Whisper: Last word at ${whisperResult.last_word_timestamp?.toFixed(2)}s, frame at ${whisperResult.frame_timestamp?.toFixed(2)}s, trim at ${trimPoint.toFixed(2)}s`);
// REFINE NEXT SEGMENT PROMPT with frame + transcription
const nextSegment = segments[i + 1];
if (nextSegment && currentImageFile) {
updateProgress(`Refining segment ${i + 2} prompt with visual and audio context...`);
try {
const { refinePromptWithContext } = await import('@/utils/api');
const refined = await refinePromptWithContext(
nextSegment,
currentImageFile,
transcribedText,
dialogue
);
// Update the next segment with refined prompt
segments[i + 1] = refined.refined_prompt as typeof nextSegment;
console.log(`✅ Refined segment ${i + 2} prompt for consistency`);
} catch (refineError) {
console.warn(`⚠️ Prompt refinement failed, using original:`, refineError);
}
}
} else {
// Fallback to simple last frame extraction
console.log(`⚠️ Whisper failed (${whisperResult.error}), falling back to last frame extraction`);
const lastFrameFile = await extractLastFrame(videoBlob);
currentImageFile = lastFrameFile;
}
} catch (frameError) {
console.error(`⚠️ Whisper analysis failed, using fallback:`, frameError);
try {
const lastFrameFile = await extractLastFrame(videoBlob);
currentImageFile = lastFrameFile;
} catch {
// Continue with current image if all extraction fails
}
}
}
// Add to generated videos with trim metadata
const generatedVideo: GeneratedVideo = {
id: `video-${Date.now()}-${i}`,
url: videoUrl,
blobUrl,
segment,
duration,
thumbnails,
trimPoint, // Store trim point for merge
};
generatedVideos.push(generatedVideo);
addVideo(generatedVideo);
updateProgress(`Completed video ${i + 1} of ${segments.length}`, i + 1, segments.length);
}
// All done!
setStep('completed');
updateProgress('All videos generated successfully with Replicate!');
} catch (err) {
console.error('Replicate generation error:', err);
const errorMessage = err instanceof Error ? err.message : 'Replicate generation failed';
// Enable retry mode
setRetryState({
failedSegmentIndex: generatedVideos.length, // Current segment that failed
error: errorMessage
});
setStep('configuring'); // Go back to form, but with retry overlay
} finally {
setIsGenerating(false);
}
};
// Helper: Convert structured segment JSON to text prompt for Replicate
// Replicate models typically expect plain text, not structured JSON
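// Example output (illustrative):
// '"Hey, you have to see this". Character: smiling, holding the product.
// Scene: bright modern kitchen. Lighting: warm natural light. Do not include
// any captions, subtitles, or text overlays in the video'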
const convertSegmentToTextPrompt = (segment: VeoSegment): string => {
const parts: string[] = [];
// Extract dialogue
const dialogue = segment.action_timeline?.dialogue;
if (dialogue) {
parts.push(`"${dialogue}"`);
}
// Extract character description
const character = segment.character_description;
if (character?.current_state) {
parts.push(`Character: ${character.current_state}`);
}
// Extract scene description
const scene = segment.scene_continuity;
if (scene?.environment) {
parts.push(`Scene: ${scene.environment}`);
}
if (scene?.lighting_state) {
parts.push(`Lighting: ${scene.lighting_state}`);
}
if (scene?.camera_position) {
parts.push(`Camera: ${scene.camera_position}`);
}
if (scene?.camera_movement) {
parts.push(`Movement: ${scene.camera_movement}`);
}
// Extract synchronized actions
const syncedActions = segment.action_timeline?.synchronized_actions;
if (syncedActions) {
const actionsList = Object.entries(syncedActions)
.filter(([, value]) => value)
.map(([key, value]) => `${key}: ${value}`)
.join('; ');
if (actionsList) {
parts.push(`Actions: ${actionsList}`);
}
}
// Add instruction to not include captions/subtitles
parts.push('Do not include any captions, subtitles, or text overlays in the video');
return parts.join('. ');
};
// Main submit handler
const handleSubmit = (e: React.FormEvent) => {
e.preventDefault();
if (provider === 'kling') {
if (generationMode === 'frame-continuity') {
handleKlingFrameContinuityFlow();
} else {
handleKlingExtendFlow();
}
} else {
handleReplicateGeneration();
}
};
const isValid = provider === 'kling'
? !!imageFile && formState.script.trim().length > 0
: formState.script.trim().length > 0;
return (
<motion.div
initial={{ opacity: 0, x: 20 }}
animate={{ opacity: 1, x: 0 }}
exit={{ opacity: 0, x: -20 }}
className="max-w-6xl mx-auto p-8"
>
{/* Header */}
<div className="flex items-center justify-between mb-8">
<div>
<button
onClick={onBack}
className="flex items-center gap-2 text-void-400 hover:text-void-200 transition-colors mb-4"
>
<ArrowLeftIcon size={20} />
<span>Back to providers</span>
</button>
<h1 className="text-3xl font-display font-bold">
<span className={provider === 'kling' ? 'gradient-text' : 'gradient-text-electric'}>
{provider === 'kling' ? 'KIE API' : 'Replicate'}
</span>
<span className="text-void-200"> Video Generation</span>
</h1>
<p className="text-void-400 mt-2">
{provider === 'kling'
? 'Generate professional UGC videos with AI-powered segmentation'
: 'Create unique videos with open-source models'
}
</p>
</div>
</div>
{/* Retry Modal */}
{retryState && (
<div className="fixed inset-0 z-50 flex items-center justify-center bg-black/80 backdrop-blur-sm p-4">
<motion.div
initial={{ opacity: 0, scale: 0.95 }}
animate={{ opacity: 1, scale: 1 }}
className="bg-void-900 border border-void-700 rounded-2xl p-6 max-w-2xl w-full shadow-2xl overflow-y-auto max-h-[90vh]"
>
<div className="flex items-center gap-3 mb-4 text-red-400">
<svg className="w-6 h-6" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M12 9v2m0 4h.01m-6.938 4h13.856c1.54 0 2.502-1.667 1.732-3L13.732 4c-.77-1.333-2.694-1.333-3.464 0L3.34 16c-.77 1.333.192 3 1.732 3z" />
</svg>
<h3 className="text-xl font-bold">Generation Failed</h3>
</div>
<p className="text-void-300 mb-6 p-4 bg-void-800 rounded-lg border border-void-700">
Error at segment {retryState.failedSegmentIndex + 1}: <span className="text-red-300">{retryState.error}</span>
</p>
<div className="space-y-4 mb-8">
<h4 className="font-semibold text-void-200">Edit Segment {retryState.failedSegmentIndex + 1} to fix the issue:</h4>
<div>
<label className="block text-sm font-medium text-void-400 mb-1">Dialogue</label>
<textarea
value={retryDialogue}
onChange={(e) => setRetryDialogue(e.target.value)}
className="w-full bg-void-950 border border-void-700 rounded-lg p-3 text-void-100 h-24 focus:border-coral-500 focus:outline-none"
placeholder="Adjust dialogue..."
/>
</div>
<div>
<label className="block text-sm font-medium text-void-400 mb-1">Action / Character State</label>
<textarea
value={retryAction}
onChange={(e) => setRetryAction(e.target.value)}
className="w-full bg-void-950 border border-void-700 rounded-lg p-3 text-void-100 h-24 focus:border-coral-500 focus:outline-none"
placeholder="Adjust action description..."
/>
</div>
<div>
<label className="block text-sm font-medium text-void-400 mb-1">Environment</label>
<textarea
value={retryEnvironment}
onChange={(e) => setRetryEnvironment(e.target.value)}
className="w-full bg-void-950 border border-void-700 rounded-lg p-3 text-void-100 h-24 focus:border-coral-500 focus:outline-none"
placeholder="Adjust environment description..."
/>
</div>
</div>
<div className="flex justify-end gap-3">
<button
onClick={handleCancelRetry}
className="px-4 py-2 rounded-lg text-void-300 hover:text-white hover:bg-void-800 transition-colors"
>
Cancel
</button>
<button
onClick={handleRetrySubmit}
className="px-6 py-2 bg-gradient-to-r from-coral-500 to-coral-600 text-white font-semibold rounded-lg hover:from-coral-400 hover:to-coral-500 shadow-lg shadow-coral-500/20"
>
Retry Generation
</button>
</div>
</motion.div>
</div>
)}
{/* Draft Restored Notification */}
{draftRestored && (
<motion.div
initial={{ opacity: 0, y: -20 }}
animate={{ opacity: 1, y: 0 }}
exit={{ opacity: 0, y: -20 }}
className="mb-6 p-4 bg-void-800/80 border border-void-600 rounded-xl flex items-center justify-between"
>
<div className="flex items-center gap-3">
<div className="w-2 h-2 rounded-full bg-green-500 animate-pulse" />
<div>
<p className="text-sm font-medium text-void-200">Draft restored</p>
<p className="text-xs text-void-400">Your previous inputs have been loaded</p>
</div>
</div>
<button
onClick={() => setDraftRestored(false)}
className="text-void-400 hover:text-void-200 transition-colors"
>
<svg className="w-5 h-5" fill="none" stroke="currentColor" viewBox="0 0 24 24">
<path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M6 18L18 6M6 6l12 12" />
</svg>
</button>
</motion.div>
)}
<form onSubmit={handleSubmit}>
<div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
{/* Left Column - Script & Style */}
<div className="space-y-6">
{/* Script Input */}
<div className="card">
<label className="block text-sm font-semibold text-void-200 mb-3">
Script <span className="text-coral-400">*</span>
</label>
<textarea
name="script"
value={formState.script}
onChange={handleChange}
rows={10}
className="textarea-field font-mono text-sm"
placeholder="Enter your complete video script here...
The AI will automatically analyze and segment your script into optimal video chunks, typically 8 seconds each."
required
/>
<div className="flex items-center justify-between mt-3">
<p className="text-xs text-void-500">
AI will automatically segment your script
</p>
{wordCount > 0 && (
<div className="flex items-center gap-4 text-xs">
<span className="text-void-400">{wordCount} words</span>
<span className={`font-semibold ${provider === 'kling' ? 'text-coral-400' : 'text-electric-400'}`}>
~{estimatedSegments} segments
</span>
</div>
)}
</div>
</div>
{/* Style Input */}
<div className="card">
<label className="block text-sm font-semibold text-void-200 mb-3">
Visual Style
</label>
<textarea
name="style"
value={formState.style}
onChange={handleChange}
rows={3}
className="textarea-field"
placeholder="e.g., Cinematic, hyper-realistic, natural lighting, modern aesthetic, warm tones..."
/>
</div>
{/* Generation Mode Selection (Kling only) */}
{provider === 'kling' && (
<div className="card border-2 border-coral-500/30 bg-coral-500/5">
<h4 className="text-sm font-semibold text-void-200 mb-4">
Generation Mode
</h4>
<div className="space-y-3">
<label className="flex items-start gap-3 cursor-pointer group">
<input
type="radio"
name="generationMode"
value="frame-continuity"
checked={generationMode === 'frame-continuity'}
onChange={() => setGenerationMode('frame-continuity')}
className="mt-1 w-4 h-4 accent-coral-500"
/>
<div>
<span className="text-sm font-medium text-void-100 group-hover:text-coral-400 transition-colors">
🎯 Frame Continuity (Recommended)
</span>
<p className="text-xs text-void-400 mt-0.5">
Extract last frame from each video → Use as reference for next segment.
<br />
<span className="text-coral-400">Best for visual consistency across segments.</span>
</p>
</div>
</label>
<label className="flex items-start gap-3 cursor-pointer group">
<input
type="radio"
name="generationMode"
value="extend"
checked={generationMode === 'extend'}
onChange={() => setGenerationMode('extend')}
className="mt-1 w-4 h-4 accent-coral-500"
/>
<div>
<span className="text-sm font-medium text-void-100 group-hover:text-coral-400 transition-colors">
➕ Extend API
</span>
<p className="text-xs text-void-400 mt-0.5">
Use KIE's native extend API for video continuation.
<br />
<span className="text-void-500">Faster but may have less visual consistency.</span>
</p>
</div>
</label>
</div>
</div>
)}
{/* Generation Preview */}
{estimatedSegments > 0 && (
<motion.div
initial={{ opacity: 0, scale: 0.95 }}
animate={{ opacity: 1, scale: 1 }}
className={`card border-2 ${provider === 'kling' ? 'border-coral-500/30 bg-coral-500/5' : 'border-electric-500/30 bg-electric-500/5'}`}
>
<h4 className={`font-bold text-sm mb-3 ${provider === 'kling' ? 'text-coral-400' : 'text-electric-400'}`}>
AI Analysis Preview
</h4>
<div className="grid grid-cols-2 gap-3 text-sm">
<div className="flex justify-between">
<span className="text-void-400">Words:</span>
<span className="text-void-200 font-medium">{wordCount}</span>
</div>
<div className="flex justify-between">
<span className="text-void-400">Segments:</span>
<span className="text-void-200 font-medium">~{estimatedSegments}</span>
</div>
<div className="flex justify-between">
<span className="text-void-400">Duration:</span>
<span className="text-void-200 font-medium">~{estimatedSegments * 8}s</span>
</div>
<div className="flex justify-between">
<span className="text-void-400">Mode:</span>
<span className="text-void-200 font-medium">
{generationMode === 'frame-continuity' ? 'Frame' : 'Extend'}
</span>
</div>
</div>
</motion.div>
)}
</div>
{/* Right Column - Image & Settings */}
<div className="space-y-6">
{/* Image Upload - Required for Kling, Optional for Replicate */}
<div className="card">
<label className="block text-sm font-semibold text-void-200 mb-3">
Character Image {provider === 'kling' && <span className="text-coral-400">*</span>}
{provider === 'replicate' && <span className="text-void-500 text-xs ml-2">(optional, for visual continuity)</span>}
</label>
<div
onDragOver={handleDragOver}
onDragLeave={handleDragLeave}
onDrop={handleDrop}
className={`dropzone flex flex-col items-center justify-center text-center min-h-[200px] ${isDragging ? 'active' : ''}`}
>
{imagePreview ? (
<div className="relative">
<img
src={imagePreview}
alt="Preview"
className="max-h-40 rounded-lg shadow-lg"
/>
<button
type="button"
onClick={() => { setImageFile(null); setImagePreview(null); }}
className={`absolute -top-2 -right-2 w-6 h-6 rounded-full flex items-center justify-center text-white text-xs transition-colors ${
provider === 'kling' ? 'bg-coral-500 hover:bg-coral-600' : 'bg-electric-500 hover:bg-electric-600'
}`}
>
×
</button>
</div>
) : (
<>
<ImageIcon className="text-void-500 mb-4" size={48} />
<p className="text-void-300 mb-2">Drag and drop your image here</p>
<p className="text-void-500 text-sm mb-4">or</p>
<label className={`cursor-pointer ${provider === 'kling' ? 'btn-secondary' : 'btn-secondary-electric'}`}>
<span>Browse Files</span>
<input
type="file"
accept="image/*"
className="hidden"
onChange={(e) => e.target.files?.[0] && handleImageUpload(e.target.files[0])}
/>
</label>
</>
)}
</div>
<p className="text-xs text-void-500 mt-2">
{provider === 'kling'
? 'PNG, JPG up to 10MB. This image will be used as your character reference.'
: 'PNG, JPG up to 10MB. Optional: Provides visual continuity across segments.'
}
</p>
</div>
{/* Settings Grid */}
<div className="card">
<h4 className="text-sm font-semibold text-void-200 mb-4">Generation Settings</h4>
<div className="grid grid-cols-2 gap-4">
<div>
<label className="block text-xs text-void-400 mb-1.5">Voice Type</label>
<select
name="voiceType"
value={formState.voiceType}
onChange={handleChange}
className="select-field"
>
{voiceTypes.map(v => <option key={v} value={v}>{v}</option>)}
</select>
</div>
<div>
<label className="block text-xs text-void-400 mb-1.5">Energy Level</label>
<select
name="energyLevel"
value={formState.energyLevel}
onChange={handleChange}
className="select-field"
>
{energyLevels.map(e => <option key={e} value={e}>{e}</option>)}
</select>
</div>
<div>
<label className="block text-xs text-void-400 mb-1.5">Camera Style</label>
<select
name="cameraStyle"
value={formState.cameraStyle}
onChange={handleChange}
className="select-field"
>
{cameraStyles.map(c => <option key={c} value={c}>{c}</option>)}
</select>
</div>
<div>
<label className="block text-xs text-void-400 mb-1.5">Narrative</label>
<select
name="narrativeStyle"
value={formState.narrativeStyle}
onChange={handleChange}
className="select-field"
>
{narrativeStyles.map(n => <option key={n} value={n}>{n}</option>)}
</select>
</div>
<div>
<label className="block text-xs text-void-400 mb-1.5">Aspect Ratio</label>
<select
name="aspectRatio"
value={formState.aspectRatio}
onChange={handleChange}
className="select-field"
>
{aspectRatios.map(a => <option key={a} value={a}>{a}</option>)}
</select>
</div>
{provider === 'kling' ? (
<div>
<label className="block text-xs text-void-400 mb-1.5">Seed Value</label>
<input
type="number"
name="seedValue"
value={formState.seedValue}
onChange={handleChange}
className="input-field"
placeholder="12005"
/>
</div>
) : (
<div>
<label className="block text-xs text-void-400 mb-1.5">Model</label>
<select
name="model"
value={formState.model}
onChange={handleChange}
className="select-field"
>
<option value="google/veo-3">Google Veo 3</option>
</select>
</div>
)}
</div>
<p className="text-xs text-void-500 mt-3">
{provider === 'kling'
? 'Seed 12005 = Warm, flattering lighting. Use same seed for consistency.'
: 'Google Veo 3 recommended for best quality and consistency.'
}
</p>
</div>
{/* Submit Button */}
<button
type="submit"
disabled={!isValid || isGenerating}
className={`
w-full py-4 font-semibold rounded-xl transition-all duration-300
flex items-center justify-center gap-3
${provider === 'kling' ? 'btn-primary' : 'btn-electric'}
disabled:opacity-50 disabled:cursor-not-allowed disabled:scale-100
`}
>
{isGenerating ? (
<>
<div className="w-5 h-5 border-2 border-white/30 border-t-white rounded-full animate-spin" />
<span>Generating...</span>
</>
) : (
<>
<SparklesIcon size={20} />
<span>
Generate Video
{provider === 'kling' && generationMode === 'frame-continuity' && ' (Frame Continuity)'}
{provider === 'kling' && generationMode === 'extend' && ' (Extend API)'}
</span>
</>
)}
</button>
</div>
</div>
</form>
</motion.div>
);
};