# Page header captured together with the file (not part of the program):
# Spaces: GeminiAi / deepflick
# No application file
# File size: 5,016 Bytes
# 640bc6a ce447ed 640bc6a

import streamlit as st
import openai
import torch
from transformers import pipeline
from deepface import DeepFace
import cv2
import numpy as np
import random
import os
from moviepy.editor import *
from faster_whisper import WhisperModel
from insightface.app import FaceAnalysis
import shutil
import tempfile

# Set Streamlit Page
st.set_page_config(page_title="DeepFlick: AI Face Swap & Voice Clone", layout="wide")

st.title("🎭 DeepFlick: AI-Powered Face Swap & Voice Clone")


# Streamlit re-executes this script from the top on every widget interaction.
# The heavyweight models are therefore built inside `st.cache_resource`
# loaders so they are constructed once per server process instead of on
# every rerun.
@st.cache_resource
def _load_face_app():
    """Create and prepare the insightface `buffalo_l` face analysis app."""
    app = FaceAnalysis(name="buffalo_l")
    app.prepare(ctx_id=0, det_size=(640, 640))
    return app


@st.cache_resource
def _load_tts_pipeline():
    """Create the transformers text-to-speech pipeline."""
    # NOTE(review): confirm that "coqui-ai/TTS" resolves to a checkpoint the
    # transformers text-to-speech pipeline can actually load.
    return pipeline("text-to-speech", model="coqui-ai/TTS")


@st.cache_resource
def _load_whisper_model():
    """Create the faster-whisper `base` model, on CUDA when torch reports it."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return WhisperModel("base", device=device)


# Load Face Swap Model
face_app = _load_face_app()

# Text-to-Speech Model
tts_pipeline = _load_tts_pipeline()

# Whisper Model for Auto-Captions
whisper_model = _load_whisper_model()

# Function to Swap Faces
_face_swapper = None


def _get_face_swapper():
    """Load the insightface inswapper model on first use and reuse it afterwards."""
    global _face_swapper
    if _face_swapper is None:
        # Local import: the file only imports FaceAnalysis from insightface.app.
        from insightface import model_zoo

        # NOTE(review): the inswapper weights must be available at this path
        # (override with INSWAPPER_MODEL_PATH) — confirm how they are deployed.
        model_file = os.environ.get("INSWAPPER_MODEL_PATH", "inswapper_128.onnx")
        _face_swapper = model_zoo.get_model(model_file, download=True, download_zip=True)
    return _face_swapper


def swap_faces(source_img_path, target_img_path, output_path):
    """Paste the face from the source image onto the face in the target image.

    The previous implementation called ``DeepFace.swap``, which is not part of
    the deepface API; the swap is now done with insightface's inswapper model,
    using the faces already detected by ``face_app``.

    Args:
        source_img_path: Path of the image providing the face (identity).
        target_img_path: Path of the image whose face gets replaced.
        output_path: Where the resulting image is written.

    Returns:
        ``output_path`` on success, or ``None`` when an image cannot be read,
        no face is detected in either image, or the result cannot be written.
    """
    source_img = cv2.imread(source_img_path)
    target_img = cv2.imread(target_img_path)

    # cv2.imread signals an unreadable/corrupt file by returning None rather
    # than raising, so check before handing the arrays to the detector.
    if source_img is None or target_img is None:
        return None

    # Detect faces
    source_faces = face_app.get(source_img)
    target_faces = face_app.get(target_img)

    if not source_faces or not target_faces:
        return None

    # Swap faces: only the first detected face of each image is used.
    swapper = _get_face_swapper()
    swapped_img = swapper.get(target_img, target_faces[0], source_faces[0], paste_back=True)

    # Save output; cv2.imwrite returns False when the file could not be written.
    if not cv2.imwrite(output_path, swapped_img):
        return None
    return output_path

# Function to Generate Voiceover
def generate_voice(script_text):
    """Synthesize ``script_text`` with the TTS pipeline and save it as a WAV file.

    The pipeline result is read as a mapping with an ``"audio"`` sample array
    and a ``"sampling_rate"``. Writing the raw sample buffer to disk (as the
    previous version did) produces a file without a WAV header, so the samples
    are converted to 16-bit PCM and written through the ``wave`` module.

    Args:
        script_text: Text to be spoken.

    Returns:
        Path of the generated ``output_<4 digits>.wav`` file in the current
        working directory.
    """
    import wave  # stdlib; imported locally because the file does not import it

    audio_path = f"output_{random.randint(1000,9999)}.wav"
    tts_audio = tts_pipeline(script_text)

    # Flatten to a 1-D float signal.
    # NOTE(review): assumes mono output in the [-1, 1] range — confirm for the
    # configured TTS model.
    samples = np.asarray(tts_audio["audio"], dtype=np.float32).reshape(-1)
    pcm16 = (np.clip(samples, -1.0, 1.0) * 32767.0).astype("<i2")

    with wave.open(audio_path, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
        wav_file.setframerate(int(tts_audio["sampling_rate"]))
        wav_file.writeframes(pcm16.tobytes())

    return audio_path

# Function to Generate Captions
def generate_captions(audio_path):
    """Transcribe ``audio_path`` with the Whisper model.

    Returns the text of every transcribed segment, one segment per line.
    """
    segment_iter, _info = whisper_model.transcribe(audio_path)

    lines = []
    for piece in segment_iter:
        lines.append(piece.text)

    return "\n".join(lines)

# User Inputs
st.sidebar.subheader("🎭 Upload Images")
source_img = st.sidebar.file_uploader("Upload Your Face (Source)", type=["jpg", "png"])
target_img = st.sidebar.file_uploader("Upload Target Face", type=["jpg", "png"])

st.sidebar.subheader("🎙️ AI Voice Options")
script_text = st.sidebar.text_area("Enter Video Script:", "Type something interesting here...")
use_ai_voice = st.sidebar.checkbox("Use AI Voice Clone", value=True)

# Process Button
if st.sidebar.button("🎬 Generate AI Video"):
    # A whitespace-only script is treated the same as an empty one.
    if source_img and target_img and script_text.strip():
        with st.spinner("Processing AI Face Swap & Voice Clone... 🚀"):
            temp_paths = []
            try:
                # Save temp images. Each file is closed before it is handed to
                # OpenCV so the bytes are flushed to disk, and getvalue() is
                # used so the result does not depend on the upload's current
                # read position.
                for upload in (source_img, target_img):
                    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp:
                        temp_paths.append(tmp.name)
                        tmp.write(upload.getvalue())
                temp_source_path, temp_target_path = temp_paths

                # Perform Face Swap
                output_img_path = "swapped_face.jpg"
                swapped_face = swap_faces(temp_source_path, temp_target_path, output_img_path)

                if swapped_face:
                    st.image(swapped_face, caption="AI-Swapped Face", use_column_width=True)

                    # Generate AI Voice
                    audio_path = None
                    if use_ai_voice:
                        audio_path = generate_voice(script_text)
                        st.audio(audio_path, format="audio/wav")

                        # Generate Captions
                        captions = generate_captions(audio_path)
                        st.write("📝 **Auto-Generated Captions:**", captions)

                    # Convert Swapped Image to Video
                    fps = 10
                    frame_count = 30  # minimum length: 3 seconds at 10 fps
                    video_audio = AudioFileClip(audio_path) if audio_path else None
                    if video_audio is not None:
                        # Stretch the still image so the video is at least as
                        # long as the narration instead of a fixed 3 seconds.
                        frame_count = max(frame_count, int(video_audio.duration * fps) + 1)

                    video_clip = ImageSequenceClip([swapped_face] * frame_count, fps=fps)
                    final_video_path = f"deepflick_output_{random.randint(1000,9999)}.mp4"

                    try:
                        if video_audio is not None:
                            video_clip = video_clip.set_audio(video_audio)

                        video_clip.write_videofile(final_video_path, codec="libx264")
                    finally:
                        # Release the reader resources held by the clips.
                        video_clip.close()
                        if video_audio is not None:
                            video_audio.close()

                    st.video(final_video_path)

                else:
                    st.error("Face swap failed! Try using clearer images.")
            finally:
                # Cleanup temp files, even when a processing step raised.
                for temp_path in temp_paths:
                    if os.path.exists(temp_path):
                        os.remove(temp_path)

    else:
        st.warning("⚠️ Please upload both images and enter a script.")

# Search Functionality
st.markdown("---")
st.subheader("🔍 Search AI-Generated Videos")

search_query = st.text_input("Search for a video by script content:")
if st.button("🔍 Search"):
    # Simulated Search (Replace with a database search)
    dummy_videos = {
        "Funny AI Skit": "deepflick_output_1234.mp4",
        "AI Meme Video": "deepflick_output_5678.mp4"
    }

    # Case-insensitive substring match on the title; surrounding whitespace in
    # the query is ignored. An empty query matches every title.
    needle = search_query.strip().lower()
    found_videos = {title: path for title, path in dummy_videos.items() if needle in title.lower()}

    if found_videos:
        for title, path in found_videos.items():
            st.markdown(f"### 🎥 {title}")
            # The catalogue holds placeholder paths, so only hand a file to
            # st.video when it exists instead of letting the player raise.
            if os.path.isfile(path):
                st.video(path)
            else:
                st.warning(f"Video file not found: {path}")
    else:
        st.write("❌ No videos found matching your search.")