File size: 5,016 Bytes
640bc6a
ce447ed
640bc6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import streamlit as st
import openai
import torch
from transformers import pipeline
from deepface import DeepFace
import cv2
import numpy as np
import random
import os
from moviepy.editor import *
from faster_whisper import WhisperModel
from insightface.app import FaceAnalysis
import shutil
import tempfile

# Set Streamlit Page
st.set_page_config(page_title="DeepFlick: AI Face Swap & Voice Clone", layout="wide")

st.title("🎭 DeepFlick: AI-Powered Face Swap & Voice Clone")


# Streamlit re-executes this script from the top on every widget interaction.
# Wrapping the model constructors in st.cache_resource keeps one loaded copy
# per server process instead of rebuilding all three models on each rerun.
@st.cache_resource
def _load_face_app():
    """Build and prepare the insightface detector used for face detection."""
    app = FaceAnalysis(name="buffalo_l")
    # Pick the device the same way the Whisper model does below; insightface
    # treats a negative ctx_id as "run on CPU".
    app.prepare(ctx_id=0 if torch.cuda.is_available() else -1, det_size=(640, 640))
    return app


@st.cache_resource
def _load_tts_pipeline():
    """Build the text-to-speech pipeline used for voiceovers."""
    # NOTE(review): confirm that "coqui-ai/TTS" resolves to a checkpoint the
    # transformers text-to-speech pipeline can load.
    return pipeline("text-to-speech", model="coqui-ai/TTS")


@st.cache_resource
def _load_whisper_model():
    """Build the Whisper model used for auto-captions."""
    return WhisperModel("base", device="cuda" if torch.cuda.is_available() else "cpu")


# Load Face Swap Model
face_app = _load_face_app()

# Text-to-Speech Model
tts_pipeline = _load_tts_pipeline()

# Whisper Model for Auto-Captions
whisper_model = _load_whisper_model()

# Function to Swap Faces
def swap_faces(source_img_path, target_img_path, output_path):
    """Swap the face from the source image onto the target image.

    Args:
        source_img_path: Path of the image that provides the face.
        target_img_path: Path of the image that receives the face.
        output_path: Path the swapped image is written to.

    Returns:
        ``output_path`` on success, or ``None`` when either image cannot be
        read, no face is detected in one of them, or the result cannot be
        written to ``output_path``.
    """
    source_img = cv2.imread(source_img_path)
    target_img = cv2.imread(target_img_path)

    # cv2.imread reports a missing or undecodable file by returning None
    # rather than raising, so bail out before handing None to the detector.
    if source_img is None or target_img is None:
        return None

    # Detect faces
    source_faces = face_app.get(source_img)
    target_faces = face_app.get(target_img)

    if len(source_faces) == 0 or len(target_faces) == 0:
        return None

    # Swap faces
    # NOTE(review): `swap` does not appear in DeepFace's documented public API
    # (verify/analyze/find/represent/...) — confirm this call exists in the
    # installed version, or replace it with an insightface swapper model.
    swapped_img = DeepFace.swap(source_img, target_img, model_name="insightface")

    # Save output; cv2.imwrite returns False (it does not raise) when the file
    # could not be written, so do not report a path that was never produced.
    if not cv2.imwrite(output_path, swapped_img):
        return None
    return output_path

# Function to Generate Voiceover
def generate_voice(script_text):
    """Synthesize ``script_text`` to speech and save it as a WAV file.

    Args:
        script_text: The text to speak.

    Returns:
        Path of the 16-bit PCM WAV file that was written to the current
        working directory.
    """
    import uuid
    import wave

    # A uuid keeps concurrent sessions from overwriting each other's audio,
    # which a 4-digit random suffix could not guarantee.
    audio_path = f"output_{uuid.uuid4().hex}.wav"
    tts_audio = tts_pipeline(script_text)

    # The pipeline result carries raw samples plus their sampling rate, not an
    # encoded file, so the samples must be wrapped in a real WAV container;
    # dumping the array bytes directly yields a file no player can open.
    samples = np.atleast_1d(np.squeeze(np.asarray(tts_audio["audio"])))
    channels = 1
    if samples.ndim == 2:
        # assumes a (channels, samples) layout — TODO confirm for the model in use
        channels = samples.shape[0]
        samples = samples.T  # WAV frames are interleaved: (samples, channels)
    if np.issubdtype(samples.dtype, np.floating):
        # assumes float samples are normalised to [-1, 1] — TODO confirm
        samples = np.clip(samples, -1.0, 1.0) * 32767.0
    pcm = samples.astype("<i2")

    with wave.open(audio_path, "wb") as wav_file:
        wav_file.setnchannels(channels)
        wav_file.setsampwidth(2)  # 16-bit PCM
        wav_file.setframerate(int(tts_audio["sampling_rate"]))
        wav_file.writeframes(pcm.tobytes())

    return audio_path

# Function to Generate Captions
def generate_captions(audio_path):
    """Transcribe the audio file and return its text, one segment per line.

    Args:
        audio_path: Path of the audio file handed to the Whisper model.

    Returns:
        The text of every transcribed segment, joined with newlines.
    """
    segment_stream, _info = whisper_model.transcribe(audio_path)

    caption_lines = []
    for piece in segment_stream:
        caption_lines.append(piece.text)

    return "\n".join(caption_lines)

# User Inputs
# Two image uploads: the face to copy from (source) and the face to replace (target).
st.sidebar.subheader("🎭 Upload Images")
source_img = st.sidebar.file_uploader("Upload Your Face (Source)", type=["jpg", "png"])
target_img = st.sidebar.file_uploader("Upload Target Face", type=["jpg", "png"])

# Script text for the voiceover, plus a toggle for generating the AI voice at all.
# The subheader emoji was mis-decoded (mojibake) and is restored here.
st.sidebar.subheader("🎙️ AI Voice Options")
script_text = st.sidebar.text_area("Enter Video Script:", "Type something interesting here...")
use_ai_voice = st.sidebar.checkbox("Use AI Voice Clone", value=True)

# Process Button
# Runs the whole pipeline on click: save uploads -> face swap -> optional
# voiceover and captions -> render a still-image video with the audio attached.
if st.sidebar.button("🎬 Generate AI Video"):
    if source_img and target_img and script_text:
        with st.spinner("Processing AI Face Swap & Voice Clone... 🚀"):
            temp_paths = []
            try:
                # Save temp images. Each file is closed before anything reads
                # it by path: an open, unflushed handle can leave the file
                # empty or truncated on disk (and cannot be reopened on Windows).
                for upload in (source_img, target_img):
                    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
                    temp_paths.append(temp_file.name)
                    with temp_file:
                        # getvalue() returns the full payload regardless of the
                        # read position, so a second click after a rerun still
                        # gets the image bytes.
                        temp_file.write(upload.getvalue())

                # Perform Face Swap
                output_img_path = "swapped_face.jpg"
                swapped_face = swap_faces(temp_paths[0], temp_paths[1], output_img_path)

                if swapped_face:
                    st.image(swapped_face, caption="AI-Swapped Face", use_column_width=True)

                    # Generate AI Voice
                    audio_path = None
                    if use_ai_voice:
                        audio_path = generate_voice(script_text)
                        st.audio(audio_path, format="audio/wav")

                        # Generate Captions
                        captions = generate_captions(audio_path)
                        st.write("📝 **Auto-Generated Captions:**", captions)

                    # Convert Swapped Image to Video
                    fps = 10
                    frame_count = 30  # 3 seconds when there is no voiceover
                    video_audio = AudioFileClip(audio_path) if audio_path else None
                    if video_audio is not None:
                        # Stretch the still image to cover the whole voiceover;
                        # a fixed 3-second clip would cut longer audio short.
                        frame_count = max(frame_count, int(video_audio.duration * fps) + 1)

                    video_clip = ImageSequenceClip([swapped_face] * frame_count, fps=fps)
                    final_video_path = f"deepflick_output_{random.randint(1000,9999)}.mp4"

                    if video_audio is not None:
                        video_clip = video_clip.set_audio(video_audio)

                    video_clip.write_videofile(final_video_path, codec="libx264")

                    st.video(final_video_path)

                else:
                    st.error("Face swap failed! Try using clearer images.")
            finally:
                # Cleanup temp files, even when a step above raised.
                for temp_path in temp_paths:
                    try:
                        os.remove(temp_path)
                    except OSError:
                        # Best-effort cleanup: a file that is already gone or
                        # still locked must not mask the real error.
                        pass

    else:
        st.warning("⚠️ Please upload both images and enter a script.")

# Search Functionality
# Case-insensitive substring search over a hard-coded title -> file mapping.
st.markdown("---")
st.subheader("🔍 Search AI-Generated Videos")

search_query = st.text_input("Search for a video by script content:")
if st.button("🔍 Search"):
    # Simulated Search (Replace with a database search)
    dummy_videos = {
        "Funny AI Skit": "deepflick_output_1234.mp4",
        "AI Meme Video": "deepflick_output_5678.mp4"
    }

    # Ignore surrounding whitespace so " funny " still matches "Funny AI Skit".
    # An empty query matches every title, i.e. it lists all videos.
    needle = search_query.strip().lower()
    found_videos = {title: path for title, path in dummy_videos.items() if needle in title.lower()}

    if found_videos:
        for title, path in found_videos.items():
            st.markdown(f"### 🎥 {title}")
            # The catalogue holds placeholder paths; st.video raises on a local
            # file that does not exist, so report it instead of crashing the page.
            if os.path.exists(path):
                st.video(path)
            else:
                st.warning(f"Video file not found: {path}")
    else:
        st.write("❌ No videos found matching your search.")