# deepfake-api / src/video_model.py
# (Hugging Face Hub listing metadata — uploader: piyushnaula, commit: "backend", 63bbcbd)
import os
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, TimeDistributed
from tensorflow.keras.applications import Xception
# Import our settings (SEQUENCE_LENGTH, TARGET_IMAGE_SIZE)
try:
from . import config
except ImportError:
try:
import config
except ImportError:
raise ImportError("Could not import config.py. Make sure it's in the src/ directory.")
# Suppress TensorFlow logs
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def build_video_model(finetuned_model_path=None):
"""
Builds the CNN-LSTM video model.
Args:
finetuned_model_path: Path to the finetuned_model.h5 file (can be local or downloaded from HF)
"""
print("Building new CNN-LSTM video model...")
# --- 1. The "Encoder" (Feature Extractor: Pre-Trained Model) ---
base_model = None
# Try to load from provided path first
if finetuned_model_path and os.path.exists(finetuned_model_path):
print(f"Loading encoder from: {finetuned_model_path}")
try:
full_image_model = tf.keras.models.load_model(finetuned_model_path, compile=False)
# Get the output of the pooling layer
encoder_output = full_image_model.get_layer('global_average_pooling2d').output
# Create a new model using the same input as the full model
base_model = Model(
inputs=full_image_model.input,
outputs=encoder_output,
name="finetuned_encoder"
)
print("Successfully loaded encoder from finetuned_model.h5")
except Exception as e:
print(f"Warning: Failed to load encoder: {e}")
base_model = None
else:
if finetuned_model_path:
print(f"Warning: Model not found at {finetuned_model_path}")
# Fallback to ImageNet weights if encoder not loaded
if base_model is None:
print("Using ImageNet Xception weights as fallback (CPU-friendly)...")
base_model = Xception(
weights='imagenet',
include_top=False,
input_shape=(config.TARGET_IMAGE_SIZE, config.TARGET_IMAGE_SIZE, 3),
pooling='avg'
)
# Freeze this base model
base_model.trainable = False
# --- 2. The Full Model (Encoder + Decoder) ---
video_input = Input(shape=(
config.SEQUENCE_LENGTH,
config.TARGET_IMAGE_SIZE,
config.TARGET_IMAGE_SIZE,
3
))
# Apply encoder to every frame
encoded_frames = TimeDistributed(base_model)(video_input)
# LSTM layers
x = LSTM(256, return_sequences=True)(encoded_frames)
x = Dropout(0.5)(x)
x = LSTM(128)(x)
x = Dropout(0.5)(x)
# Final Classification Head
x = Dense(64, activation='relu')(x)
output = Dense(1, activation='sigmoid')(x)
# Create the final model
video_model = Model(video_input, output, name="cnn_lstm_video_model")
return video_model
if __name__ == "__main__":
print("Running a quick test to build the model...")
try:
model = build_video_model()
print("\n--- Model Summary ---")
model.summary()
print("\nModel built successfully!")
except Exception as e:
print(f"\nModel build FAILED: {e}")