"""Builder for the CNN-LSTM video classification model.

A per-frame image encoder (a fine-tuned model loaded from disk, or Xception
with ImageNet weights as a fallback) is applied to every frame of a clip via
``TimeDistributed``; the resulting feature sequence is fed through two LSTM
layers and a small dense head ending in a single sigmoid unit.
"""
import os
from typing import Optional

# Suppress TensorFlow logs.
# This has to be set *before* TensorFlow is imported: the native logging layer
# picks the level up from the environment, and messages emitted while the
# library loads would otherwise slip through.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf  # noqa: E402
from tensorflow.keras.applications import Xception  # noqa: E402
from tensorflow.keras.layers import (  # noqa: E402
    Dense,
    Dropout,
    Input,
    LSTM,
    TimeDistributed,
)
from tensorflow.keras.models import Model  # noqa: E402

# Import our settings (SEQUENCE_LENGTH, TARGET_IMAGE_SIZE).
# Try the package-relative import first, then fall back to a plain import so
# the module also works when executed directly as a script.
try:
    from . import config
except ImportError:
    try:
        import config
    except ImportError as exc:
        raise ImportError(
            "Could not import config.py. Make sure it's in the src/ directory."
        ) from exc


def _load_finetuned_encoder(finetuned_model_path: Optional[str]) -> Optional[Model]:
    """Load the fine-tuned image model and cut it at its pooling layer.

    Args:
        finetuned_model_path: Path to the saved fine-tuned model, or a falsy
            value to skip loading entirely.

    Returns:
        A ``Model`` mapping the saved model's input to the output of its
        ``'global_average_pooling2d'`` layer, or ``None`` when no path was
        given, the file does not exist, or loading failed. A warning is
        printed in the latter two cases.
    """
    if not finetuned_model_path:
        return None

    if not os.path.exists(finetuned_model_path):
        print(f"Warning: Model not found at {finetuned_model_path}")
        return None

    print(f"Loading encoder from: {finetuned_model_path}")
    try:
        full_image_model = tf.keras.models.load_model(
            finetuned_model_path, compile=False
        )
        # Get the output of the pooling layer. The lookup is by exact layer
        # name; a model saved with a different name for that layer raises here
        # and ends up on the fallback path.
        encoder_output = full_image_model.get_layer('global_average_pooling2d').output
        # Create a new model using the same input as the full model.
        encoder = Model(
            inputs=full_image_model.input,
            outputs=encoder_output,
            name="finetuned_encoder",
        )
    except Exception as e:
        # Deliberately best-effort: any failure here degrades to the ImageNet
        # fallback in the caller rather than aborting the build.
        print(f"Warning: Failed to load encoder: {e}")
        return None

    print("Successfully loaded encoder from finetuned_model.h5")
    return encoder


def build_video_model(finetuned_model_path: Optional[str] = None) -> Model:
    """Build the CNN-LSTM video model.

    Args:
        finetuned_model_path: Path to the finetuned_model.h5 file (can be
            local or downloaded from HF). When omitted, missing, or
            unloadable, Xception with ImageNet weights is used as the encoder.

    Returns:
        An uncompiled Keras ``Model`` named ``"cnn_lstm_video_model"`` whose
        input has shape ``(SEQUENCE_LENGTH, TARGET_IMAGE_SIZE,
        TARGET_IMAGE_SIZE, 3)`` (plus the batch axis) and whose output is a
        single sigmoid unit.
    """
    print("Building new CNN-LSTM video model...")

    # --- 1. The "Encoder" (Feature Extractor: Pre-Trained Model) ---
    base_model = _load_finetuned_encoder(finetuned_model_path)

    # Fallback to ImageNet weights if encoder not loaded.
    if base_model is None:
        print("Using ImageNet Xception weights as fallback (CPU-friendly)...")
        base_model = Xception(
            weights='imagenet',
            include_top=False,
            input_shape=(config.TARGET_IMAGE_SIZE, config.TARGET_IMAGE_SIZE, 3),
            pooling='avg',
        )

    # Freeze this base model so only the LSTM/dense layers below are trainable.
    # NOTE(review): applied to whichever encoder was selected (fine-tuned or
    # ImageNet fallback) - confirm this matches the intended training setup.
    base_model.trainable = False

    # --- 2. The Full Model (Encoder + Decoder) ---
    video_input = Input(shape=(
        config.SEQUENCE_LENGTH,
        config.TARGET_IMAGE_SIZE,
        config.TARGET_IMAGE_SIZE,
        3,
    ))

    # Apply encoder to every frame.
    encoded_frames = TimeDistributed(base_model)(video_input)

    # LSTM layers: the first returns the full sequence so the second can
    # consume it; the second returns only its final output.
    x = LSTM(256, return_sequences=True)(encoded_frames)
    x = Dropout(0.5)(x)
    x = LSTM(128)(x)
    x = Dropout(0.5)(x)

    # Final classification head.
    x = Dense(64, activation='relu')(x)
    output = Dense(1, activation='sigmoid')(x)

    # Create the final model.
    video_model = Model(video_input, output, name="cnn_lstm_video_model")
    return video_model


if __name__ == "__main__":
    # Quick smoke test: build with the fallback encoder and print the summary.
    print("Running a quick test to build the model...")
    try:
        model = build_video_model()
        print("\n--- Model Summary ---")
        model.summary()
        print("\nModel built successfully!")
    except Exception as e:
        print(f"\nModel build FAILED: {e}")