Spaces:
Sleeping
Sleeping
import os

# Suppress TensorFlow logs.
# This must run BEFORE the first `import tensorflow`: the native logging
# level is picked up when the library is loaded, so assigning it afterwards
# (as the previous ordering did) leaves the startup messages unsuppressed.
# Level '2' filters out INFO and WARNING messages.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf  # noqa: E402  (intentionally after the env var)
from tensorflow.keras.models import Model  # noqa: E402
from tensorflow.keras.layers import Input, LSTM, Dense, Dropout, TimeDistributed  # noqa: E402
from tensorflow.keras.applications import Xception  # noqa: E402

# Import our settings (SEQUENCE_LENGTH, TARGET_IMAGE_SIZE).
# Try the package-relative import first; fall back to a plain import so the
# file also works when it is run directly as a script.
try:
    from . import config
except ImportError:
    try:
        import config
    except ImportError as err:
        raise ImportError("Could not import config.py. Make sure it's in the src/ directory.") from err
def _load_finetuned_encoder(model_path, layer_name):
    """Load the fine-tuned image model and cut it at *layer_name*.

    Args:
        model_path: Path to the saved fine-tuned model; may be None or empty.
        layer_name: Name of the layer whose output becomes the encoder output.

    Returns:
        A Keras ``Model`` named "finetuned_encoder" that maps the full
        model's input to the output of *layer_name*, or ``None`` when no
        path was given, the path does not exist, or loading fails.
    """
    if not model_path:
        # Nothing requested: the caller falls back silently.
        return None

    if not os.path.exists(model_path):
        print(f"Warning: Model not found at {model_path}")
        return None

    print(f"Loading encoder from: {model_path}")
    try:
        full_image_model = tf.keras.models.load_model(model_path, compile=False)
        # Get the output of the pooling layer
        encoder_output = full_image_model.get_layer(layer_name).output
        # Create a new model using the same input as the full model
        encoder = Model(
            inputs=full_image_model.input,
            outputs=encoder_output,
            name="finetuned_encoder",
        )
    except Exception as e:
        # Deliberate best-effort: any load/lookup failure is downgraded to a
        # warning so the caller can use the ImageNet fallback instead.
        print(f"Warning: Failed to load encoder: {e}")
        return None

    print("Successfully loaded encoder from finetuned_model.h5")
    return encoder


def build_video_model(finetuned_model_path=None, *, encoder_layer_name='global_average_pooling2d'):
    """
    Builds the CNN-LSTM video model.

    A frozen per-frame encoder is applied to every frame through
    ``TimeDistributed``; the resulting sequence is fed to two LSTM layers
    (256 then 128 units, each followed by Dropout(0.5)) and a
    Dense(64, relu) -> Dense(1, sigmoid) head.

    Args:
        finetuned_model_path: Path to the finetuned_model.h5 file (can be
            local or downloaded from HF). If it is missing, does not exist,
            or cannot be loaded, an Xception model with ImageNet weights is
            used as the encoder instead.
        encoder_layer_name: Name of the layer in the fine-tuned model whose
            output is used as the per-frame feature vector. Defaults to
            'global_average_pooling2d'; pass a different name when the saved
            model's pooling layer is named differently.

    Returns:
        An uncompiled Keras ``Model`` named "cnn_lstm_video_model" whose
        input has shape (config.SEQUENCE_LENGTH, config.TARGET_IMAGE_SIZE,
        config.TARGET_IMAGE_SIZE, 3) per sample and whose output is a single
        sigmoid unit.
    """
    print("Building new CNN-LSTM video model...")

    # --- 1. The "Encoder" (Feature Extractor: Pre-Trained Model) ---
    # Try to load from provided path first
    base_model = _load_finetuned_encoder(finetuned_model_path, encoder_layer_name)

    # Fallback to ImageNet weights if encoder not loaded
    if base_model is None:
        print("Using ImageNet Xception weights as fallback (CPU-friendly)...")
        base_model = Xception(
            weights='imagenet',
            include_top=False,
            input_shape=(config.TARGET_IMAGE_SIZE, config.TARGET_IMAGE_SIZE, 3),
            pooling='avg',
        )

    # Freeze this base model
    base_model.trainable = False

    # --- 2. The Full Model (Encoder + Decoder) ---
    video_input = Input(shape=(
        config.SEQUENCE_LENGTH,
        config.TARGET_IMAGE_SIZE,
        config.TARGET_IMAGE_SIZE,
        3,
    ))

    # Apply encoder to every frame
    encoded_frames = TimeDistributed(base_model)(video_input)

    # LSTM layers
    x = LSTM(256, return_sequences=True)(encoded_frames)
    x = Dropout(0.5)(x)
    x = LSTM(128)(x)
    x = Dropout(0.5)(x)

    # Final Classification Head
    x = Dense(64, activation='relu')(x)
    output = Dense(1, activation='sigmoid')(x)

    # Create the final model
    video_model = Model(video_input, output, name="cnn_lstm_video_model")
    return video_model
def _smoke_test_build():
    """Build the model with default arguments and print its summary.

    Any exception raised while building or summarising is caught and
    reported on stdout rather than propagated.
    """
    print("Running a quick test to build the model...")
    try:
        built_model = build_video_model()
        print("\n--- Model Summary ---")
        built_model.summary()
        print("\nModel built successfully!")
    except Exception as build_error:
        print(f"\nModel build FAILED: {build_error}")


if __name__ == "__main__":
    _smoke_test_build()