{
  "caption_model": {
    "architecture": "CNN (ResNet50) + LSTM",
    "test_loss": 2.6034744274139405,
    "vocab_size": 2988,
    "trainable_params": 6501036,
    "model_file": "caption_model_final.pth",
    "config_file": "caption_model_config.pkl",
    "vocab_file": "../vocab/vocab.pkl"
  },
  "action_model": {
    "architecture": "ResNet50 (Fine-tuned)",
    "test_accuracy": 78.57142857142857,
    "test_loss": 0.6965014625951726,
    "num_classes": 15,
    "class_names": [
      "calling",
      "clapping",
      "cycling",
      "dancing",
      "drinking",
      "eating",
      "fighting",
      "hugging",
      "laughing",
      "listening_to_music",
      "running",
      "sitting",
      "sleeping",
      "texting",
      "using_laptop"
    ],
    "trainable_params": 24565839,
    "model_file": "action_model_final.pth",
    "config_file": "action_model_config.pkl"
  },
  "training_info": {
    "date": "January 2026",
    "framework": "PyTorch",
    "device": "Kaggle 2x T4 GPUs",
    "caption_epochs": 15,
    "action_epochs": 10
  }
}