Varsha Dewangan committed on
Commit
ee1d4aa
·
0 Parent(s):

Initial clean commit for project deployment

Browse files
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Database
2
+ instance/
3
+ *.db
4
+
5
+ # Uploaded files
6
+ static/uploads/
7
+
8
+ # Output/Log files
9
+ output/
10
+ # Specific large model file if you have it in your root and don't want it tracked
11
+ yolov8x-seg.pt
12
+ models/*.pth
13
+
14
+
15
+
16
+ data/images/ # Add this line
17
+
18
+ # Python cache and environment files
19
+ __pycache__/
20
+ *.pyc
21
+ .env
22
+ venv/
23
+ .ipynb_checkpoints/
24
+
25
+ # MacOS specific files
26
+ .DS_Store
27
+
28
+ # Jupyter Notebook files
29
+ .ipynb_checkpoints/
30
+
Dockerfile ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ==============================================================================
2
+ # Dockerfile for Perceptra AI (Image Captioning & Segmentation App)
3
+ # Optimized for deployment on Hugging Face Spaces with GPU support.
4
+ # ==============================================================================
5
+
6
+ # -- Stage 1: Base Image and System Dependencies --
7
+ # Start with a stable Python version. 3.9 is a good choice for broad compatibility.
8
+ # Using the 'slim' variant to keep the image size smaller.
9
+ FROM python:3.9-slim
10
+
11
+ # Set the working directory inside the container. All subsequent commands
12
+ # will run from this path.
13
+ WORKDIR /app
14
+
15
+ # Set environment variables to prevent Python from writing .pyc files and to
16
+ # ensure output is sent straight to the console without buffering.
17
+ ENV PYTHONDONTWRITEBYTECODE 1
18
+ ENV PYTHONUNBUFFERED 1
19
+
20
+ # Install essential system dependencies. Many Python libraries, especially for
21
+ # computer vision, have underlying system requirements.
22
+ # - build-essential & cmake: Needed to compile libraries like dlib (for face-recognition).
23
+ # - libgl1-mesa-glx, libglib2.0-0, etc.: Required by OpenCV for image processing in a headless environment.
24
+ RUN apt-get update && apt-get install -y --no-install-recommends \
25
+ build-essential \
26
+ cmake \
27
+ libgl1-mesa-glx \
28
+ libglib2.0-0 \
29
+ libsm6 \
30
+ libxext6 \
31
+ libxrender-dev \
32
+ && rm -rf /var/lib/apt/lists/*
33
+
34
+ # -- Stage 2: Python Dependencies --
35
+ # First, copy only the requirements.txt file. This allows Docker to cache the
36
+ # installed packages. The layer will only be re-built if requirements.txt changes.
37
+ COPY requirements.txt .
38
+
39
+ # Install the Python packages specified in requirements.txt.
40
+ # --no-cache-dir: Reduces the image size by not storing the pip cache.
41
+ RUN pip install --no-cache-dir -r requirements.txt
42
+
43
+ # -- Stage 3: Application Code & Models --
44
+ # Copy the rest of your application's source code into the container.
45
+ # This includes web_app.py, the 'src' directory, and the 'templates' directory.
46
+ COPY . .
47
+
48
+ # Optimization: Pre-download the large YOLO model during the build process.
49
+ # This makes the application start much faster on Hugging Face Spaces, as it
50
+ # won't need to download the model on every startup.
51
+ RUN python -c "from ultralytics import YOLO; YOLO('yolov8x-seg.pt')"
52
+
53
+ # -- Stage 4: Runtime Configuration --
54
+ # Expose the port the app will run on. Hugging Face Spaces will automatically
55
+ # map this to the public URL. Gunicorn will run on this port.
56
+ # We use port 7860, the default port expected by Hugging Face Spaces.
57
+ EXPOSE 7860
58
+
59
+ # The command to run your application using Gunicorn, a production-ready
60
+ # WSGI server. This is the standard way to run Flask apps in production.
61
+ # - --workers 2: Starts 2 worker processes to handle requests. Adjust as needed.
62
+ # - --bind 0.0.0.0:7860: Binds to all network interfaces on port 7860.
63
+ # - web_app:app: Tells Gunicorn to look for the Flask instance named 'app'
64
+ # inside the 'web_app.py' file.
65
+ CMD ["gunicorn", "--workers", "2", "--bind", "0.0.0.0:7860", "web_app:app"]
README.md ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Image Captioning with Attention
2
+ This project implements an image captioning model using a ResNet50-based CNN Encoder and an LSTM Decoder with Bahdanau-style Attention. The model is trained on the COCO 2017 dataset to generate descriptive captions for images.
3
+
4
+ Features
5
+ Modular Architecture: Separated concerns into distinct Python files for clarity and maintainability.
6
+
7
+ CNN Encoder: Utilizes a pre-trained ResNet50 for robust feature extraction.
8
+
9
+ Attention Mechanism: Allows the model to focus on salient regions of an image when generating specific words.
10
+
11
+ LSTM Decoder: Generates captions word by word, conditioned on image features and previously generated words.
12
+
13
+ Beam Search: Implemented for high-quality caption generation during inference.
14
+
15
+ Comprehensive Evaluation: Calculates standard metrics like BLEU (1-4), METEOR, ROUGE-L, and CIDEr.
16
+
17
+ Training Resumption: Ability to resume training from the latest checkpoint.
18
+
19
+ Logging: Detailed logging for training progress and evaluation results.
20
+
21
+ Project Structure
22
+ ImageCaptioning/
23
+ ├── data/
24
+ │ ├── coco/
25
+ │ │ ├── train2017/ # COCO 2017 training images
26
+ │ │ ├── val2017/ # COCO 2017 validation images
27
+ │ │ └── annotations/
28
+ │ │ ├── captions_train2017.json # Training captions JSON
29
+ │ │ └── captions_val2017.json # Validation captions JSON
30
+ ├── models/ # Directory for saving trained model checkpoints
31
+ │ └── (e.g., best_model_bleu0.1037.pth)
32
+ ├── src/
33
+ │ ├── app.py # Main script for running inference
34
+ │ ├── config.py # Configuration parameters for training, eval, inference
35
+ │ ├── data_preprocessing.py # Classes for vocabulary and dataset loading
36
+ │ ├── model.py # Defines the Encoder, Attention, and Decoder modules
37
+ │ ├── train.py # Contains the training loop and validation functions
38
+ │ ├── evaluation.py # Functions for calculating various evaluation metrics
39
+ │ └── utils.py # General utility functions (e.g., logging setup, attention visualization)
40
+ ├── output/ # Output directory for logs, saved vocabulary, and evaluation results
41
+ ├── requirements.txt # List of Python dependencies
42
+ └── README.md # This file
43
+
44
+ Setup
45
+ Follow these steps to set up the project locally.
46
+
47
+ 1. Clone the Repository
48
+ git clone https://github.com/YourUsername/ImageCaptioning.git # Replace with your repo URL
49
+ cd ImageCaptioning
50
+
51
+ 2. Create a Python Virtual Environment (Recommended)
52
+ python -m venv venv
53
+ # On Windows
54
+ .\venv\Scripts\activate
55
+ # On macOS/Linux
56
+ source venv/bin/activate
57
+
58
+ 3. Install Dependencies
59
+ Install the required Python packages. Note that pycocoevalcap needs to be installed directly from its GitHub repository as it's not on PyPI.
60
+
61
+ pip install -r requirements.txt
62
+
63
+ 4. Download NLTK Data
64
+ The evaluation metrics (BLEU, METEOR) require NLTK's punkt and wordnet data.
65
+
66
+ import nltk
67
+ nltk.download('punkt')
68
+ nltk.download('wordnet')
69
+
70
+ 5. Download COCO 2017 Dataset
71
+ You'll need the COCO 2017 dataset.
72
+
73
+ Images:
74
+
75
+ train2017.zip (around 18GB): http://images.cocodataset.org/zips/train2017.zip
76
+
77
+ val2017.zip (around 1GB): http://images.cocodataset.org/zips/val2017.zip
78
+
79
+ Annotations:
80
+
81
+ annotations_trainval2017.zip (around 250MB): http://images.cocodataset.org/annotations/annotations_trainval2017.zip
82
+
83
+ After downloading:
84
+
85
+ Create the data/coco directory if it doesn't exist.
86
+
87
+ Extract train2017.zip into data/coco/train2017/.
88
+
89
+ Extract val2017.zip into data/coco/val2017/.
90
+
91
+ Extract annotations_trainval2017.zip into data/coco/annotations/. This will create captions_train2017.json and captions_val2017.json (among others).
92
+
93
+ Your data/coco directory should then look like this:
94
+
95
+ data/coco/
96
+ ├── train2017/
97
+ ├── val2017/
98
+ └── annotations/
99
+ ├── captions_train2017.json
100
+ └── captions_val2017.json
101
+ └── ... (other annotation files)
102
+
103
+ Usage
104
+ 1. Configure Parameters
105
+ All configurable parameters are located in src/config.py. Open this file and adjust paths, hyperparameters, and other settings as needed.
106
+
107
+ data_folder: Ensure this points to the data/coco directory.
108
+
109
+ model_path: Update this to the path of your trained model (e.g., models/best_model_bleu0.1037.pth if you've already trained one or downloaded a pre-trained model).
110
+
111
+ 2. Train the Model
112
+ To start or resume training, run the train.py script:
113
+
114
+ python src/train.py
115
+
116
+ Training logs, model checkpoints, and a saved vocabulary file will be stored in the output/ directory. The best model (based on BLEU-4 score) will be saved in the models/ directory.
117
+
118
+ 3. Evaluate the Model
119
+ After training, you can evaluate the model's performance on the validation set:
120
+
121
+ python src/evaluation.py
122
+
123
+ This will generate captions for the test set and calculate various metrics. The detailed results will be saved in the output/evaluation_results/ directory.
124
+
125
+ 4. Run Inference (Generate Caption for a Single Image)
126
+ To generate a caption for a specific image, run the app.py script. Make sure src/config.py has the correct model_path and example_image_path set.
127
+
128
+ python src/app.py
129
+
130
+ The generated caption will be printed to the console. You can modify inference_config in src/config.py to change the beam_size or max_caption_length.
131
+
132
+ Pre-trained Models
133
+ (Optional: If you plan to provide pre-trained models, you would include instructions here on how users can download and place them in the models/ directory.)
134
+
135
+ Future Improvements
136
+ Implement a web interface (e.g., using Flask or FastAPI) for interactive captioning.
137
+
138
+ Explore different CNN backbones (e.g., EfficientNet, Vision Transformers).
139
+
140
+ Integrate advanced attention mechanisms or transformer architectures.
141
+
142
+ Add support for more datasets.
143
+
144
+ Quantization or pruning for model optimization.
145
+
146
+ License
147
+ (Add your chosen license, e.g., MIT, Apache 2.0)
file.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ import torch
4
+ from PIL import Image
5
+ from sklearn.metrics import (jaccard_score, f1_score,
6
+ accuracy_score, precision_score,
7
+ recall_score)
8
+ from scipy.spatial.distance import directed_hausdorff
9
+
10
+ # MUST BE FIRST STREAMLIT COMMAND
11
+ import streamlit as st
12
+ st.set_page_config(
13
+ page_title="Advanced Segmentation Metrics Analyzer",
14
+ page_icon="🧪",
15
+ layout="wide"
16
+ )
17
+
18
# Model loading with enhanced error handling
@st.cache_resource
def load_model():
    """Load the YOLOv8 segmentation model once, cached across Streamlit reruns.

    Returns:
        The loaded model instance, or None when loading fails (the failure is
        reported in the UI instead of crashing the app at import time).
    """
    try:
        # Preferred path: the official ultralytics package.
        from ultralytics import YOLO
        return YOLO('yolov8x-seg.pt')
    except ImportError:
        try:
            # Fallback to torch hub.
            # NOTE(review): YOLOv8 is not published on torch.hub under
            # 'ultralytics/yolov8' (the hub repo is 'ultralytics/yolov5'),
            # so this fallback is best-effort and likely to fail — confirm.
            model = torch.hub.load('ultralytics/yolov8', 'yolov8x-seg', pretrained=True)
            return model.to('cuda' if torch.cuda.is_available() else 'cpu')
        except Exception as e:
            st.error(f"⚠️ Model loading failed: {str(e)}")
            st.info("Please check your internet connection and try again")
            return None
    except Exception as e:
        # Bug fix: a download/weight-loading failure on the primary path was
        # previously uncaught (only ImportError was handled) and crashed the
        # app at import time. Report it gracefully instead.
        st.error(f"⚠️ Model loading failed: {str(e)}")
        st.info("Please check your internet connection and try again")
        return None
34
+
35
+ model = load_model()
36
+
37
def validate_image(img_array):
    """Normalize an image array to a 3-channel RGB layout."""
    ndim = len(img_array.shape)
    if ndim == 2:
        # Single-channel grayscale: replicate into three channels.
        return cv2.cvtColor(img_array, cv2.COLOR_GRAY2RGB)
    channels = img_array.shape[2]
    if channels == 4:
        # RGBA input: drop the alpha channel via a proper conversion.
        return cv2.cvtColor(img_array, cv2.COLOR_RGBA2RGB)
    if channels > 3:
        # Unusual multi-band input: keep only the first three bands.
        return img_array[:, :, :3]
    # Already 3-channel: pass through unchanged.
    return img_array
46
+
47
def calculate_boundary_iou(mask1, mask2, boundary_width=2):
    """Compute IoU restricted to mask boundaries; returns 0.0 on any failure."""
    try:
        # A morphological gradient keeps only each mask's boundary pixels.
        struct = np.ones((boundary_width, boundary_width), np.uint8)
        edge_a = cv2.morphologyEx(mask1, cv2.MORPH_GRADIENT, struct)
        edge_b = cv2.morphologyEx(mask2, cv2.MORPH_GRADIENT, struct)
        # Jaccard index over the flattened boundary maps.
        return jaccard_score(edge_a.flatten(), edge_b.flatten())
    except Exception:
        # Graceful degradation: any error counts as zero boundary overlap.
        return 0.0
56
+
57
def calculate_metrics(results, img_shape):
    """Robust metric calculation.

    Aggregates IoU / Dice / pixel accuracy / boundary IoU over the predicted
    instance masks. Ground truth is SYNTHETIC: a rectangle covering the middle
    half of the image is used as a mock annotation (see below), so absolute
    metric values are illustrative, not real evaluation results.

    Args:
        results: Ultralytics prediction results; only results[0] is used.
        img_shape: Shape of the processed image, (h, w[, c]).

    Returns:
        dict: Aggregated metrics, or {"error": ...} on failure.
    """
    # Relies on the module-level `model` loaded at import time.
    if not model:
        return {"error": "Model not loaded"}
    if not results or results[0].masks is None:
        return {"error": "No objects detected"}

    try:
        # Process predictions: stack per-instance masks and binarize at 0.5.
        pred_masks = torch.stack([m.data[0] for m in results[0].masks]).cpu().numpy()
        pred_masks = (pred_masks > 0.5).astype(np.uint8)

        # Generate mock ground truth (centered rectangle, middle half of image).
        gt_masks = np.zeros_like(pred_masks)
        h, w = img_shape[:2]
        gt_masks[:, h//4:3*h//4, w//4:3*w//4] = 1

        # Initialize metrics accumulators (sums; averaged after the loop).
        metrics = {
            'IoU': {'mean': 0, 'per_instance': [], 'class_wise': {}},
            'Dice': 0,
            'Pixel_Accuracy': 0,
            'Boundary_IoU': 0,
            'Object_Counts': {},
            'Class_Distribution': {}
        }

        # Calculate per-mask metrics; failures on one mask skip that mask only.
        valid_masks = 0
        for i, (pred_mask, gt_mask) in enumerate(zip(pred_masks, gt_masks)):
            try:
                pred_flat = pred_mask.flatten()
                gt_flat = gt_mask.flatten()

                # Skip instances whose mock ground truth is empty.
                if np.sum(gt_flat) == 0:
                    continue

                # Core metrics (accumulated; normalized by valid_masks below).
                metrics['IoU']['per_instance'].append(jaccard_score(gt_flat, pred_flat))
                metrics['Dice'] += f1_score(gt_flat, pred_flat)
                metrics['Pixel_Accuracy'] += accuracy_score(gt_flat, pred_flat)
                metrics['Boundary_IoU'] += calculate_boundary_iou(gt_mask, pred_mask)

                # Class tracking: map class index -> human-readable name.
                cls = int(results[0].boxes.cls[i])
                cls_name = model.names[cls]
                metrics['Object_Counts'][cls_name] = metrics['Object_Counts'].get(cls_name, 0) + 1
                metrics['Class_Distribution'][cls_name] = metrics['Class_Distribution'].get(cls_name, 0) + 1

                valid_masks += 1
            except Exception:
                continue

        # Finalize metrics: convert accumulated sums into means.
        if valid_masks > 0:
            metrics['IoU']['mean'] = np.mean(metrics['IoU']['per_instance'])
            metrics['Dice'] /= valid_masks
            metrics['Pixel_Accuracy'] /= valid_masks
            metrics['Boundary_IoU'] /= valid_masks

        # NOTE(review): despite the key name, 'class_wise' stores each class's
        # share of total detections (a frequency fraction), not a per-class
        # IoU — confirm intended semantics before relying on it.
        total = sum(metrics['Object_Counts'].values())
        metrics['IoU']['class_wise'] = {k: v/total for k, v in metrics['Object_Counts'].items()}

        return metrics

    except Exception as e:
        return {"error": f"Metric calculation failed: {str(e)}"}
125
+
126
def visualize_results(img, results):
    """Generate visualizations with error handling.

    Args:
        img: RGB image as a numpy array (the array passed to the model).
        results: Ultralytics prediction results; only results[0] is used.

    Returns:
        (seg_img, det_img): copies of `img` with the segmentation polygons
        filled and with bounding boxes + class labels drawn, respectively.
        On any failure, returns (img, img) unchanged.
    """
    try:
        # Segmentation overlay: fill each predicted mask polygon.
        # NOTE(review): cv2.fillPoly ignores the 4th (alpha) component of the
        # color tuple on a 3-channel image, so this is an opaque fill, not a
        # translucent overlay — confirm if blending was intended.
        seg_img = img.copy()
        if results[0].masks is not None:
            for mask in results[0].masks:
                mask_points = mask.xy[0].astype(int)
                cv2.fillPoly(seg_img, [mask_points], (0, 0, 255, 100))

        # Bounding boxes: draw one rectangle + "name conf" label per detection.
        det_img = img.copy()
        for box, cls, conf in zip(results[0].boxes.xyxy, results[0].boxes.cls, results[0].boxes.conf):
            x1, y1, x2, y2 = map(int, box)
            cv2.rectangle(det_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
            cv2.putText(det_img, f"{model.names[int(cls)]} {conf:.2f}",
                        (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,255,255), 2)

        return seg_img, det_img
    except Exception:
        return img, img  # Fallback to original images
147
+
148
def process_image(input_img):
    """Main processing pipeline: PIL image in, (seg, det, metrics) out."""
    try:
        # Normalize the upload to a 3-channel RGB array, then run the model.
        frame = validate_image(np.array(input_img))
        results = model(frame)
        # Build both overlays and the metrics report from one prediction pass.
        seg_img, det_img = visualize_results(frame, results)
        metrics = calculate_metrics(results, frame.shape)
        return Image.fromarray(seg_img), Image.fromarray(det_img), metrics
    except Exception as e:
        st.error(f"Processing failed: {str(e)}")
        return None, None, {"error": str(e)}
160
+
161
# Main UI
def main():
    """Render the Streamlit UI: upload an image, run the model, show results.

    NOTE(review): the sidebar sliders below (confidence threshold, boundary
    width) are read but never passed into the processing pipeline — confirm
    whether they were meant to parameterize model(...) and
    calculate_boundary_iou(...).
    """
    st.title("🧪 Advanced Segmentation Metrics Analyzer")
    st.markdown("""
    Upload an image to analyze object segmentation performance using YOLOv8.
    The system provides detailed metrics and visualizations.
    """)

    # Sidebar configuration controls.
    with st.sidebar:
        st.header("Configuration")
        conf_threshold = st.slider("Confidence Threshold", 0.1, 1.0, 0.5)
        boundary_width = st.slider("Boundary Width (pixels)", 1, 10, 2)
        st.markdown("---")
        st.markdown(f"**Device:** {'GPU 🔥' if torch.cuda.is_available() else 'CPU 🐢'}")

    uploaded_file = st.file_uploader(
        "Choose an image",
        type=["jpg", "jpeg", "png", "bmp"],
        help="Supports JPG, PNG, BMP formats"
    )

    if uploaded_file:
        try:
            img = Image.open(uploaded_file)
            col1, col2 = st.columns(2)
            with col1:
                st.image(img, caption="Original Image", use_column_width=True)

            if st.button("Analyze", type="primary"):
                with st.spinner("Processing..."):
                    seg_img, det_img, metrics = process_image(img)

                if metrics and "error" not in metrics:
                    tabs = st.tabs(["Visual Results", "Metrics Dashboard", "Raw Data"])

                    # Tab 0: side-by-side segmentation / detection overlays.
                    with tabs[0]:
                        st.subheader("Segmentation Analysis")
                        cols = st.columns(2)
                        cols[0].image(seg_img, caption="Segmentation Mask", use_column_width=True)
                        cols[1].image(det_img, caption="Detected Objects", use_column_width=True)

                    # Tab 1: headline metrics plus a class-frequency bar chart.
                    with tabs[1]:
                        st.subheader("Performance Metrics")
                        st.metric("Mean IoU", f"{metrics['IoU']['mean']:.2%}",
                                  help="Intersection over Union")
                        st.metric("Dice Coefficient", f"{metrics['Dice']:.2%}",
                                  help="F1 Score for segmentation")
                        st.metric("Pixel Accuracy", f"{metrics['Pixel_Accuracy']:.2%}")

                        st.plotly_chart({
                            'data': [{
                                'x': list(metrics['Class_Distribution'].keys()),
                                'y': list(metrics['Class_Distribution'].values()),
                                'type': 'bar'
                            }],
                            'layout': {'title': 'Class Distribution'}
                        })

                    # Tab 2: raw metrics dump + download.
                    # NOTE(review): str(metrics) is a Python repr, not valid
                    # JSON, despite the "application/json" MIME type — confirm
                    # whether json.dumps was intended.
                    with tabs[2]:
                        st.download_button(
                            "Download Metrics",
                            str(metrics),
                            "metrics.json",
                            "application/json"
                        )
                        st.json(metrics)

                elif metrics and "error" in metrics:
                    st.error(metrics["error"])

        except Exception as e:
            st.error(f"Error loading image: {str(e)}")
233
+
234
# Standard script entry point (also what `streamlit run` executes top-down).
if __name__ == "__main__":
    main()
paths_to_forget.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ output/
2
+ static/uploads/
3
+ instance/users.db
4
+ models/best_model_bleu*.pth
5
+ yolov8x-seg.pt
6
+ data/images/
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ torchvision
3
+ Pillow
4
+ numpy
5
+ matplotlib
6
+ tqdm
7
+ nltk
8
+ rouge-score
9
+ pycocotools
10
+ pycocoevalcap @ git+https://github.com/salaniz/pycocoevalcap.git
11
+ flask
12
+ flask_sqlalchemy
13
+ gunicorn
14
+ ultralytics
15
+ face-recognition
16
+ opencv-python-headless
17
+ werkzeug
src/__init__.py ADDED
File without changes
src/app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from PIL import Image
4
+ import sys # Import sys for flushing stdout
5
+
6
+ # Import modules from your project structure
7
+ # Ensure utils is imported early to set up logging
8
+ from .utils import get_logger, get_eval_transform, visualize_attention
9
+ from .model import ImageCaptioningModel
10
+ from .data_preprocessing import COCOVocabulary
11
+ from .config import INFERENCE_CONFIG, update_config_with_latest_model # config is imported here
12
+ from .evaluation import calculate_bleu_scores_detailed # evaluation is imported here
13
+
14
+ # Get the module-specific logger. This logger will inherit from the root logger
15
+ # which is configured when `utils` is imported.
16
+ logger = get_logger(__name__)
17
+
18
def run_inference_example(model_path, image_path, config=None):
    """
    Run inference on a single image and generate a caption.

    Args:
        model_path (str): Path to the saved model checkpoint (.pth file).
        image_path (str): Path to the image file for captioning.
        config (dict, optional): Inference parameters such as 'beam_size'
            and 'max_caption_length'; missing keys fall back to defaults
            (beam_size=5, max_caption_length=20).

    Returns:
        str: The generated caption for the image.

    Raises:
        FileNotFoundError: If the model checkpoint or image file is not found.
        RuntimeError: If the image cannot be loaded or transformed.
    """
    # Normalize once instead of repeating "config.get(...) if config else ..."
    # ternaries throughout the function.
    cfg = config or {}
    beam_size = cfg.get('beam_size', 5)
    max_caption_length = cfg.get('max_caption_length', 20)

    logger.info("Loading model for inference...")

    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model checkpoint not found at {model_path}. "
                                "Please train the model first or provide a valid path.")

    # Load the complete checkpoint (model state, optimizer state, vocabulary,
    # config). map_location='cpu' ensures it loads even if trained on GPU.
    checkpoint = torch.load(model_path, map_location='cpu', weights_only=False)

    # Extract configuration and vocabulary from the checkpoint.
    model_config_from_checkpoint = checkpoint.get('config', {})
    vocabulary = checkpoint['vocabulary']

    # Rebuild the model with the architecture hyperparameters saved in the
    # checkpoint; dropout off and encoder frozen for inference.
    model = ImageCaptioningModel(
        vocab_size=vocabulary.vocab_size,
        embed_dim=model_config_from_checkpoint.get('embed_dim', 256),
        attention_dim=model_config_from_checkpoint.get('attention_dim', 256),
        decoder_dim=model_config_from_checkpoint.get('decoder_dim', 256),
        dropout=0.0,                 # Dropout should be off during inference
        fine_tune_encoder=False,     # Encoder must not be fine-tuned here
        max_caption_length=max_caption_length
    )
    # Load the trained weights and switch to eval mode (affects batch norm,
    # dropout).
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()

    # Move to GPU when available.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = model.to(device)
    logger.info(f"Model loaded successfully on device: {device}")

    # Image transformation pipeline used for evaluation/inference.
    transform = get_eval_transform()

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found at {image_path}. Please check the image path.")

    # Load and preprocess the image.
    try:
        image = Image.open(image_path).convert('RGB')
        image_tensor = transform(image).to(device)
    except Exception as e:
        # Bug fix: raise a specific exception type and chain the original
        # cause instead of a bare `Exception` (still catchable by existing
        # `except Exception` callers).
        raise RuntimeError(f"Error loading or transforming image {image_path}: {e}") from e

    logger.info(f"Generating caption for {image_path} using beam search (beam_size="
                f"{beam_size})...")

    # Generate the caption using the model's integrated beam-search method.
    generated_caption = model.generate_caption(
        image_tensor,
        vocabulary,
        device,
        beam_size=beam_size,
        max_length=max_caption_length
    )

    # Optional: visualize attention weights alongside the caption.
    visualize_attention(model, image_path, vocabulary, device,
                        save_path=os.path.join('output', 'attention_visualization.png'))

    logger.info("\n" + "="*50)
    logger.info(" GENERATED CAPTION")
    logger.info("="*50)
    logger.info(f"Image: {image_path}")
    logger.info(f"Caption: {generated_caption}")
    logger.info("="*50 + "\n")
    sys.stdout.flush()  # Explicitly flush so the caption is visible immediately

    return generated_caption
106
+
107
+
108
if __name__ == '__main__':
    # When `app.py` is run directly, run the inference example.
    # Point INFERENCE_CONFIG at the latest trained model, if one exists.
    update_config_with_latest_model(INFERENCE_CONFIG)

    # --- User Input/Configuration for Inference ---
    # These values are primarily controlled via INFERENCE_CONFIG in config.py;
    # override them here to test specific scenarios immediately.
    my_image_path = INFERENCE_CONFIG['example_image_path']
    my_model_path = INFERENCE_CONFIG['model_path']
    # Optional reference caption for a quick single-image comparison; leave
    # empty to skip the BLEU evaluation below.
    my_reference_caption = "Two riders on horses are performing a reining maneuver on a green grassy field surrounded by trees" # Example reference, replace or leave empty

    # Use a copy of INFERENCE_CONFIG to avoid modifying the global config directly.
    inference_params = INFERENCE_CONFIG.copy()

    logger.info("--- Running Inference Example ---")
    try:
        generated_caption = run_inference_example(my_model_path, my_image_path, config=inference_params)

        # Evaluate the single generated caption against its reference, if given.
        if my_reference_caption:
            # calculate_bleu_scores_detailed is imported from .evaluation above.
            bleu_scores = calculate_bleu_scores_detailed([my_reference_caption], [generated_caption])
            logger.info("\n--- Single Image Evaluation ---")
            logger.info(f"Reference: {my_reference_caption}")
            logger.info(f"Generated: {generated_caption}")
            logger.info(f"BLEU-4 Score: {bleu_scores['BLEU-4']:.4f}")
            logger.info("-------------------------------\n")
            sys.stdout.flush()  # Explicitly flush after single image evaluation too

    except FileNotFoundError as e:
        logger.error(f"Error: {e}")
        logger.error("Please ensure your model, vocabulary, and image paths are correct "
                     "and data is downloaded as per README.md.")
        sys.stdout.flush()  # Flush errors too
    except Exception as e:
        logger.critical(f"An unexpected error occurred during inference: {e}", exc_info=True)
        sys.stdout.flush()  # Flush critical errors too
src/config.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ # Base data folder, assuming COCO 2017 is extracted here
4
+ # Adjust this path if your data is located elsewhere
5
+ _BASE_DATA_FOLDER = 'data/images'
6
+
7
+ # Output directory for logs, saved vocabulary, and temporary files
8
+ # and where the best model checkpoint will be saved for easy access
9
+ _OUTPUT_DIR = 'output'
10
+ _MODELS_DIR = 'models' # Where the final best model will be saved
11
+
12
+ # Ensure output and models directories exist
13
+ os.makedirs(_OUTPUT_DIR, exist_ok=True)
14
+ os.makedirs(_MODELS_DIR, exist_ok=True)
15
+
16
+
17
+ # --- Configuration for Training ---
18
+ TRAINING_CONFIG = {
19
+ 'data_folder': _BASE_DATA_FOLDER,
20
+ 'train_image_folder': 'train2017',
21
+ 'val_image_folder': 'val2017',
22
+ 'train_caption_file': os.path.join(_BASE_DATA_FOLDER, 'annotations', 'captions_train2017.json'),
23
+ 'val_caption_file': os.path.join(_BASE_DATA_FOLDER, 'annotations', 'captions_val2017.json'),
24
+
25
+ # Subset sizes for quicker testing during development. Set to None for full dataset.
26
+ 'vocab_subset_size': None,
27
+ 'train_subset_size': None, # e.g., 200000 for a large subset, None for full
28
+ 'val_subset_size': None, # e.g., 10000 for a subset, None for full
29
+
30
+ # Model Hyperparameters
31
+ 'embed_dim': 256,
32
+ 'attention_dim': 256,
33
+ 'decoder_dim': 256,
34
+ 'dropout': 0.5,
35
+ 'fine_tune_encoder': True, # Set to False to freeze ResNet weights during training
36
+
37
+ # Training Parameters
38
+ 'batch_size': 64,
39
+ 'num_workers': 4, # Adjust based on your CPU cores and RAM (e.g., 2, 4, 8)
40
+ 'learning_rate': 4e-4,
41
+ 'encoder_learning_rate': 1e-5, # Lower LR for encoder if fine_tune_encoder is True
42
+ 'lr_reduce_factor': 0.5,
43
+ 'lr_patience': 5,
44
+ 'num_epochs': 20, # Total number of epochs to run
45
+ 'log_step': 100, # Print loss every N steps
46
+ 'grad_clip': 5.0, # Gradient clipping value
47
+
48
+ 'max_caption_length': 30, # Max length of captions, including <START> and <END>
49
+ 'val_beam_size': 3, # Beam size used for validation inference during training
50
+ 'val_inference_batches': None, # None to generate captions for all validation batches, or an int for a subset
51
+
52
+ 'output_dir': _OUTPUT_DIR, # Where training logs and vocabulary will be saved
53
+ 'models_dir': _MODELS_DIR # Where the best model checkpoint will be saved
54
+ }
55
+
56
+
57
+ # --- Configuration for Evaluation ---
58
+ # This uses the validation set for evaluation, as is common practice.
59
+ EVALUATION_CONFIG = {
60
+ 'data_folder': _BASE_DATA_FOLDER,
61
+ 'test_image_folder': 'val2017', # Typically evaluate on the validation set for final metrics
62
+ 'test_caption_file': os.path.join(_BASE_DATA_FOLDER, 'annotations', 'captions_val2017.json'),
63
+ 'test_subset_size': None, # Evaluate on a subset for faster testing, or None for full validation set
64
+ 'eval_batch_size': 1, # Must be 1 for accurate beam search evaluation
65
+ 'beam_size': 5, # Beam size for caption generation during evaluation
66
+ 'max_caption_length': 30,
67
+ 'num_workers': 4,
68
+ 'eval_output_dir': os.path.join(_OUTPUT_DIR, 'evaluation_results'), # Directory to save evaluation results JSONs
69
+ 'output_dir': _OUTPUT_DIR,
70
+
71
+ # Placeholder for model path. This will be updated dynamically after training or
72
+ # can be set manually if you have a pre-trained model.
73
+ 'model_path': os.path.join(_MODELS_DIR, 'best_model_bleu0.1058.pth') # Placeholder, update after training
74
+ }
75
+
76
+
77
# --- Configuration for Inference Example ---
INFERENCE_CONFIG = {
    'beam_size': 5,
    'max_caption_length': 30,
    # Placeholder for model path. Updated dynamically after training by
    # update_config_with_latest_model(), or set manually for a pre-trained model.
    # Bug fix: the filename previously carried a redundant 'models/' prefix,
    # yielding 'models/models/best_model_bleu0.1058.pth' (EVALUATION_CONFIG
    # uses the bare filename; this now matches it).
    'model_path': os.path.join(_MODELS_DIR, 'best_model_bleu0.1058.pth'),

    # Path to an example image for quick inference demonstration.
    'example_image_path': os.path.join(_BASE_DATA_FOLDER, 'new_one.jpg')
}
88
+
89
+
90
# --- Utility Functions for updating config with latest trained model ---
def update_config_with_latest_model(config_dict):
    """
    Finds the latest best model checkpoint in the models directory and updates
    the given configuration dictionary's 'model_path' in place.

    The "latest" model is the one whose filename encodes the highest BLEU
    score (e.g. 'best_model_bleu0.1058.pth'). Robustness fixes: filenames
    whose score fragment does not parse as a float no longer crash the
    selection, and a missing models directory is treated as "no models".
    """
    def _bleu_from_name(filename):
        # 'best_model_bleu0.1058.pth' -> 0.1058; unparsable names sort last.
        try:
            return float(filename.split('bleu')[1].replace('.pth', ''))
        except (IndexError, ValueError):
            return float('-inf')

    try:
        saved_models = [f for f in os.listdir(_MODELS_DIR)
                        if f.startswith('best_model_bleu') and f.endswith('.pth')]
    except FileNotFoundError:
        saved_models = []

    if saved_models:
        # Pick the checkpoint with the highest BLEU score in its name.
        latest_model_name = max(saved_models, key=_bleu_from_name)
        latest_model_path = os.path.join(_MODELS_DIR, latest_model_name)
        config_dict['model_path'] = latest_model_path
        print(f"Updated config with latest model: {latest_model_path}")
    else:
        print(f"Warning: No best model found in '{_MODELS_DIR}'. Inference/Evaluation might fail.")
105
+
106
+ # Update inference and evaluation configs to point to the latest model if available
107
+ update_config_with_latest_model(EVALUATION_CONFIG)
108
+ update_config_with_latest_model(INFERENCE_CONFIG)
src/data_preprocessing.py ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import pickle
4
+ import random
5
+ from collections import Counter
6
+ from tqdm import tqdm
7
+ from PIL import Image
8
+ import torch
9
+ from torch.utils.data import Dataset
10
+ import torchvision.transforms as transforms
11
+
12
+ from .utils import get_logger, get_eval_transform # Import logger and transforms from utils
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
class COCOVocabulary:
    """
    Vocabulary builder for COCO captions.

    Maintains word-to-index and index-to-word mappings, tracks corpus word
    frequencies, and converts captions to and from fixed-length index
    sequences using the special tokens <PAD>, <START>, <END>, <UNK>.
    """

    def __init__(self, min_word_freq=5):
        """
        Args:
            min_word_freq (int): Words occurring fewer times than this are not
                given their own index and decode to <UNK> instead.
        """
        self.min_word_freq = min_word_freq
        self.word2idx = {}           # word  -> numerical index
        self.idx2word = {}           # index -> word
        self.word_freq = Counter()   # word  -> occurrence count
        self.vocab_size = 0          # number of entries in the vocabulary

    def _register(self, word):
        """Assign the next free index to `word` unless it is already known."""
        if word not in self.word2idx:
            new_idx = len(self.word2idx)
            self.word2idx[word] = new_idx
            self.idx2word[new_idx] = word

    def build_vocabulary(self, captions):
        """
        Populate the vocabulary from an iterable of caption strings.

        Special tokens are registered first (so <PAD> always gets index 0),
        then every word meeting the min_word_freq threshold.
        """
        logger.info("Building vocabulary...")

        # Tally token frequencies over the whole corpus.
        for caption in tqdm(captions, desc="Counting word frequencies"):
            self.word_freq.update(self.tokenize(caption))

        # Special tokens come first; registration is idempotent.
        for token in ('<PAD>', '<START>', '<END>', '<UNK>'):
            self._register(token)

        # Admit only sufficiently frequent words; the rest fall back to <UNK>.
        for word, freq in self.word_freq.items():
            if freq >= self.min_word_freq:
                self._register(word)

        self.vocab_size = len(self.word2idx)
        logger.info(f"Vocabulary built successfully. Size: {self.vocab_size}")

    def tokenize(self, caption):
        """
        Lowercase the caption and split it into words.

        str.split() with no argument splits on any whitespace run and drops
        empty strings, which also normalizes multiple spaces.

        Args:
            caption (str): The input caption string.
        Returns:
            list: Tokenized words.
        """
        return caption.lower().strip().split()

    def caption_to_indices(self, caption, max_length=20):
        """
        Encode a caption as exactly max_length indices:
        [<START>, w1, ..., <END>, <PAD>, ...].

        Unknown words map to <UNK>; captions longer than max_length - 2 words
        are truncated so <END> always fits.

        Args:
            caption (str): The input caption string.
            max_length (int): Desired length of the encoded sequence.
        Returns:
            list: Integer indices of length max_length.
        """
        start = self.word2idx['<START>']
        end = self.word2idx['<END>']
        pad = self.word2idx['<PAD>']
        unk = self.word2idx['<UNK>']

        indices = [start]
        for token in self.tokenize(caption):
            if len(indices) >= max_length - 1:  # keep one slot for <END>
                break
            indices.append(self.word2idx.get(token, unk))
        indices.append(end)

        # Right-pad short captions, then clamp to the requested length.
        indices.extend([pad] * (max_length - len(indices)))
        return indices[:max_length]

    def indices_to_caption(self, indices):
        """
        Decode indices back into text, stopping at <END> and dropping
        <PAD>/<START>. Unknown indices render as <UNK>.

        Args:
            indices (list or numpy.ndarray): Integer indices.
        Returns:
            str: The reconstructed caption.
        """
        words = []
        for idx in indices:
            word = self.idx2word.get(idx, '<UNK>')
            if word == '<END>':
                break
            if word != '<PAD>' and word != '<START>':
                words.append(word)
        return ' '.join(words)
125
+
126
+
127
class COCODataset(Dataset):
    """
    PyTorch Dataset for COCO Image Captioning.
    Loads image paths and their corresponding captions,
    and returns preprocessed image tensors and indexed caption tensors.
    """
    def __init__(self, image_dir, caption_file, vocabulary=None,
                 max_caption_length=20, subset_size=None, transform=None):
        """
        Initializes the COCODataset.
        Args:
            image_dir (str): Path to the directory containing COCO images (e.g., 'train2017', 'val2017').
            caption_file (str): Path to the COCO captions JSON file (e.g., 'captions_train2017.json').
            vocabulary (COCOVocabulary, optional): A pre-built COCOVocabulary object. If None,
                                                   a new vocabulary will be built from the captions.
            max_caption_length (int): Maximum length for indexed captions.
            subset_size (int, optional): If specified, uses a random subset of this size from the dataset.
            transform (torchvision.transforms.Compose, optional): Image transformations to apply.
        Raises:
            FileNotFoundError: If caption_file does not exist.
            json.JSONDecodeError: If caption_file is not valid JSON.
        """
        self.image_dir = image_dir
        self.max_caption_length = max_caption_length
        self.transform = transform if transform is not None else get_eval_transform() # Default transform

        try:
            with open(caption_file, 'r') as f:
                self.coco_data = json.load(f)
            logger.info(f"Successfully loaded captions from {caption_file}")
        except FileNotFoundError:
            logger.error(f"Caption file not found at {caption_file}. Please check the path.")
            raise
        except json.JSONDecodeError:
            logger.error(f"Error decoding JSON from {caption_file}. Ensure it's a valid JSON file.")
            raise

        # Create a mapping from image ID to its filename for quick lookup
        self.id_to_filename = {img_info['id']: img_info['file_name'] for img_info in self.coco_data['images']}

        self.data = [] # Stores (image_path, caption, image_id) tuples
        missing_image_files = 0

        # Process annotations to pair image paths with captions.
        # Annotations whose image file is absent on disk are skipped silently
        # here and reported once in aggregate below.
        for ann in tqdm(self.coco_data['annotations'], desc="Processing annotations"):
            image_id = ann['image_id']
            if image_id in self.id_to_filename:
                caption = ann['caption']
                filename = self.id_to_filename[image_id]
                image_full_path = os.path.join(image_dir, filename)

                if os.path.exists(image_full_path):
                    self.data.append({
                        'image_path': image_full_path,
                        'caption': caption,
                        'image_id': image_id # Store original image_id for evaluation
                    })
                else:
                    missing_image_files += 1
                    # logger.warning(f"Image file not found: {image_full_path}. Skipping this annotation.")
            else:
                logger.warning(f"Image ID {image_id} not found in images list. Skipping annotation.")

        if missing_image_files > 0:
            logger.warning(f"Skipped {missing_image_files} annotations due to missing image files. "
                           "Please ensure all images are in the specified directory.")

        # If subset_size is specified, take a random sample
        if subset_size and subset_size < len(self.data):
            self.data = random.sample(self.data, subset_size)
            logger.info(f"Using subset of {subset_size} samples for the dataset.")

        logger.info(f"Dataset size after filtering: {len(self.data)} samples.")

        # Build vocabulary if not provided.
        # NOTE(review): subsetting happens above, so with subset_size set the
        # vocabulary is built from the subset only — confirm this is intended.
        if vocabulary is None:
            self.vocabulary = COCOVocabulary()
            captions_for_vocab = [item['caption'] for item in self.data]
            self.vocabulary.build_vocabulary(captions_for_vocab)
        else:
            self.vocabulary = vocabulary

    def __len__(self):
        """Returns the total number of samples in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """
        Retrieves an item from the dataset at the given index.
        Returns:
            tuple: (image_tensor, caption_tensor, caption_length, image_id)
        """
        item = self.data[idx]

        # Load and transform image
        try:
            image = Image.open(item['image_path']).convert('RGB')
            if self.transform:
                image = self.transform(image)
        except Exception as e:
            logger.error(f"Error loading image {item['image_path']}: {e}. Returning a black image as fallback.")
            # Return a black image tensor of expected size (3, 224, 224) if image loading fails.
            # NOTE(review): assumes the transform pipeline emits 3x224x224 tensors — confirm.
            image = torch.zeros(3, 224, 224)

        # Convert caption to indices
        caption_indices = self.vocabulary.caption_to_indices(
            item['caption'], self.max_caption_length
        )
        caption_tensor = torch.tensor(caption_indices, dtype=torch.long)

        # Calculate actual length of the caption (excluding padding, including START/END)
        try:
            # Find the index of <END> token, length is (index + 1)
            end_idx = caption_indices.index(self.vocabulary.word2idx['<END>'])
            caption_length = end_idx + 1
        except ValueError:
            # If <END> not found (shouldn't happen with proper max_caption_length),
            # count non-PAD tokens.
            caption_length = len([idx for idx in caption_indices if idx != self.vocabulary.word2idx['<PAD>']])

        caption_length = torch.tensor(caption_length, dtype=torch.long)

        # Return image tensor, caption tensor, actual caption length, and original image ID
        return image, caption_tensor, caption_length, item['image_id']
src/evaluation.py ADDED
@@ -0,0 +1,691 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import json
4
+ import numpy as np
5
+ from tqdm import tqdm
6
+ import torch
7
+ from torch.utils.data import DataLoader
8
+ import math # For perplexity
9
+ import random
10
+ from .config import EVALUATION_CONFIG, update_config_with_latest_model
11
+ from .data_preprocessing import COCOVocabulary
12
+
13
+ # Import necessary NLTK components for BLEU, METEOR
14
+ try:
15
+ import nltk
16
+ from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
17
+ from nltk.translate.meteor_score import meteor_score
18
+ from nltk.tokenize import word_tokenize
19
+ # Suppress NLTK download messages if already downloaded
20
+ nltk.download('punkt', quiet=True)
21
+ nltk.download('wordnet', quiet=True)
22
+ except ImportError:
23
+ print("NLTK not installed or data not downloaded. BLEU/METEOR scores will be skipped.")
24
+ print("Please install NLTK (`pip install nltk`) and download data (`python -c \"import nltk; nltk.download('punkt'); nltk.download('wordnet')\"`)")
25
+ corpus_bleu = None
26
+ meteor_score = None
27
+ word_tokenize = None
28
+ SmoothingFunction = None
29
+
30
+ # Import ROUGE
31
+ try:
32
+ from rouge_score import rouge_scorer
33
+ except ImportError:
34
+ print("rouge-score not installed. ROUGE-L score will be skipped.")
35
+ print("Please install it: `pip install rouge-score`")
36
+ rouge_scorer = None
37
+
38
+ # Import pycocotools and pycocoevalcap for CIDEr
39
+ try:
40
+ from pycocotools.coco import COCO
41
+ from pycocoevalcap.eval import COCOEvalCap
42
+ import tempfile
43
+ except ImportError:
44
+ print("pycocotools or pycocoevalcap not installed. CIDEr score will be skipped.")
45
+ print("Please install: `pip install pycocotools` and `pip install git+https://github.com/salaniz/pycocoevalcap.git`")
46
+ COCO = None
47
+ COCOEvalCap = None
48
+ tempfile = None
49
+
50
+
51
+ from .model import ImageCaptioningModel # Import the model
52
+ from .data_preprocessing import COCODataset # Import dataset
53
+ from .utils import get_logger, get_eval_transform # Import utilities
54
+
55
+ logger = get_logger(__name__)
56
+
57
+
58
def calculate_bleu_scores_detailed(references, hypotheses):
    """
    Compute corpus-level BLEU-1 through BLEU-4 (one reference per hypothesis).

    Args:
        references (list of str): Reference captions, one string per sample.
        hypotheses (list of str): Generated captions, one string per sample.
    Returns:
        dict: {'BLEU-1': ..., 'BLEU-2': ..., 'BLEU-3': ..., 'BLEU-4': ...}.
              All zeros when NLTK is unavailable or scoring fails.
    """
    if corpus_bleu is None or word_tokenize is None or SmoothingFunction is None:
        logger.error("NLTK requirements for BLEU not met. Returning 0 for BLEU scores.")
        return {'BLEU-1': 0, 'BLEU-2': 0, 'BLEU-3': 0, 'BLEU-4': 0}

    try:
        # corpus_bleu expects, per hypothesis, a LIST of tokenized references;
        # we have exactly one reference per sample, hence the extra nesting.
        ref_tokens = [[word_tokenize(ref.lower())] for ref in references]
        hyp_tokens = [word_tokenize(hyp.lower()) for hyp in hypotheses]

        # Smoothing helps short sentences and small test sets.
        smooth = SmoothingFunction().method1

        # n-gram weight vectors for each BLEU variant.
        weights_by_name = {
            'BLEU-1': (1, 0, 0, 0),
            'BLEU-2': (0.5, 0.5, 0, 0),
            'BLEU-3': (0.33, 0.33, 0.33, 0),
            'BLEU-4': (0.25, 0.25, 0.25, 0.25),
        }
        return {
            name: corpus_bleu(ref_tokens, hyp_tokens, weights=w, smoothing_function=smooth)
            for name, w in weights_by_name.items()
        }
    except Exception as e:
        logger.error(f"Error calculating BLEU scores: {e}")
        return {'BLEU-1': 0, 'BLEU-2': 0, 'BLEU-3': 0, 'BLEU-4': 0}
96
+
97
+
98
def calculate_meteor_score(references, hypotheses):
    """
    Average sentence-level METEOR over paired reference/hypothesis captions.

    Args:
        references (list of str): Reference captions.
        hypotheses (list of str): Generated captions.
    Returns:
        float: Mean METEOR score (0.0 for empty input), or None when NLTK's
               wordnet support is missing or scoring fails.
    """
    if meteor_score is None or word_tokenize is None:
        logger.error("NLTK requirements for METEOR (wordnet) not met. Returning None for METEOR score.")
        return None

    try:
        # meteor_score takes a list of reference token lists (one here) per hypothesis.
        scores = [
            meteor_score([word_tokenize(ref.lower())], word_tokenize(hyp.lower()))
            for ref, hyp in zip(references, hypotheses)
        ]
        return np.mean(scores) if scores else 0.0
    except Exception as e:
        logger.error(f"Error calculating METEOR score: {e}")
        return None
124
+
125
+
126
def calculate_rouge_l_score(references, hypotheses):
    """
    Average ROUGE-L F-measure over paired reference/hypothesis captions.

    Args:
        references (list of str): Reference captions.
        hypotheses (list of str): Generated captions.
    Returns:
        float: Mean ROUGE-L F-measure (0.0 for empty input), or None when the
               rouge-score library is unavailable or scoring fails.
    """
    if rouge_scorer is None:
        logger.error("rouge-score library not available. Returning None for ROUGE-L score.")
        return None

    try:
        # 'rougeL' = longest-common-subsequence ROUGE; we keep the F-measure.
        scorer = rouge_scorer.RougeScorer(['rougeL'], use_stemmer=True)
        fmeasures = [
            scorer.score(ref, hyp)['rougeL'].fmeasure
            for ref, hyp in zip(references, hypotheses)
        ]
        return np.mean(fmeasures) if fmeasures else 0.0
    except Exception as e:
        logger.error(f"Error calculating ROUGE-L score: {e}")
        return None
152
+
153
+
154
def calculate_cider_score(references, hypotheses):
    """
    Calculates the CIDEr score using the pycocoevalcap library.

    Reference/hypothesis pairs are written to temporary JSON files in the
    minimal COCO format the evaluator expects. The temp files are removed in
    a finally-block so they no longer leak when COCO loading or evaluation
    raises (previously cleanup only ran on the happy path).

    Args:
        references (list of str): List of reference captions.
        hypotheses (list of str): List of hypothesis (generated) captions.
    Returns:
        float: CIDEr score, or None if pycocotools/pycocoevalcap are not
               available or evaluation fails.
    """
    if COCO is None or COCOEvalCap is None or tempfile is None:
        logger.error("pycocotools or pycocoevalcap not available. Returning None for CIDEr score.")
        return None

    ref_path = None
    hyp_path = None
    try:
        # pycocoevalcap keys everything by image id; fabricate sequential ids.
        dummy_image_ids = list(range(len(references)))

        refs_coco_format = [
            {"image_id": dummy_image_ids[i], "id": i, "caption": ref_str}
            for i, ref_str in enumerate(references)
        ]
        hyps_coco_format = [
            {"image_id": dummy_image_ids[i], "id": i, "caption": hyp_str}
            for i, hyp_str in enumerate(hypotheses)
        ]

        # COCO / COCOEvalCap read from files, so write temporary JSONs.
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f_ref:
            # Minimal COCO-like structure: annotations plus an images list.
            json.dump({"annotations": refs_coco_format, "images": [{"id": i} for i in dummy_image_ids]}, f_ref)
            ref_path = f_ref.name

        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as f_hyp:
            json.dump(hyps_coco_format, f_hyp)
            hyp_path = f_hyp.name

        # Initialize COCO and COCOEvalCap objects and run the evaluation.
        coco = COCO(ref_path)
        cocoRes = coco.loadRes(hyp_path)

        cocoEval = COCOEvalCap(coco, cocoRes)
        cocoEval.params['image_id'] = cocoRes.getImgIds()  # restrict to our ids
        cocoEval.evaluate()

        return cocoEval.eval['CIDEr']  # CIDEr score is directly available
    except Exception as e:
        logger.error(f"Error calculating CIDEr score: {e}")
        return None
    finally:
        # Unconditional cleanup of the delete=False temp files.
        for path in (ref_path, hyp_path):
            if path is not None and os.path.exists(path):
                os.remove(path)
209
+
210
+
211
def calculate_length_statistics(generated_captions, reference_captions):
    """
    Summarize word-count statistics of generated vs. reference captions.

    Args:
        generated_captions (list of str): Generated captions.
        reference_captions (list of str): Reference captions.
    Returns:
        dict: Mean and standard deviation of both length distributions plus
              the difference of means (generated minus reference); entries
              default to 0 when the corresponding input list is empty.
    """
    def _word_counts(captions):
        # Whitespace tokenization is sufficient for a length statistic.
        return [len(text.split()) for text in captions]

    gen_lengths = _word_counts(generated_captions)
    ref_lengths = _word_counts(reference_captions)
    have_both = bool(gen_lengths) and bool(ref_lengths)

    return {
        'avg_generated_length': np.mean(gen_lengths) if gen_lengths else 0,
        'avg_reference_length': np.mean(ref_lengths) if ref_lengths else 0,
        'length_difference': (np.mean(gen_lengths) - np.mean(ref_lengths)) if have_both else 0,
        'length_std_generated': np.std(gen_lengths) if gen_lengths else 0,
        'length_std_reference': np.std(ref_lengths) if ref_lengths else 0,
    }
230
+
231
+
232
def calculate_vocabulary_statistics(generated_captions, vocabulary):
    """
    Measure how much of the model's vocabulary the generated captions use.

    Args:
        generated_captions (list of str): Generated captions.
        vocabulary (COCOVocabulary): Vocabulary the model decodes with; only
            its `vocab_size` attribute is consulted here.
    Returns:
        dict: Unique-word count, total vocabulary size, coverage ratio, mean
              generated-word frequency, and the ten most common words.
    """
    from collections import Counter  # local import, mirroring the original layout

    # One incremental Counter replaces the intermediate all-words list.
    word_freq = Counter()
    for text in generated_captions:
        word_freq.update(text.lower().split())

    total_vocab = vocabulary.vocab_size
    return {
        'unique_words_used': len(word_freq),
        'total_vocabulary_size': total_vocab,
        'vocabulary_coverage': len(word_freq) / total_vocab if total_vocab > 0 else 0,
        'avg_word_frequency': np.mean(list(word_freq.values())) if word_freq else 0,
        'most_common_generated_words': word_freq.most_common(10)
    }
257
+
258
+
259
def calculate_diversity_metrics(generated_captions):
    """
    Calculates diversity metrics for generated captions.

    Metrics:
      - Type-Token Ratio (unique words / total words),
      - Self-BLEU over a sample of up to 1000 captions (higher means LESS diverse),
      - count and ratio of unique caption strings.

    Changes vs. previous version: removed an unused `Counter` import, and each
    sampled caption is now tokenized exactly once instead of re-tokenizing the
    entire sample for every hypothesis (was O(n^2) tokenizations).

    Args:
        generated_captions (list of str): List of generated captions.
    Returns:
        dict: {'type_token_ratio', 'self_bleu', 'unique_captions_count',
               'caption_uniqueness_ratio'}
    """
    # Type-Token Ratio (TTR)
    all_words = []
    for caption in generated_captions:
        all_words.extend(caption.lower().split())
    ttr = len(set(all_words)) / len(all_words) if all_words else 0

    # Self-BLEU (diversity measure) - calculated on a subset for efficiency
    self_bleu = 0
    try:
        if corpus_bleu and word_tokenize and SmoothingFunction:
            smooth = SmoothingFunction().method1

            # Sample to bound the quadratic all-pairs comparison below.
            sample_size = min(1000, len(generated_captions))
            sampled_captions = random.sample(generated_captions, sample_size) if len(generated_captions) > sample_size else generated_captions

            # Tokenize each sampled caption once up front.
            tokenized = [word_tokenize(cap.lower()) for cap in sampled_captions]

            self_bleu_scores = []
            for i, hypothesis in enumerate(tokenized):
                # All other captions in the sample serve as references.
                references_for_self_bleu = [[toks] for j, toks in enumerate(tokenized) if i != j]
                if references_for_self_bleu and hypothesis:
                    score = corpus_bleu(references_for_self_bleu, [hypothesis], smoothing_function=smooth)
                    self_bleu_scores.append(score)

            self_bleu = np.mean(self_bleu_scores) if self_bleu_scores else 0
        else:
            logger.warning("NLTK not fully available for Self-BLEU calculation. Skipping.")
    except Exception as e:
        logger.error(f"Error calculating Self-BLEU: {e}")
        self_bleu = 0

    # Caption uniqueness
    unique_captions = len(set(generated_captions))
    uniqueness_ratio = unique_captions / len(generated_captions) if len(generated_captions) > 0 else 0

    return {
        'type_token_ratio': ttr,
        'self_bleu': self_bleu,
        'unique_captions_count': unique_captions,
        'caption_uniqueness_ratio': uniqueness_ratio
    }
317
+
318
+
319
def calculate_perplexity(model, data_loader, vocabulary, device):
    """
    Calculates the perplexity of the model on a given dataset.
    Perplexity measures how well a probability model predicts a sample. Lower is better.
    Computed as exp(sum of token cross-entropy losses / number of non-<PAD> targets).
    Args:
        model (nn.Module): The trained image captioning model.
        data_loader (DataLoader): DataLoader for the dataset.
        vocabulary (COCOVocabulary): The vocabulary object.
        device (torch.device): Device to run calculation on.
    Returns:
        float: Perplexity score, or infinity if calculation fails.
    """
    model.eval()
    total_loss = 0
    total_words = 0

    # Use CrossEntropyLoss with sum reduction to get the sum of losses over all tokens
    criterion = torch.nn.CrossEntropyLoss(ignore_index=vocabulary.word2idx['<PAD>'], reduction='sum')

    with torch.no_grad():
        for images, captions_from_loader, caption_lengths_from_loader, _ in tqdm(data_loader, desc="Calculating Perplexity"):
            images = images.to(device)
            captions_for_model = captions_from_loader.to(device)
            caption_lengths_for_model = caption_lengths_from_loader.to(device)

            # Forward pass to get scores.
            # NOTE(review): assumes the model returns (scores, caps_sorted,
            # decode_lengths, ...) with scores shaped
            # (batch, max_decode_len_in_batch, vocab_size) and caps_sorted in
            # the same row order as scores — confirm against model.py.
            scores, caps_sorted, decode_lengths, _, _ = model(images, captions_for_model, caption_lengths_for_model)

            # Prepare targets: remove <START> token and slice to match the sequence length of 'scores'.
            # scores are (batch_size, max_decode_len_in_batch, vocab_size)
            # targets should be (batch_size, max_decode_len_in_batch)
            targets = caps_sorted[:, 1:scores.size(1) + 1].contiguous().view(-1) # Flatten targets
            scores_flat = scores.view(-1, scores.size(-1)) # Flatten scores

            loss = criterion(scores_flat, targets) # Calculate loss for all tokens
            total_loss += loss.item()

            # Count non-padded words in the targets that were actually used for loss.
            # (<PAD> positions are excluded by criterion's ignore_index, so this
            # matches the number of tokens contributing to total_loss.)
            num_valid_targets_in_batch = targets.ne(vocabulary.word2idx['<PAD>']).sum().item()
            total_words += num_valid_targets_in_batch

    if total_words == 0:
        logger.warning("No valid words found to calculate perplexity (total_words is 0). Returning inf.")
        return float('inf')

    avg_loss_per_word = total_loss / total_words

    # Perplexity is exp(average negative log-likelihood)
    try:
        perplexity = math.exp(avg_loss_per_word)
    except OverflowError: # Handle cases where avg_loss_per_word is very large, leading to overflow
        perplexity = float('inf')

    return perplexity
373
+
374
+
375
def print_evaluation_results(metrics):
    """
    Logs all evaluation metrics in a human-readable, sectioned layout.

    Sections: BLEU, METEOR/ROUGE-L/CIDEr/Perplexity, length statistics,
    diversity metrics, vocabulary usage, and run metadata. Metrics absent
    from the dict (or None) are skipped; scalar stats fall back to 0 via
    dict.get.

    Args:
        metrics (dict): Dictionary containing all evaluation metrics.
    """
    logger.info("\n"+"="*60)
    logger.info(" EVALUATION RESULTS")
    logger.info("="*60)

    # BLEU Scores (BLEU-1..4 are produced together, so one key check suffices)
    if 'BLEU-1' in metrics:
        logger.info(f"\nBLEU Scores:")
        logger.info(f" BLEU-1: {metrics['BLEU-1']:.4f}")
        logger.info(f" BLEU-2: {metrics['BLEU-2']:.4f}")
        logger.info(f" BLEU-3: {metrics['BLEU-3']:.4f}")
        logger.info(f" BLEU-4: {metrics['BLEU-4']:.4f}")

    # Other metrics (each may be None when its library was unavailable)
    if 'METEOR' in metrics and metrics['METEOR'] is not None:
        logger.info(f"\nMETEOR Score: {metrics['METEOR']:.4f}")

    if 'ROUGE-L' in metrics and metrics['ROUGE-L'] is not None:
        logger.info(f"ROUGE-L Score: {metrics['ROUGE-L']:.4f}")

    if 'CIDEr' in metrics and metrics['CIDEr'] is not None:
        logger.info(f"CIDEr Score: {metrics['CIDEr']:.4f}")

    if 'Perplexity' in metrics and metrics['Perplexity'] is not None:
        logger.info(f"Perplexity: {metrics['Perplexity']:.2f}")

    # Length Statistics
    logger.info(f"\nLength Statistics:")
    logger.info(f" Avg Generated Length: {metrics.get('avg_generated_length', 0):.2f}")
    logger.info(f" Avg Reference Length: {metrics.get('avg_reference_length', 0):.2f}")
    logger.info(f" Length Difference (Gen - Ref): {metrics.get('length_difference', 0):.2f}")
    logger.info(f" Std Dev Generated Length: {metrics.get('length_std_generated', 0):.2f}")
    logger.info(f" Std Dev Reference Length: {metrics.get('length_std_reference', 0):.2f}")

    # Diversity Metrics
    logger.info(f"\nDiversity Metrics:")
    logger.info(f" Type-Token Ratio: {metrics.get('type_token_ratio', 0):.4f}")
    logger.info(f" Caption Uniqueness Ratio: {metrics.get('caption_uniqueness_ratio', 0):.4f}")
    logger.info(f" Self-BLEU (Higher is lower diversity): {metrics.get('self_bleu', 0):.4f}")
    logger.info(f" Unique Captions Count: {metrics.get('unique_captions_count', 0)}")

    # Vocabulary Usage
    logger.info(f"\nVocabulary Usage:")
    logger.info(f" Unique Words Used in Generated Captions: {metrics.get('unique_words_used', 0)}")
    logger.info(f" Vocabulary Coverage (Used / Total): {metrics.get('vocabulary_coverage', 0):.4f}")
    if 'most_common_generated_words' in metrics:
        logger.info(f" Most Common Generated Words: {metrics['most_common_generated_words']}")

    # Run metadata stored under 'evaluation_info'
    logger.info(f"\nEvaluation Info:")
    eval_info = metrics.get('evaluation_info', {})
    logger.info(f" Total Samples Evaluated: {eval_info.get('total_samples', 0)}")
    logger.info(f" Evaluation Time: {eval_info.get('evaluation_time_seconds', 0):.2f}s")
    logger.info(f" Test Data Path: {eval_info.get('test_data_path', 'N/A')}")
    logger.info(f" Image Directory Used: {eval_info.get('image_dir_used', 'N/A')}")
    logger.info(f" Device: {eval_info.get('device', 'unknown')}")
    logger.info(f" Model Architecture: {eval_info.get('model_architecture', 'N/A')}")

    logger.info("="*60)
438
+
439
+
440
def save_evaluation_results(metrics, generated_captions, reference_captions, image_ids, output_dir='evaluation_results'):
    """
    Saves detailed evaluation results to JSON files.

    Writes two files into `output_dir`:
      - metrics.json: all metric values, with numpy scalars/arrays converted
        to native Python types RECURSIVELY. (The previous top-level-only
        conversion crashed json.dump on numpy values nested inside lists or
        sub-dicts such as 'evaluation_info'.)
      - captions.json: per-image generated/reference caption pairs.

    Args:
        metrics (dict): Dictionary containing all evaluation metrics.
        generated_captions (list of str): List of generated captions.
        reference_captions (list of str): List of reference captions.
        image_ids (list): List of original image IDs corresponding to captions.
        output_dir (str): Directory to save the results.
    """
    def _to_serializable(value):
        # np.generic covers all numpy scalar types (float32/64, int32/64, ...).
        if isinstance(value, np.generic):
            return value.item()
        if isinstance(value, np.ndarray):
            return value.tolist()
        if isinstance(value, dict):
            return {k: _to_serializable(v) for k, v in value.items()}
        if isinstance(value, (list, tuple)):
            # json serializes tuples as lists anyway, so output is unchanged.
            return [_to_serializable(v) for v in value]
        return value

    os.makedirs(output_dir, exist_ok=True)  # Ensure output directory exists

    # Save metrics
    metrics_path = os.path.join(output_dir, 'metrics.json')
    with open(metrics_path, 'w') as f:
        json.dump(_to_serializable(metrics), f, indent=2)

    # Save generated captions and their references along with image_ids
    captions_data = [
        {
            'image_id': img_id,
            'generated_caption': gen_cap,
            'reference_caption': ref_cap
        }
        for img_id, gen_cap, ref_cap in zip(image_ids, generated_captions, reference_captions)
    ]

    captions_path = os.path.join(output_dir, 'captions.json')
    with open(captions_path, 'w') as f:
        json.dump(captions_data, f, indent=2)

    logger.info(f"\nDetailed evaluation results saved to: {output_dir}/")
    logger.info(f"Metrics saved to: {metrics_path}")
    logger.info(f"Captions saved to: {captions_path}")
483
+
484
+
485
def perform_evaluation(model, vocabulary, test_config):
    """
    Performs comprehensive evaluation of the image captioning model on a test set.

    Pipeline: build test dataset/loader -> generate a caption per image via the
    model's beam search -> compute BLEU/METEOR/ROUGE-L/CIDEr plus length,
    vocabulary, diversity and perplexity statistics -> print and save results.

    Args:
        model (nn.Module): The trained image captioning model.
        vocabulary (COCOVocabulary): The vocabulary object used by the model.
        test_config (dict): Configuration dictionary for evaluation. Keys read:
            'data_folder', 'test_image_folder', 'test_caption_file' (required);
            'max_caption_length', 'test_subset_size', 'eval_batch_size',
            'num_workers', 'beam_size', 'eval_output_dir' (optional, defaulted).

    Returns:
        dict: Dictionary containing all evaluation metrics, or a dict with an
        'error' key on recoverable failures (missing image dir, empty dataset,
        no captions produced).

    Raises:
        FileNotFoundError: if the test caption file does not exist.
    """
    logger.info("Starting comprehensive model evaluation...")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()  # Set model to evaluation mode
    logger.info(f"Model set to evaluation mode on device: {device}")

    # Data paths for evaluation from config
    data_folder = test_config['data_folder']
    test_image_folder = test_config['test_image_folder']
    test_caption_file = test_config['test_caption_file']

    if not os.path.exists(test_caption_file):
        raise FileNotFoundError(f"Test caption file not found: {test_caption_file}")

    # Construct the correct image directory path for evaluation
    image_dir_for_eval = os.path.join(data_folder, test_image_folder)
    if not os.path.exists(image_dir_for_eval):
        logger.error(f"Image directory for evaluation not found: {image_dir_for_eval}")
        logger.error("Please ensure COCO images are extracted to the correct path.")
        # Recoverable failure: report via dict rather than raising.
        return {'error': f'Image directory not found: {image_dir_for_eval}'}

    logger.info(f"Attempting to load evaluation images from directory: {image_dir_for_eval}")

    # Create test dataset
    test_dataset = COCODataset(
        image_dir=image_dir_for_eval,
        caption_file=test_caption_file,
        vocabulary=vocabulary,  # Use the vocabulary from training
        max_caption_length=test_config.get('max_caption_length', 20),
        subset_size=test_config.get('test_subset_size'),
        transform=get_eval_transform()  # Use standard eval transform
    )

    test_loader = DataLoader(
        test_dataset,
        batch_size=test_config.get('eval_batch_size', 1),  # Batch size 1 is crucial for beam search
        shuffle=False,  # Do not shuffle test data
        num_workers=test_config.get('num_workers', 2),
        pin_memory=True
    )

    logger.info(f"Test dataset size: {len(test_dataset)}")

    if len(test_dataset) == 0:
        logger.warning("Test dataset is empty. Evaluation will not produce meaningful results.")
        return {'error': 'Test dataset is empty', 'image_dir_checked': image_dir_for_eval}

    # Generate captions for all test images
    logger.info("Generating captions for evaluation set...")
    generated_captions_list = []
    reference_captions_list = []
    image_ids_list = []  # To store original image IDs for mapping

    eval_start_time = time.time()

    # NOTE(review): the loader is assumed to yield 4-tuples
    # (images, caption_indices, <ignored>, image_id_tensors) and image ids are
    # assumed to be numeric tensors (`.item()` below) — verify against
    # COCODataset's __getitem__.
    with torch.no_grad():  # Disable gradient calculations
        for i, (images, caption_indices_batch, _, original_image_ids_batch) in enumerate(tqdm(test_loader, desc="Generating captions")):
            images = images.to(device)

            for j in range(images.size(0)):  # Iterate through batch (should be size 1 if eval_batch_size=1)
                image_tensor_single = images[j]  # Get single image tensor from batch

                # Generate caption using the model's beam search method
                generated_caption = model.generate_caption(
                    image_tensor_single, vocabulary, device,
                    beam_size=test_config.get('beam_size', 5),
                    max_length=test_config.get('max_caption_length', 20)
                )

                # Convert reference caption indices back to string
                reference_caption_str = vocabulary.indices_to_caption(caption_indices_batch[j].cpu().numpy())

                generated_captions_list.append(generated_caption)
                reference_captions_list.append(reference_caption_str)
                # Ensure image_id is a string or compatible type for JSON serialization
                image_ids_list.append(str(original_image_ids_batch[j].item()))

    eval_time = time.time() - eval_start_time
    logger.info(f"Caption generation completed in {eval_time:.2f} seconds for {len(generated_captions_list)} images.")

    if not generated_captions_list or not reference_captions_list:
        logger.error("No captions were generated or no reference captions were loaded. Cannot calculate metrics.")
        return {'error': 'No generated or reference captions available for metric calculation.'}

    # Calculate evaluation metrics
    logger.info("Calculating evaluation metrics...")
    metrics = {}

    # Calculate standard metrics
    bleu_scores = calculate_bleu_scores_detailed(reference_captions_list, generated_captions_list)
    metrics.update(bleu_scores)

    # Optional metrics: each helper may return None (e.g. when its backing
    # library is unavailable), in which case the key is simply omitted.
    meteor_score_val = calculate_meteor_score(reference_captions_list, generated_captions_list)
    if meteor_score_val is not None:
        metrics['METEOR'] = meteor_score_val

    rouge_score_val = calculate_rouge_l_score(reference_captions_list, generated_captions_list)
    if rouge_score_val is not None:
        metrics['ROUGE-L'] = rouge_score_val

    cider_score_val = calculate_cider_score(reference_captions_list, generated_captions_list)
    if cider_score_val is not None:
        metrics['CIDEr'] = cider_score_val

    # Calculate length and diversity statistics
    length_stats = calculate_length_statistics(generated_captions_list, reference_captions_list)
    metrics.update(length_stats)

    vocab_stats = calculate_vocabulary_statistics(generated_captions_list, vocabulary)
    metrics.update(vocab_stats)

    diversity_stats = calculate_diversity_metrics(generated_captions_list)
    metrics.update(diversity_stats)

    # Calculate perplexity; failures here are non-fatal — the remaining
    # metrics are still reported.
    try:
        perplexity = calculate_perplexity(model, test_loader, vocabulary, device)
        metrics['Perplexity'] = perplexity
    except Exception as e:
        logger.error(f"Could not calculate perplexity: {e}")

    # Add meta information about the evaluation run
    metrics['evaluation_info'] = {
        'total_samples': len(generated_captions_list),
        'evaluation_time_seconds': eval_time,
        'test_data_path': test_caption_file,
        'image_dir_used': image_dir_for_eval,
        'device': str(device),
        'model_architecture': 'ResNet50 Encoder + LSTM Decoder with Attention',
        'beam_size_for_inference': test_config.get('beam_size', 5),
        'max_caption_length_for_inference': test_config.get('max_caption_length', 20)
    }

    # Print and save results
    print_evaluation_results(metrics)
    save_evaluation_results(metrics, generated_captions_list, reference_captions_list, image_ids_list, output_dir=test_config.get('eval_output_dir', 'output/evaluation_results'))

    return metrics
635
+
636
+
637
if __name__ == '__main__':
    # When `evaluation.py` is run directly, it will perform evaluation:
    # load vocabulary -> locate latest checkpoint -> rebuild model -> evaluate.
    from .config import EVALUATION_CONFIG, update_config_with_latest_model
    import pickle  # For loading vocabulary
    import sys  # For explicit, non-zero exit codes on fatal setup errors

    logger.info("Starting model evaluation process...")

    # Load the vocabulary first
    VOCABULARY_FILE_PATH = 'output/vocabulary.pkl'  # Path to the vocabulary file
    if not os.path.exists(VOCABULARY_FILE_PATH):
        logger.error(f"Vocabulary not found at {VOCABULARY_FILE_PATH}. Please train the model first or provide a pre-trained vocabulary.")
        # sys.exit(1) (instead of the interactive-only exit() helper) signals
        # failure to callers/CI via a non-zero process status.
        sys.exit(1)
    try:
        with open(VOCABULARY_FILE_PATH, 'rb') as f:
            vocabulary = pickle.load(f)
        logger.info(f"Loaded vocabulary from {VOCABULARY_FILE_PATH}")
    except Exception as e:
        logger.error(f"Error loading vocabulary from {VOCABULARY_FILE_PATH}: {e}")
        sys.exit(1)

    # Update evaluation config to point to the latest trained model
    update_config_with_latest_model(EVALUATION_CONFIG)
    model_path = EVALUATION_CONFIG.get('model_path')

    if not model_path or not os.path.exists(model_path):
        logger.error(f"Model checkpoint not found at {model_path}. Please train the model or specify a valid model_path in config.py.")
        sys.exit(1)

    try:
        # Load the model state dict and config from the checkpoint
        checkpoint = torch.load(model_path, map_location='cpu')
        model_config_from_checkpoint = checkpoint.get('config', {})

        # Initialize model with parameters from checkpoint config (or defaults if missing)
        eval_model = ImageCaptioningModel(
            vocab_size=vocabulary.vocab_size,  # Use the loaded vocabulary's size
            embed_dim=model_config_from_checkpoint.get('embed_dim', 256),
            attention_dim=model_config_from_checkpoint.get('attention_dim', 256),
            decoder_dim=model_config_from_checkpoint.get('decoder_dim', 256),
            dropout=0.0,  # No dropout during evaluation
            fine_tune_encoder=False,  # No fine-tuning during evaluation
            max_caption_length=model_config_from_checkpoint.get('max_caption_length', 20)
        )
        eval_model.load_state_dict(checkpoint['model_state_dict'])
        logger.info(f"Model loaded successfully from {model_path} for evaluation.")

        # Perform the comprehensive evaluation
        eval_metrics = perform_evaluation(eval_model, vocabulary, EVALUATION_CONFIG)
        logger.info("Model Evaluation Complete!")

    except FileNotFoundError as e:
        logger.error(f"Error during evaluation setup: {e}")
        logger.error("Please ensure the model path and data paths are correct.")
    except Exception as e:
        logger.critical(f"An unexpected error occurred during evaluation: {e}", exc_info=True)
src/inference_api.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from PIL import Image
4
+
5
+ # Import core model components and utilities using ABSOLUTE IMPORTS
6
+ # Since 'src' is added to sys.path, we refer to modules directly under 'src'.
7
+ from src.model import ImageCaptioningModel
8
+ from src.data_preprocessing import COCOVocabulary
9
+ from src.utils import get_logger, get_eval_transform
10
+ from src.config import INFERENCE_CONFIG, update_config_with_latest_model # Import global config
11
+
12
+ logger = get_logger(__name__)
13
+
14
# --- Global variables to store the loaded model and vocabulary ---
# These will be loaded once when this module is first imported
# (see _load_model_and_vocabulary below); all start as None so the loader
# can detect whether initialization has already happened.
_model = None        # the ImageCaptioningModel instance, in eval mode
_vocabulary = None   # vocabulary object restored from the checkpoint
_device = None       # torch.device selected at load time (cuda if available)
_transform = None    # image preprocessing pipeline from get_eval_transform()
20
+
21
+ def _load_model_and_vocabulary():
22
+ """
23
+ Loads the image captioning model and vocabulary.
24
+ This function should be called only once during application startup.
25
+ """
26
+ global _model, _vocabulary, _device, _transform
27
+
28
+ if _model is not None:
29
+ logger.info("Model and vocabulary already loaded.")
30
+ return
31
+
32
+ logger.info("Initializing model and vocabulary for web inference...")
33
+
34
+ # Update INFERENCE_CONFIG with the path to the latest best model
35
+ # This ensures the web app uses the correct trained model.
36
+ update_config_with_latest_model(INFERENCE_CONFIG)
37
+ model_path = INFERENCE_CONFIG['model_path']
38
+ example_image_path = INFERENCE_CONFIG['example_image_path'] # Not directly used for inference, but useful for context
39
+
40
+ if not os.path.exists(model_path):
41
+ logger.error(f"Model checkpoint not found at {model_path}. "
42
+ "Please ensure the model is trained and saved.")
43
+ raise FileNotFoundError(f"Model checkpoint not found: {model_path}")
44
+
45
+ try:
46
+ # Load the complete checkpoint (model state, vocabulary, config)
47
+ checkpoint = torch.load(model_path, map_location='cpu', weights_only=False)
48
+
49
+ # Extract configuration and vocabulary from the checkpoint
50
+ model_config_from_checkpoint = checkpoint.get('config', {})
51
+ _vocabulary = checkpoint['vocabulary']
52
+
53
+ # Initialize the model structure with parameters saved in the checkpoint
54
+ _model = ImageCaptioningModel(
55
+ vocab_size=_vocabulary.vocab_size,
56
+ embed_dim=model_config_from_checkpoint.get('embed_dim', 256),
57
+ attention_dim=model_config_from_checkpoint.get('attention_dim', 256),
58
+ decoder_dim=model_config_from_checkpoint.get('decoder_dim', 256),
59
+ dropout=0.0, # Dropout should be off during inference
60
+ fine_tune_encoder=False, # Encoder should not be fine-tuned during inference
61
+ max_caption_length=INFERENCE_CONFIG.get('max_caption_length', 20)
62
+ )
63
+ # Load the trained weights into the model
64
+ _model.load_state_dict(checkpoint['model_state_dict'])
65
+ _model.eval() # Set the model to evaluation mode (important for batch norm, dropout)
66
+
67
+ # Determine the device to run inference on
68
+ _device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
69
+ _model = _model.to(_device) # Move model to GPU if available
70
+ logger.info(f"Model loaded successfully on device: {_device}")
71
+
72
+ # Get the image transformation pipeline for evaluation/inference
73
+ _transform = get_eval_transform()
74
+
75
+ logger.info("Model and vocabulary are ready for inference.")
76
+
77
+ except Exception as e:
78
+ logger.critical(f"Failed to load model or vocabulary: {e}", exc_info=True)
79
+ # Reraise the exception to prevent the Flask app from starting without the model
80
+ raise
81
+
82
# Call the loading function immediately when this module is imported.
# This ensures the model is loaded only once when the Flask app starts;
# any loading failure is raised here so the app refuses to start without
# a working model (fail-fast at import time).
_load_model_and_vocabulary()
85
+
86
+
87
def generate_caption_for_image(image_path: str) -> str:
    """
    Generates a caption for a given image path using the pre-loaded model.

    Args:
        image_path (str): The full path to the image file.

    Returns:
        str: The generated caption.

    Raises:
        RuntimeError: If the module-level model/vocabulary are not initialized,
            or if the image cannot be loaded/transformed (chained to the
            underlying error via ``from``).
        FileNotFoundError: If the image file does not exist.
    """
    if _model is None or _vocabulary is None or _transform is None or _device is None:
        logger.error("Model or vocabulary not loaded. Cannot generate caption.")
        raise RuntimeError("Image captioning model is not initialized.")

    if not os.path.exists(image_path):
        raise FileNotFoundError(f"Image not found at {image_path}.")

    logger.info(f"Processing image: {image_path}")

    try:
        image = Image.open(image_path).convert('RGB')
        image_tensor = _transform(image).to(_device)
    except Exception as e:
        # Raise a concrete exception type instead of bare Exception, and chain
        # the original error so the root cause survives in tracebacks.
        raise RuntimeError(f"Error loading or transforming image {image_path}: {e}") from e

    # Generate the caption using the model's integrated method
    generated_caption = _model.generate_caption(
        image_tensor,
        _vocabulary,
        _device,
        beam_size=INFERENCE_CONFIG.get('beam_size', 5),
        max_length=INFERENCE_CONFIG.get('max_caption_length', 20)
    )
    logger.info(f"Generated caption: {generated_caption}")
    return generated_caption
src/model.py ADDED
@@ -0,0 +1,502 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import torchvision.models as models # Used for ResNet50
5
+
6
+ from .utils import get_logger # Import logger
7
+
8
+ logger = get_logger(__name__)
9
+
10
+
11
class EncoderCNN(nn.Module):
    """
    CNN encoder built on a pre-trained ResNet50 backbone.

    The classifier head (global average pool + fully-connected layer) is
    dropped so that spatial feature maps survive for the decoder's attention
    mechanism; those maps are then adaptively pooled to a fixed
    `encoded_image_size` x `encoded_image_size` grid.
    """

    def __init__(self, encoded_image_size=14, fine_tune=True):
        """
        Args:
            encoded_image_size (int): Target spatial size (e.g. 14 -> 14x14)
                of the pooled feature maps.
            fine_tune (bool): When True the backbone weights remain trainable;
                when False every backbone parameter is frozen.
        """
        super(EncoderCNN, self).__init__()
        self.encoded_image_size = encoded_image_size

        # Pre-trained ResNet50; keep every child module up to and including
        # layer4, discarding the trailing avgpool and fc classifier layers so
        # spatial feature maps are preserved.
        backbone = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)
        feature_layers = list(backbone.children())[:-2]
        self.resnet = nn.Sequential(*feature_layers)

        if fine_tune:
            logger.info("ResNet encoder base layers are fine-tuning enabled.")
        else:
            # Freeze the backbone: no gradients flow into pre-trained weights.
            for weight in self.resnet.parameters():
                weight.requires_grad = False
            logger.info("ResNet encoder base layers are frozen.")

        # Normalize the spatial resolution so the attention mechanism always
        # sees the same number of "pixels", whatever the input image size.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((encoded_image_size, encoded_image_size))

        # Channel dimension of ResNet50's last convolutional block output.
        self.encoder_dim = 2048

    def forward(self, images):
        """
        Encode a batch of images.

        Args:
            images (torch.Tensor): Input images, shape (batch_size, 3, H, W).

        Returns:
            torch.Tensor: Encoded features, shape
                (batch_size, encoder_dim, encoded_image_size, encoded_image_size).
        """
        return self.adaptive_pool(self.resnet(images))
72
+
73
+
74
class Attention(nn.Module):
    """
    Additive (Bahdanau-style) attention over spatial image features.

    Scores each encoder "pixel" against the decoder's current hidden state,
    normalizes the scores with a softmax, and returns the resulting
    probability-weighted context vector together with the weights themselves.
    """

    def __init__(self, encoder_dim, decoder_dim, attention_dim):
        """
        Args:
            encoder_dim (int): Feature size of encoded images (e.g. 2048 for ResNet50).
            decoder_dim (int): Hidden state size of the decoder LSTM.
            attention_dim (int): Width of the internal attention projections.
        """
        super(Attention, self).__init__()
        self.encoder_att = nn.Linear(encoder_dim, attention_dim)   # projects image features
        self.decoder_att = nn.Linear(decoder_dim, attention_dim)   # projects decoder state
        self.full_att = nn.Linear(attention_dim, 1)                # collapses to one score per pixel
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)                           # normalize over pixels

    def forward(self, encoder_out, decoder_hidden):
        """
        Compute the attention-weighted context vector.

        Args:
            encoder_out (torch.Tensor): Encoded images, shape (batch_size, num_pixels, encoder_dim).
            decoder_hidden (torch.Tensor): Previous decoder hidden state, shape (batch_size, decoder_dim).

        Returns:
            tuple:
                - context (torch.Tensor): Weighted sum of encoder features,
                  shape (batch_size, encoder_dim).
                - alpha (torch.Tensor): Attention weights summing to 1 across
                  pixels, shape (batch_size, num_pixels).
        """
        # Project both inputs into the shared attention space.
        projected_features = self.encoder_att(encoder_out)                 # (B, P, A)
        projected_state = self.decoder_att(decoder_hidden).unsqueeze(1)    # (B, 1, A)

        # Additive combination -> one scalar energy per pixel.
        energies = self.full_att(self.relu(projected_features + projected_state))
        scores = energies.squeeze(2)                                       # (B, P)

        # Normalize energies into a probability distribution over pixels.
        alpha = self.softmax(scores)

        # Context = batched dot product of weights with the raw encoder
        # features: (B, 1, P) @ (B, P, E) -> (B, 1, E) -> (B, E).
        context = torch.bmm(alpha.unsqueeze(1), encoder_out).squeeze(1)

        return context, alpha
131
+
132
+
133
class DecoderWithAttention(nn.Module):
    """
    LSTM Decoder with Attention mechanism.
    Generates captions word by word, using the attention-weighted image features
    and previously generated words (teacher forcing during training).
    """
    def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size,
                 encoder_dim=2048, dropout=0.5):
        """
        Initializes the DecoderWithAttention.
        Args:
            attention_dim (int): Size of the attention linear layer.
            embed_dim (int): Dimension of word embeddings.
            decoder_dim (int): Hidden state size of the decoder LSTM.
            vocab_size (int): Total size of the vocabulary.
            encoder_dim (int): Feature size of encoded images (default 2048 for ResNet50).
            dropout (float): Dropout rate for regularization.
        """
        super(DecoderWithAttention, self).__init__()

        self.encoder_dim = encoder_dim
        self.attention_dim = attention_dim
        self.embed_dim = embed_dim
        self.decoder_dim = decoder_dim
        self.vocab_size = vocab_size
        self.dropout_rate = dropout

        # Attention network
        self.attention = Attention(encoder_dim, decoder_dim, attention_dim)

        # Word embedding layer
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.embedding_dropout = nn.Dropout(self.dropout_rate)

        # LSTMCell for decoding.
        # Input to LSTMCell is the concatenation of word embedding and attention-weighted encoding
        self.decode_step = nn.LSTMCell(embed_dim + encoder_dim, decoder_dim, bias=True)

        # Linear layers to initialize the LSTM's hidden and cell states from the encoder output
        self.init_h = nn.Linear(encoder_dim, decoder_dim)
        self.init_c = nn.Linear(encoder_dim, decoder_dim)

        # Linear layer to create a "gate" for the attention-weighted encoding (Visual Sentinel).
        # This f_beta gate allows the model to decide how much of the attention-weighted
        # context to use for generating the next word, enabling it to ignore irrelevant visual information.
        self.f_beta = nn.Linear(decoder_dim, encoder_dim)
        self.sigmoid = nn.Sigmoid()  # Activation for the gate

        # Linear layer to project decoder output to vocabulary size (scores for each word)
        self.fc = nn.Linear(decoder_dim, vocab_size)
        self.dropout_layer = nn.Dropout(self.dropout_rate)

        # Initialize some weights
        self.init_weights()

        # A placeholder for max caption length during inference/generation.
        # This will typically be set by the calling model or config.
        self.max_caption_length_for_inference = 20

    def init_weights(self):
        """Initializes embedding and output-layer parameters with values from the uniform distribution."""
        self.embedding.weight.data.uniform_(-0.1, 0.1)
        self.fc.bias.data.fill_(0)
        self.fc.weight.data.uniform_(-0.1, 0.1)

    def load_pretrained_embeddings(self, embeddings):
        """
        Loads pre-trained embeddings into the embedding layer.
        Args:
            embeddings (torch.Tensor): A tensor of pre-trained word embeddings;
                must have shape (vocab_size, embed_dim) to match the layer.
        """
        self.embedding.weight = nn.Parameter(embeddings)
        # Optionally, freeze embeddings if they are pre-trained and you don't want to fine-tune them
        # self.embedding.weight.requires_grad = False

    def fine_tune_embeddings(self, fine_tune=True):
        """
        Allows or disallows fine-tuning of the embedding layer.
        Args:
            fine_tune (bool): If True, embedding weights are trainable. If False, they are frozen.
        """
        for p in self.embedding.parameters():
            p.requires_grad = fine_tune

    def init_hidden_state(self, encoder_out):
        """
        Creates initial hidden and cell states for the LSTM from the encoded image.
        Uses the mean of the encoder output features (mean over pixels) to initialize the LSTM states.
        Args:
            encoder_out (torch.Tensor): Encoded images, shape (batch_size, num_pixels, encoder_dim).
        Returns:
            tuple: (hidden state (h), cell state (c)), each of shape (batch_size, decoder_dim).
        """
        # Calculate mean of encoder output across pixels
        mean_encoder_out = encoder_out.mean(dim=1)
        h = self.init_h(mean_encoder_out)  # (batch_size, decoder_dim)
        c = self.init_c(mean_encoder_out)  # (batch_size, decoder_dim)
        return h, c

    def forward(self, encoder_out, encoded_captions, caption_lengths):
        """
        Forward pass through the decoder during training (teacher forcing).
        Args:
            encoder_out (torch.Tensor): Encoded images from CNN,
                shape (batch_size, encoder_dim, enc_image_size_H, enc_image_size_W).
            encoded_captions (torch.Tensor): Captions, shape (batch_size, max_caption_length).
                Assumed to start with a <START> token — hence decode lengths of (length - 1) below.
            caption_lengths (torch.Tensor): Actual lengths of captions (before padding), shape (batch_size,).
        Returns:
            tuple:
                - predictions (torch.Tensor): Predicted word scores,
                  shape (batch_size, max_decode_length_in_batch, vocab_size).
                - encoded_captions (torch.Tensor): Captions sorted by length.
                - decode_lengths (list): Actual decoding lengths for each caption in the batch.
                - alphas (torch.Tensor): Attention weights,
                  shape (batch_size, max_decode_length_in_batch, num_pixels).
                - sort_ind (torch.Tensor): Indices used to sort the batch
                  (callers need these to restore / match the original order).
        """
        batch_size = encoder_out.size(0)
        enc_image_h = encoder_out.size(2)
        enc_image_w = encoder_out.size(3)
        num_pixels = enc_image_h * enc_image_w

        # Reshape encoder_out for attention: (batch_size, num_pixels, encoder_dim)
        # Permute from (N, C, H, W) to (N, H, W, C) then flatten H*W
        encoder_out = encoder_out.permute(0, 2, 3, 1).contiguous()
        encoder_out = encoder_out.view(batch_size, num_pixels, self.encoder_dim)

        # Sort input data by decreasing lengths for packed sequences.
        # This is crucial: with descending lengths the "active" prefix of the
        # batch at each time step is simply the first batch_size_t rows.
        caption_lengths, sort_ind = caption_lengths.sort(dim=0, descending=True)
        encoder_out = encoder_out[sort_ind]  # Apply sorting to encoder output
        encoded_captions = encoded_captions[sort_ind]  # Apply sorting to captions

        # Embedding: (batch_size, max_caption_length, embed_dim)
        embeddings = self.embedding(encoded_captions)
        embeddings = self.embedding_dropout(embeddings)

        # Initialize LSTM state (h, c) from the mean of encoder output
        h, c = self.init_hidden_state(encoder_out)

        # Create tensors to hold word predictions and attention weights.
        # We predict up to (max_caption_length - 1) words (excluding the <START> token).
        decode_lengths = (caption_lengths - 1).tolist()  # Lengths of sequences to decode
        max_decode_length = max(decode_lengths)  # Max length in the current batch

        predictions = torch.zeros(batch_size, max_decode_length, self.vocab_size).to(encoder_out.device)
        alphas = torch.zeros(batch_size, max_decode_length, num_pixels).to(encoder_out.device)

        # For each time step in the decoding process
        for t in range(max_decode_length):
            # Get batch size for current time step.
            # Sequences are sorted by descending length, so once a sequence
            # finishes it simply drops off the end of the active prefix.
            batch_size_t = sum([l > t for l in decode_lengths])

            # Apply attention mechanism to the active sequences in the batch
            attention_weighted_encoding, alpha = self.attention(encoder_out[:batch_size_t], h[:batch_size_t])

            # Apply sigmoid gate to attention-weighted encoding (Visual Sentinel)
            gate = self.sigmoid(self.f_beta(h[:batch_size_t]))
            attention_weighted_encoding = gate * attention_weighted_encoding

            # Perform one step of LSTM decoding.
            # Input to LSTM: (current_word_embedding + attention_weighted_encoding).
            # NOTE: h and c shrink to batch_size_t here; this is safe because
            # batch_size_t is non-increasing across time steps.
            h, c = self.decode_step(
                torch.cat([embeddings[:batch_size_t, t, :], attention_weighted_encoding], dim=1),
                (h[:batch_size_t], c[:batch_size_t])
            )

            # Predict next word using the fully connected layer
            preds = self.fc(self.dropout_layer(h))

            # Store predictions and attention weights for the current time step
            predictions[:batch_size_t, t, :] = preds
            alphas[:batch_size_t, t, :] = alpha

        return predictions, encoded_captions, decode_lengths, alphas, sort_ind
309
+
310
+
311
class ImageCaptioningModel(nn.Module):
    """
    Complete Image Captioning Model integrating EncoderCNN and DecoderWithAttention.
    Provides methods for both training (forward pass) and inference (caption generation).

    EncoderCNN and DecoderWithAttention are defined elsewhere in this module;
    the encoder is ResNet50-based (encoder_dim 2048) per its constructor defaults.
    """
    def __init__(self, vocab_size, embed_dim=256, attention_dim=256, encoder_dim=2048,
                 decoder_dim=256, dropout=0.5, fine_tune_encoder=True, max_caption_length=20):
        """
        Initializes the ImageCaptioningModel.

        Args:
            vocab_size (int): Total size of the vocabulary.
            embed_dim (int): Dimension of word embeddings.
            attention_dim (int): Size of the attention linear layer.
            encoder_dim (int): Feature size of encoded images (default 2048 for ResNet50).
                NOTE: this argument is effectively ignored — the value is re-read from
                the instantiated encoder below, so the two always agree.
            decoder_dim (int): Hidden state size of the decoder LSTM.
            dropout (float): Dropout rate for regularization.
            fine_tune_encoder (bool): If True, allows the encoder parameters to be updated.
            max_caption_length (int): Maximum length of generated captions during inference.
        """
        super(ImageCaptioningModel, self).__init__()

        # Initialize the Encoder (ResNet50-based)
        self.encoder = EncoderCNN(encoded_image_size=14, fine_tune=fine_tune_encoder)
        # Use the encoder's own reported feature dimension rather than the
        # constructor argument, so decoder wiring can never mismatch the CNN.
        self.encoder_dim = self.encoder.encoder_dim  # This will be 2048

        # Initialize the Decoder with Attention
        self.decoder = DecoderWithAttention(
            attention_dim=attention_dim,
            embed_dim=embed_dim,
            decoder_dim=decoder_dim,
            vocab_size=vocab_size,
            encoder_dim=self.encoder_dim,  # Pass the correct encoder_dim
            dropout=dropout
        )
        # Stashed on the decoder so generate_caption() has a default cap
        # when the caller does not pass max_length explicitly.
        self.decoder.max_caption_length_for_inference = max_caption_length

    def forward(self, images, captions, caption_lengths):
        """
        Forward pass through the complete model for training (teacher forcing).

        Args:
            images (torch.Tensor): Input images.
            captions (torch.Tensor): Target captions (token indices).
            caption_lengths (torch.Tensor): Actual lengths of captions.

        Returns:
            tuple: (predictions, encoded_captions, decode_lengths, alphas, sort_ind)
                as returned by the decoder's forward pass. NOTE(review): the decoder
                sorts the batch internally — `sort_ind` maps back to the input order.
        """
        encoder_out = self.encoder(images)  # Encode images
        predictions, encoded_captions, decode_lengths, alphas, sort_ind = self.decoder(
            encoder_out, captions, caption_lengths  # Decode captions
        )
        return predictions, encoded_captions, decode_lengths, alphas, sort_ind

    def generate_caption(self, image_tensor, vocabulary, device, beam_size=5, max_length=None):
        """
        Performs beam search to generate a caption for a single image.

        Args:
            image_tensor (torch.Tensor): Preprocessed image tensor (3, H, W). NOT batched.
            vocabulary (COCOVocabulary): Vocabulary object; must expose `word2idx`
                (with '<START>' / '<END>' keys), `vocab_size`, and `indices_to_caption`.
            device (torch.device): Device to run the model on (cpu/cuda).
            beam_size (int): Size of beam for beam search. This local variable shrinks
                as beams finish; it does not mutate any persistent state.
            max_length (int, optional): Maximum length of the generated caption.
                If None, uses self.decoder.max_caption_length_for_inference.

        Returns:
            str: Generated caption string ("" only in the degenerate no-sequence case).
        """
        self.eval()  # Set model to evaluation mode
        # Use the caller's max_length if provided, otherwise the model's default
        current_max_length = max_length if max_length is not None else self.decoder.max_caption_length_for_inference

        with torch.no_grad():
            # Add batch dimension and move to device for the encoder:
            # (C, H, W) -> (1, C, H, W)
            image_tensor_batched = image_tensor.unsqueeze(0).to(device)

            # Encoder output: (1, encoder_dim, encoded_image_size, encoded_image_size)
            encoder_output_from_cnn = self.encoder(image_tensor_batched)

            # Reshape to (1, num_pixels, encoder_dim) for attention:
            # permute (N, C, H, W) -> (N, H, W, C), then flatten H*W
            encoder_out = encoder_output_from_cnn.permute(0, 2, 3, 1).contiguous()
            encoder_out = encoder_out.view(1, -1, self.encoder_dim)  # (1, num_pixels, encoder_dim)

            # Replicate across beams: (beam_size, num_pixels, encoder_dim)
            encoder_out = encoder_out.expand(beam_size, encoder_out.size(1), encoder_out.size(2))

            # Previous word per beam; every beam starts at <START>
            k_prev_words = torch.LongTensor([[vocabulary.word2idx['<START>']]] * beam_size).to(device)

            # Running sequences per beam; initially just the <START> token
            seqs = k_prev_words

            # Cumulative log-probability per beam, all zero at the start
            top_k_scores = torch.zeros(beam_size, 1).to(device)

            # Finished hypotheses and their scores
            complete_seqs = list()
            complete_seqs_scores = list()

            # LSTM hidden/cell state, initialized from the (beam-expanded) image features
            h, c = self.decoder.init_hidden_state(encoder_out)

            # Decoding loop; `step` counts generated tokens (1-based)
            step = 1
            while True:
                # Embed each beam's previous word: (beam_size, embed_dim)
                embeddings = self.decoder.embedding(k_prev_words).squeeze(1)

                # Attention over image features, then visual-sentinel gate
                attention_weighted_encoding, alpha = self.decoder.attention(encoder_out, h)
                gate = self.decoder.sigmoid(self.decoder.f_beta(h))
                attention_weighted_encoding = gate * attention_weighted_encoding

                # One LSTM step: input is [word embedding ; gated attention context]
                h, c = self.decoder.decode_step(
                    torch.cat([embeddings, attention_weighted_encoding], dim=1),
                    (h, c)
                )  # (beam_size, decoder_dim)

                # Vocabulary scores -> log-probabilities
                scores = self.decoder.fc(h)  # (beam_size, vocab_size)
                scores = F.log_softmax(scores, dim=1)

                # Accumulate with each beam's running score
                scores = top_k_scores.expand_as(scores) + scores  # (beam_size, vocab_size)

                if step == 1:
                    # All beams share the same <START> parent, so rank only row 0
                    top_k_scores, top_k_words = scores[0].topk(beam_size, 0, True, True)  # (beam_size)
                else:
                    # Rank across the flattened (beam x vocab) score matrix
                    top_k_scores, top_k_words = scores.view(-1).topk(beam_size, 0, True, True)  # (beam_size)

                # Decompose flat indices into (parent beam, next word).
                # NOTE(review): tensor `//` floor-division is deprecated in newer
                # PyTorch; torch.div(..., rounding_mode='floor') is the modern form.
                prev_word_inds = top_k_words // vocabulary.vocab_size  # (beam_size)
                next_word_inds = top_k_words % vocabulary.vocab_size  # (beam_size)

                # Extend the surviving parents with their chosen next word
                seqs = torch.cat([seqs[prev_word_inds], next_word_inds.unsqueeze(1)], dim=1)  # (beam_size, step + 1)

                # Split beams into still-growing vs finished (<END> emitted)
                incomplete_inds = [ind for ind, next_word in enumerate(next_word_inds)
                                   if next_word != vocabulary.word2idx['<END>']]
                complete_inds = list(set(range(len(next_word_inds))) - set(incomplete_inds))

                # Retire finished hypotheses
                if len(complete_inds) > 0:
                    complete_seqs.extend(seqs[complete_inds].tolist())
                    complete_seqs_scores.extend(top_k_scores[complete_inds])

                # Shrink the beam by the number of finished hypotheses
                beam_size -= len(complete_inds)
                if beam_size == 0:  # If all beams complete, break
                    break

                # Keep only the state belonging to still-active beams.
                # Order matters: h/c/encoder_out are re-indexed by *parent* beam.
                seqs = seqs[incomplete_inds]
                h = h[prev_word_inds[incomplete_inds]]
                c = c[prev_word_inds[incomplete_inds]]
                top_k_scores = top_k_scores[incomplete_inds].unsqueeze(1)  # Reshape for next step
                k_prev_words = next_word_inds[incomplete_inds].unsqueeze(1)
                encoder_out = encoder_out[prev_word_inds[incomplete_inds]]  # Propagate encoder_out for active beams

                # Length cap: stop even if some beams never emitted <END>
                if step > current_max_length:
                    break
                step += 1

            # If no hypothesis finished (all beams hit the length cap first),
            # fall back to the best *incomplete* sequence.
            if not complete_seqs:
                final_seqs = seqs.tolist()
                final_scores = top_k_scores.squeeze(1).tolist()
                if not final_seqs:  # Fallback if even no incomplete sequences are available (shouldn't happen)
                    return ""
                i = final_scores.index(max(final_scores))
                best_seq = final_seqs[i]
            else:
                # Best finished hypothesis by cumulative log-probability.
                # NOTE(review): scores are not length-normalized, which biases
                # selection toward shorter captions — confirm this is intended.
                i = complete_seqs_scores.index(max(complete_seqs_scores))
                best_seq = complete_seqs[i]

            # Convert the winning index sequence back to a readable caption
            return vocabulary.indices_to_caption(best_seq)
src/train.py ADDED
@@ -0,0 +1,471 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import math
4
+ import pickle
5
+ import gc # For memory optimization
6
+
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.optim as optim
10
+ from torch.utils.data import DataLoader
11
+ from torch.nn.utils.rnn import pack_padded_sequence
12
+
13
+ from .model import ImageCaptioningModel # Import the model
14
+ from .data_preprocessing import COCODataset, COCOVocabulary # Import data handling classes
15
+ from .evaluation import calculate_bleu_scores_detailed # Import evaluation metric
16
+ from .utils import get_logger, get_train_transform, get_eval_transform # Import utilities
17
+
18
+ logger = get_logger(__name__)
19
+
20
+
21
def train_epoch(model, train_loader, criterion, optimizer, device, epoch, config):
    """
    Run one full training pass over `train_loader` and update model weights.

    Args:
        model (nn.Module): The image captioning model.
        train_loader (DataLoader): DataLoader yielding (images, captions, lengths, _).
        criterion (nn.Module): Loss function.
        optimizer (torch.optim.Optimizer): Optimizer.
        device (torch.device): Device to run training on (cpu/cuda).
        epoch (int): Current epoch number (0-indexed; logged as 1-indexed).
        config (dict): Configuration dictionary (reads 'grad_clip', 'log_step',
            'num_epochs').

    Returns:
        float: Average training loss over all batches of the epoch.
    """
    model.train()  # Enable dropout / batch-norm training behavior
    cumulative_loss = 0.0
    t_start = time.time()
    n_batches = len(train_loader)
    log_every = config.get('log_step', 100)
    max_grad_norm = config.get('grad_clip', 5.0)

    for batch_idx, (images, captions, lengths, _) in enumerate(train_loader, start=1):
        images = images.to(device)
        captions = captions.to(device)
        lengths = lengths.to(device)

        # Teacher-forced forward pass.
        # scores:         (batch, max_decode_len_in_batch, vocab_size)
        # caps_sorted:    captions re-sorted by the decoder
        # decode_lengths: effective per-sample decode lengths
        scores, caps_sorted, decode_lengths, _, _ = model(images, captions, lengths)

        # Pack predictions down to the real (unpadded) time steps so the loss
        # ignores padding entirely.
        scores_packed = pack_padded_sequence(scores, decode_lengths, batch_first=True).data

        # Targets are the captions shifted left by one token: the model predicts
        # word t+1 from word t, so <START> is dropped from the target side.
        targets = caps_sorted[:, 1:]
        targets_packed = pack_padded_sequence(targets, decode_lengths, batch_first=True).data

        loss = criterion(scores_packed, targets_packed)

        # Standard optimization step with gradient clipping (RNNs are prone
        # to exploding gradients).
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()

        batch_loss = loss.item()
        cumulative_loss += batch_loss

        # Periodic progress logging
        if batch_idx % log_every == 0:
            perplexity = math.exp(batch_loss) if batch_loss < float('inf') else float('inf')
            logger.info(f"Epoch [{epoch+1}/{config['num_epochs']}], Step [{batch_idx}/{n_batches}], "
                        f"Loss: {batch_loss:.4f}, Perplexity: {perplexity:.4f}")

    epoch_loss = cumulative_loss / n_batches
    epoch_time = time.time() - t_start
    logger.info(f"Epoch {epoch+1} Training finished. Avg Loss: {epoch_loss:.4f}, Time: {epoch_time:.2f}s")
    return epoch_loss
89
+
90
+
91
def validate_epoch(model, val_loader, criterion, vocabulary, device, config):
    """
    Run one validation pass: compute validation loss on every batch and
    beam-search captions on a configurable subset of batches for BLEU scoring.

    Args:
        model (nn.Module): The image captioning model.
        val_loader (DataLoader): DataLoader yielding (images, captions, lengths, _).
        criterion (nn.Module): Loss function.
        vocabulary (COCOVocabulary): Converts indices to caption strings.
        device (torch.device): Device to run validation on (cpu/cuda).
        config (dict): Reads 'val_inference_batches', 'val_beam_size',
            'max_caption_length'.

    Returns:
        tuple: (average validation loss, generated caption strings,
                reference caption strings) — the two lists are index-aligned.
    """
    model.eval()  # Disable dropout etc. for evaluation
    loss_sum = 0.0
    generated_captions = []
    reference_captions = []

    with torch.no_grad():  # No gradients needed during validation
        n_batches = len(val_loader)
        for batch_idx, (images, captions, lengths, _) in enumerate(val_loader):
            images = images.to(device)
            captions_dev = captions.to(device)
            lengths_dev = lengths.to(device)

            # Teacher-forced forward pass for the validation loss,
            # mirroring the training loss computation exactly.
            scores, caps_sorted, decode_lengths, _, _ = model(images, captions_dev, lengths_dev)

            scores_packed = pack_padded_sequence(scores, decode_lengths, batch_first=True).data
            targets = caps_sorted[:, 1:]  # Drop <START> from the target side
            targets_packed = pack_padded_sequence(targets, decode_lengths, batch_first=True).data

            loss_sum += criterion(scores_packed, targets_packed).item()

            # Beam-search inference on the first 'val_inference_batches' batches
            # (or on every batch when the limit is unset).
            inference_limit = config.get('val_inference_batches')
            if inference_limit is None or batch_idx < inference_limit:
                for img_idx in range(images.size(0)):
                    single_image = images[img_idx]  # (C, H, W)
                    caption_text = model.generate_caption(
                        single_image, vocabulary, device,
                        beam_size=config.get('val_beam_size', 3),
                        max_length=config.get('max_caption_length', 20)
                    )
                    # Reference string comes from the original (CPU) caption indices
                    reference_text = vocabulary.indices_to_caption(captions[img_idx].cpu().numpy())
                    generated_captions.append(caption_text)
                    reference_captions.append(reference_text)

    avg_loss = loss_sum / n_batches
    perplexity = math.exp(avg_loss) if avg_loss < float('inf') else float('inf')
    logger.info(f"Validation Avg Loss: {avg_loss:.4f}, Perplexity: {perplexity:.4f}")

    return avg_loss, generated_captions, reference_captions
152
+
153
+
154
def train_model(config):
    """
    Main training function. Orchestrates vocabulary setup, dataset/loader
    construction, checkpoint resumption, and the epoch loop of training,
    validation, BLEU scoring, LR scheduling, and checkpoint saving.

    Args:
        config (dict): Configuration dictionary containing all training
            parameters (paths, hyperparameters, logging/saving intervals).

    Returns:
        tuple: (trained model, optimizer, scheduler, vocabulary)

    Raises:
        FileNotFoundError: If a caption annotation file is missing.
    """
    logger.info("Starting training process...")

    # Select device (CUDA if available, else CPU)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info(f"Using device: {device}")

    # Required paths from configuration (KeyError here means a bad config)
    data_folder = config['data_folder']
    train_image_folder = config['train_image_folder']
    val_image_folder = config['val_image_folder']
    train_caption_file = config['train_caption_file']
    val_caption_file = config['val_caption_file']

    # Fail fast if annotation files are missing
    if not os.path.exists(train_caption_file):
        raise FileNotFoundError(f"Training caption file not found: {train_caption_file}")
    if not os.path.exists(val_caption_file):
        raise FileNotFoundError(f"Validation caption file not found: {val_caption_file}")

    # Augmenting transform for training; deterministic transform for validation
    train_transform = get_train_transform()
    val_transform = get_eval_transform()

    # ======================== VOCABULARY HANDLING ========================
    # Load a previously pickled vocabulary if present; otherwise build one
    # from the training captions and save it for future runs.
    VOCABULARY_FILE_PATH = os.path.join(config['output_dir'], 'vocabulary.pkl')

    vocabulary = None

    # Try to LOAD vocabulary
    if os.path.exists(VOCABULARY_FILE_PATH):
        try:
            with open(VOCABULARY_FILE_PATH, 'rb') as f:
                vocabulary = pickle.load(f)
            logger.info(f"Loaded vocabulary from {VOCABULARY_FILE_PATH}")
        except Exception as e:
            logger.warning(f"Could not load vocabulary from {VOCABULARY_FILE_PATH}: {e}. Will attempt to build new vocabulary.")
            vocabulary = None  # Ensure it's None if loading fails
    else:
        logger.info(f"Vocabulary file not found at {VOCABULARY_FILE_PATH}. Will build new vocabulary.")

    # BUILD a new vocabulary when none could be loaded
    if vocabulary is None:
        logger.info("Building new vocabulary from training dataset...")
        # Temporary dataset used only for its vocabulary-building side effect;
        # no image transforms are needed for this.
        temp_train_dataset_for_vocab = COCODataset(
            image_dir=os.path.join(data_folder, train_image_folder),  # Image dir still needed for dataset init
            caption_file=train_caption_file,
            subset_size=config.get('vocab_subset_size'),  # Optionally build from a subset
            transform=None,
            vocabulary=None  # Explicitly tell it to build a new vocabulary
        )
        vocabulary = temp_train_dataset_for_vocab.vocabulary
        del temp_train_dataset_for_vocab  # Free up memory
        gc.collect()  # Force garbage collection
        logger.info("New vocabulary built.")

        # Persist the freshly built vocabulary (best-effort)
        try:
            os.makedirs(os.path.dirname(VOCABULARY_FILE_PATH), exist_ok=True)
            with open(VOCABULARY_FILE_PATH, 'wb') as f:
                pickle.dump(vocabulary, f)
            logger.info(f"Saved newly built vocabulary to {VOCABULARY_FILE_PATH}")
        except Exception as e:
            logger.error(f"Error saving newly built vocabulary to {VOCABULARY_FILE_PATH}: {e}")
    # ===========================================================================


    # Datasets for training and validation, sharing the same vocabulary
    train_dataset = COCODataset(
        image_dir=os.path.join(data_folder, train_image_folder),
        caption_file=train_caption_file,
        vocabulary=vocabulary,  # Pass the vocabulary
        max_caption_length=config.get('max_caption_length', 20),
        subset_size=config.get('train_subset_size'),
        transform=train_transform
    )

    val_dataset = COCODataset(
        image_dir=os.path.join(data_folder, val_image_folder),
        caption_file=val_caption_file,
        vocabulary=vocabulary,  # Pass the same vocabulary
        max_caption_length=config.get('max_caption_length', 20),
        subset_size=config.get('val_subset_size'),
        transform=val_transform
    )

    # Data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=config.get('batch_size', 64),
        shuffle=True,  # Shuffle training data
        num_workers=config.get('num_workers', 2),
        pin_memory=True  # Faster host-to-GPU transfer
    )

    val_loader = DataLoader(
        val_dataset,
        batch_size=config.get('batch_size', 64),
        shuffle=False,  # Do not shuffle validation data
        num_workers=config.get('num_workers', 2),
        pin_memory=True
    )

    logger.info(f"Training dataset size: {len(train_dataset)}")
    logger.info(f"Validation dataset size: {len(val_dataset)}")

    # Model
    model = ImageCaptioningModel(
        vocab_size=vocabulary.vocab_size,
        embed_dim=config.get('embed_dim', 256),
        attention_dim=config.get('attention_dim', 256),
        decoder_dim=config.get('decoder_dim', 256),
        dropout=config.get('dropout', 0.5),
        fine_tune_encoder=config.get('fine_tune_encoder', True),
        max_caption_length=config.get('max_caption_length', 20)  # Default cap for generate_caption
    ).to(device)

    # Loss: CrossEntropy that ignores <PAD> positions in the targets
    criterion = nn.CrossEntropyLoss(ignore_index=vocabulary.word2idx['<PAD>']).to(device)

    # Two parameter groups so the (pretrained) encoder can use a much smaller
    # learning rate than the decoder; lr 0.0 freezes the encoder in practice
    # when fine-tuning is disabled.
    encoder_params = list(model.encoder.parameters())
    decoder_params = list(model.decoder.parameters())

    optimizer = optim.Adam([
        {'params': encoder_params, 'lr': config.get('encoder_learning_rate', 1e-5) if config.get('fine_tune_encoder', True) else 0.0},
        {'params': decoder_params, 'lr': config.get('learning_rate', 4e-4)}
    ])

    # LR scheduler: reduce LR when validation BLEU-4 plateaus.
    # NOTE(review): `verbose=True` is deprecated in recent PyTorch releases.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='max',  # We maximize BLEU-4
        factor=config.get('lr_reduce_factor', 0.5),  # LR multiplier on plateau
        patience=config.get('lr_patience', 5),  # Epochs without improvement before reducing
        verbose=True,  # Print messages when LR is updated
        min_lr=1e-7  # Lower bound on learning rate
    )

    # ======================== RESUMPTION LOGIC ========================
    start_epoch = 0
    # 'max' mode baseline: any real BLEU improves on 0.0
    best_val_score = 0.0
    output_dir = config['output_dir']
    models_dir = config['models_dir']

    # Find the latest checkpoint to resume from: prefer best_model_bleu*.pth
    # in models_dir, otherwise periodic model_epoch_*.pth in output_dir.
    # NOTE(review): os.listdir raises if either directory does not exist —
    # confirm both are created before this point (e.g. by config setup).
    latest_checkpoint_path = None
    saved_models = [f for f in os.listdir(models_dir) if f.startswith('best_model_bleu') and f.endswith('.pth')]
    if not saved_models:
        saved_models = [f for f in os.listdir(output_dir) if f.startswith('model_epoch_') and f.endswith('.pth')]

    if saved_models:
        if 'best_model_bleu' in saved_models[0]:
            # best_model_bleu<score>.pth: pick the highest BLEU from the filename
            latest_checkpoint_name = max(saved_models, key=lambda f: float(f.split('bleu')[1].replace('.pth', '')))
        else:  # model_epoch_<N>.pth: pick the highest epoch number
            latest_checkpoint_name = sorted(saved_models, key=lambda x: int(x.split('_')[-1].split('.')[0]))[-1]

        # Rebuild the full path matching the directory the name came from
        if latest_checkpoint_name.startswith('best_model_bleu'):
            latest_checkpoint_path = os.path.join(models_dir, latest_checkpoint_name)
        else:
            latest_checkpoint_path = os.path.join(output_dir, latest_checkpoint_name)

        logger.info(f"Attempting to resume training from checkpoint: {latest_checkpoint_path}")
        try:
            # NOTE(review): torch.load unpickles arbitrary objects — only load
            # checkpoints from trusted sources.
            checkpoint = torch.load(latest_checkpoint_path, map_location=device)
            model.load_state_dict(checkpoint['model_state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

            # Restore scheduler state when present (keeps plateau counters intact)
            if 'scheduler_state_dict' in checkpoint:
                scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
            else:
                logger.warning("Scheduler state not found in checkpoint. Scheduler will restart its state.")

            start_epoch = checkpoint['epoch']
            # Safely get best_val_score, default to 0.0 if not found
            best_val_score = checkpoint.get('best_val_score', 0.0)
            logger.info(f"Resumed training from epoch {start_epoch}. Best validation score so far: {best_val_score:.4f}")
        except Exception as e:
            logger.error(f"Could not load checkpoint from {latest_checkpoint_path}: {e}. Starting training from scratch.")
            # Reset resumption state on failure
            start_epoch = 0
            best_val_score = 0.0
    else:
        logger.info("No checkpoint found. Starting training from scratch.")
    # ===========================================================================


    # Epoch loop
    num_epochs = config.get('num_epochs', 10)

    for epoch in range(start_epoch, num_epochs):  # Start from 'start_epoch' when resuming
        # One training pass
        epoch_train_loss = train_epoch(model, train_loader, criterion, optimizer, device, epoch, config)

        # One validation pass (loss + generated/reference captions)
        val_avg_loss, val_generated_captions, val_reference_captions = validate_epoch(
            model, val_loader, criterion, vocabulary, device, config
        )

        # BLEU on the validation captions drives both the scheduler and
        # best-model checkpointing
        if val_generated_captions and val_reference_captions:
            val_bleu_scores = calculate_bleu_scores_detailed(val_reference_captions, val_generated_captions)
            current_val_score_for_scheduler = val_bleu_scores['BLEU-4']  # Scheduler monitors BLEU-4
            logger.info(f"Epoch {epoch+1} Validation BLEU-4: {current_val_score_for_scheduler:.4f}")

            # Plateau detection: reduces LR after 'patience' non-improving epochs
            scheduler.step(current_val_score_for_scheduler)

            # Save a new best-model checkpoint when BLEU-4 improves
            if current_val_score_for_scheduler > best_val_score:
                best_val_score = current_val_score_for_scheduler
                model_path = os.path.join(models_dir, f"best_model_bleu{best_val_score:.4f}.pth")
                torch.save({
                    'epoch': epoch + 1,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),  # Needed for faithful resume
                    'loss': epoch_train_loss,
                    'vocabulary': vocabulary,
                    'config': config,  # Saved for easy reloading later
                    'best_val_score': best_val_score  # Best score achieved so far
                }, model_path)
                logger.info(f"Saved best model checkpoint to {model_path}")
        else:
            logger.warning("No captions generated during validation for metric calculation. Scheduler stepped with 0.0.")
            scheduler.step(0.0)  # Step with a low value so plateau tracking continues

        # Periodic checkpoint (independent of "best"), useful for resuming
        if (epoch + 1) % config.get('save_interval', 5) == 0:
            model_path_periodic = os.path.join(output_dir, f"model_epoch_{epoch+1}.pth")
            torch.save({
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),  # Needed for faithful resume
                'loss': epoch_train_loss,
                'vocabulary': vocabulary,
                'config': config,
                'best_val_score': best_val_score  # Carry the running best along
            }, model_path_periodic)
            logger.info(f"Saved periodic model checkpoint to {model_path_periodic}")


        # ======================== MEMORY OPTIMIZATION AFTER EACH EPOCH ========================
        logger.info("Performing memory optimization after epoch...")
        # Release cached GPU memory back to the allocator pool
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            logger.info("CUDA cache emptied.")

        # Collect unreferenced Python objects (datasets, tensors, etc.)
        gc.collect()
        logger.info("Python garbage collector run.")
        # ======================================================================================

    logger.info("Training complete.")
    return model, optimizer, scheduler, vocabulary  # Return trained components for potential further use
435
+
436
+
437
if __name__ == '__main__':
    # Script entry point: running `train.py` directly starts the training pipeline.
    from config import TRAINING_CONFIG, update_config_with_latest_model, _MODELS_DIR, _OUTPUT_DIR

    # Let the config discover the most recent model in the 'models' directory,
    # so pre-copied checkpoints are picked up for resumption. With a clean
    # directory this is a no-op (score defaults to 0.0000).
    update_config_with_latest_model(TRAINING_CONFIG)

    logger.info("Starting model training process...")
    try:
        trained_model, optimizer, scheduler, vocabulary = train_model(TRAINING_CONFIG)
        logger.info("Model Training Complete!")

        # Explicitly persist the last-epoch model as well; the "best" checkpoint
        # saved during training may come from an earlier epoch.
        final_model_path = os.path.join(
            _MODELS_DIR, f"final_model_epoch_{TRAINING_CONFIG['num_epochs']}.pth"
        )
        final_checkpoint = {
            'epoch': TRAINING_CONFIG['num_epochs'],
            'model_state_dict': trained_model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict(),
            'vocabulary': vocabulary,
            'config': TRAINING_CONFIG,
            'best_val_score': 0  # Placeholder, retrieve from scheduler if needed
        }
        torch.save(final_checkpoint, final_model_path)
        logger.info(f"Saved final model checkpoint to {final_model_path}")

    except FileNotFoundError as e:
        # Missing dataset/annotation files are an expected, user-fixable failure.
        logger.error(f"Critical data file missing: {e}")
        logger.error("Please ensure the COCO dataset and annotation files are correctly placed as described in README.md.")
    except Exception as e:
        # Top-level boundary: log the full traceback for anything unexpected.
        logger.critical(f"An unhandled error occurred during training: {e}", exc_info=True)
src/utils.py ADDED
@@ -0,0 +1,571 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import logging
2
+ # import sys
3
+ # import os
4
+ # import shutil
5
+ # import matplotlib.pyplot as plt
6
+ # import numpy as np
7
+ # from PIL import Image
8
+ # import torch
9
+ # import torchvision.transforms as transforms
10
+
11
+ # # --- Logging Configuration ---
12
+ # # Configure logging to output to console and a file.
13
+ # # This logger will be used across all modules.
14
+ # logger = logging.getLogger(__name__) # Get a logger specific to this module
15
+ # logger.setLevel(logging.INFO) # Set the minimum level of messages to be logged
16
+
17
+ # # Ensure handlers are not duplicated if script is run multiple times in same session
18
+ # if not logger.handlers:
19
+ # # Console handler
20
+ # c_handler = logging.StreamHandler(sys.stdout)
21
+ # c_handler.setLevel(logging.INFO)
22
+
23
+ # # File handler - logs to 'training.log' in the 'output' directory
24
+ # # Ensure 'output' directory exists before creating the log file
25
+ # log_dir = 'output'
26
+ # os.makedirs(log_dir, exist_ok=True)
27
+ # f_handler = logging.FileHandler(os.path.join(log_dir, 'training.log'))
28
+ # f_handler.setLevel(logging.INFO)
29
+
30
+ # # Formatters
31
+ # c_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
32
+ # f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
33
+ # c_handler.setFormatter(c_format)
34
+ # f_handler.setFormatter(f_format)
35
+
36
+ # # Add handlers to the logger
37
+ # logger.addHandler(c_handler)
38
+ # logger.addHandler(f_handler)
39
+
40
+ # # Get a top-level logger to ensure all modules use the same setup
41
+ # def get_logger(name=__name__):
42
+ # """Returns a logger instance with predefined settings."""
43
+ # return logging.getLogger(name)
44
+
45
+
46
+ # # --- Image Transformation Utilities ---
47
+ # def get_train_transform():
48
+ # """Get image transform for training (resize, horizontal flip, normalize)"""
49
+ # return transforms.Compose([
50
+ # transforms.Resize((224, 224)), # Resize images to 224x224 pixels
51
+ # transforms.RandomHorizontalFlip(), # Randomly flip images horizontally for data augmentation
52
+ # transforms.ToTensor(), # Convert PIL Image or numpy.ndarray to tensor
53
+ # transforms.Normalize(mean=[0.485, 0.456, 0.406], # Normalize pixel values
54
+ # std=[0.229, 0.224, 0.225])
55
+ # ])
56
+
57
+ # def get_eval_transform():
58
+ # """Get image transform for evaluation/inference (resize, normalize)"""
59
+ # return transforms.Compose([
60
+ # transforms.Resize((224, 224)), # Resize images to 224x224 pixels
61
+ # transforms.ToTensor(), # Convert PIL Image or numpy.ndarray to tensor
62
+ # transforms.Normalize(mean=[0.485, 0.456, 0.406], # Normalize pixel values
63
+ # std=[0.229, 0.224, 0.225])
64
+ # ])
65
+
66
+
67
+ # # --- Dataset Analysis Utility (moved from original script for modularity) ---
68
+ # class DatasetAnalyzer:
69
+ # """Utility class to analyze COCO dataset statistics."""
70
+
71
+ # @staticmethod
72
+ # def analyze_captions(caption_file, max_samples=None):
73
+ # """
74
+ # Analyzes caption statistics from a COCO-format JSON file.
75
+ # Args:
76
+ # caption_file (str): Path to the COCO captions JSON file.
77
+ # max_samples (int, optional): Maximum number of captions to analyze.
78
+ # Useful for large datasets. Defaults to None (all).
79
+ # Returns:
80
+ # dict: A dictionary containing various caption statistics.
81
+ # """
82
+ # try:
83
+ # with open(caption_file, 'r') as f:
84
+ # data = json.load(f)
85
+ # except FileNotFoundError:
86
+ # logger.error(f"Caption file not found for analysis: {caption_file}")
87
+ # return {}
88
+ # except json.JSONDecodeError:
89
+ # logger.error(f"Error decoding JSON from {caption_file}. Ensure it's valid.")
90
+ # return {}
91
+
92
+ # captions = [ann['caption'] for ann in data['annotations']]
93
+ # if max_samples:
94
+ # captions = captions[:max_samples]
95
+
96
+ # # Basic statistics
97
+ # lengths = [len(caption.split()) for caption in captions]
98
+
99
+ # stats = {
100
+ # 'total_captions': len(captions),
101
+ # 'avg_length': np.mean(lengths) if lengths else 0,
102
+ # 'std_length': np.std(lengths) if lengths else 0,
103
+ # 'min_length': min(lengths) if lengths else 0,
104
+ # 'max_length': max(lengths) if lengths else 0,
105
+ # 'median_length': np.median(lengths) if lengths else 0
106
+ # }
107
+
108
+ # # Word frequency
109
+ # all_words = []
110
+ # from collections import Counter # Import here to avoid circular dependency issues if Counter is used elsewhere
111
+ # for caption in captions:
112
+ # words = caption.lower().split()
113
+ # all_words.extend(words)
114
+
115
+ # word_freq = Counter(all_words)
116
+ # stats['unique_words'] = len(word_freq)
117
+ # stats['most_common_words'] = word_freq.most_common(20)
118
+
119
+ # return stats
120
+
121
+ # @staticmethod
122
+ # def plot_length_distribution(caption_file, max_samples=None, save_path=None):
123
+ # """
124
+ # Plots the distribution of caption lengths.
125
+ # Args:
126
+ # caption_file (str): Path to the COCO captions JSON file.
127
+ # max_samples (int, optional): Maximum number of captions to plot. Defaults to None (all).
128
+ # save_path (str, optional): Path to save the plot. If None, displays the plot.
129
+ # """
130
+ # try:
131
+ # with open(caption_file, 'r') as f:
132
+ # data = json.load(f)
133
+ # except FileNotFoundError:
134
+ # logger.error(f"Caption file not found for plotting: {caption_file}")
135
+ # return
136
+ # except json.JSONDecodeError:
137
+ # logger.error(f"Error decoding JSON from {caption_file}. Ensure it's valid.")
138
+ # return
139
+
140
+ # captions = [ann['caption'] for ann in data['annotations']]
141
+ # if max_samples:
142
+ # captions = captions[:max_samples]
143
+
144
+ # lengths = [len(caption.split()) for caption in captions]
145
+
146
+ # plt.figure(figsize=(10, 6))
147
+ # plt.hist(lengths, bins=50, alpha=0.7, edgecolor='black')
148
+ # plt.xlabel('Caption Length (words)')
149
+ # plt.ylabel('Frequency')
150
+ # plt.title('Distribution of Caption Lengths')
151
+ # plt.grid(True, alpha=0.3)
152
+
153
+ # if save_path:
154
+ # plt.savefig(save_path, bbox_inches='tight', dpi=150)
155
+ # logger.info(f"Caption length distribution plot saved to {save_path}")
156
+ # else:
157
+ # plt.show()
158
+
159
+ # # Import json here as it's used by DatasetAnalyzer
160
+ # import json
161
+
162
+ # # --- Attention Visualization Utility ---
163
+ # def visualize_attention(model, image_path, vocabulary, device, save_path=None, max_words_to_show=10):
164
+ # """
165
+ # Visualizes attention weights on an image for a generated caption.
166
+ # This function requires the model to have a `generate_caption` method
167
+ # and access to the encoder and decoder components to extract attention.
168
+
169
+ # Args:
170
+ # model (ImageCaptioningModel): The trained image captioning model.
171
+ # image_path (str): Path to the image file for visualization.
172
+ # vocabulary (COCOVocabulary): The vocabulary object.
173
+ # device (torch.device): Device to run the model on (cpu/cuda).
174
+ # save_path (str, optional): Path to save the visualization plot. If None, displays the plot.
175
+ # max_words_to_show (int): Maximum number of words to visualize attention for.
176
+ # """
177
+ # logger = get_logger(__name__) # Get logger for this function
178
+
179
+ # model.eval() # Set model to evaluation mode
180
+
181
+ # # Load and preprocess image
182
+ # transform = get_eval_transform()
183
+
184
+ # try:
185
+ # image = Image.open(image_path).convert('RGB')
186
+ # except FileNotFoundError:
187
+ # logger.error(f"Image not found at {image_path} for attention visualization.")
188
+ # return
189
+ # except Exception as e:
190
+ # logger.error(f"Error loading image {image_path} for attention visualization: {e}")
191
+ # return
192
+
193
+ # image_tensor = transform(image).unsqueeze(0).to(device) # Add batch dimension
194
+
195
+ # with torch.no_grad():
196
+ # # Get encoder output
197
+ # # (1, encoder_dim, encoded_image_size, encoded_image_size)
198
+ # encoder_out = model.encoder(image_tensor)
199
+
200
+ # # Reshape for attention: (1, num_pixels, encoder_dim)
201
+ # encoder_out_reshaped = encoder_out.permute(0, 2, 3, 1).contiguous()
202
+ # encoder_out_reshaped = encoder_out_reshaped.view(1, -1, model.encoder_dim)
203
+
204
+ # # Initialize decoder states
205
+ # h, c = model.decoder.init_hidden_state(encoder_out_reshaped)
206
+
207
+ # # Start of sentence token
208
+ # word_idx = vocabulary.word2idx['<START>']
209
+ # caption_words = []
210
+ # attention_weights = []
211
+
212
+ # # Generate caption word by word and collect attention weights
213
+ # # Iterate up to max_words_to_show or until <END> token is generated
214
+ # for _ in range(model.decoder.max_caption_length_for_inference): # Use model's max_length
215
+ # if word_idx == vocabulary.word2idx['<END>'] or len(caption_words) >= max_words_to_show:
216
+ # break
217
+
218
+ # # Get embeddings for current word
219
+ # # (1, embed_dim)
220
+ # embeddings = model.decoder.embedding(torch.LongTensor([word_idx]).to(device))
221
+
222
+ # # Get attention-weighted encoding and alpha
223
+ # # alpha: (1, num_pixels)
224
+ # awe, alpha = model.decoder.attention(encoder_out_reshaped, h)
225
+ # attention_weights.append(alpha.cpu().numpy())
226
+
227
+ # # Apply gate to attention-weighted encoding
228
+ # gate = model.decoder.sigmoid(model.decoder.f_beta(h))
229
+ # awe = gate * awe
230
+
231
+ # # Perform one step of LSTM decoding
232
+ # h, c = model.decoder.decode_step(
233
+ # torch.cat([embeddings, awe], dim=1),
234
+ # (h, c)
235
+ # )
236
+
237
+ # # Predict next word
238
+ # scores = model.decoder.fc(h) # (1, vocab_size)
239
+ # word_idx = scores.argmax(dim=1).item() # Get the index of the predicted word
240
+
241
+ # word = vocabulary.idx2word[word_idx]
242
+ # caption_words.append(word)
243
+
244
+ # # Visualize the attention maps
245
+ # num_plots = len(caption_words)
246
+ # if num_plots == 0:
247
+ # logger.warning("No words generated for attention visualization. Cannot create plot.")
248
+ # return
249
+
250
+ # # Adjust figure size dynamically based on number of plots
251
+ # fig, axes = plt.subplots(1, num_plots, figsize=(4 * num_plots, 5))
252
+ # if num_plots == 1: # Ensure axes is iterable even for single plot
253
+ # axes = [axes]
254
+
255
+ # for i, (word, alpha) in enumerate(zip(caption_words, attention_weights)):
256
+ # # Reshape attention to encoder's spatial size (e.g., 14x14 for ResNet50)
257
+ # # Assuming encoded_image_size is available in model.encoder
258
+ # enc_img_size = model.encoder.encoded_image_size
259
+ # alpha_img = alpha.reshape(enc_img_size, enc_img_size)
260
+
261
+ # # Resize attention map to original image size for overlay
262
+ # alpha_img_resized = Image.fromarray(alpha_img * 255).resize(image.size, Image.LANCZOS)
263
+ # alpha_img_np = np.array(alpha_img_resized) / 255.0 # Normalize back to 0-1
264
+
265
+ # axes[i].imshow(image)
266
+ # axes[i].imshow(alpha_img_np, alpha=0.6, cmap='jet') # Overlay attention map
267
+ # axes[i].set_title(f'Word: {word}')
268
+ # axes[i].axis('off')
269
+
270
+ # plt.suptitle(f"Generated Caption (Attention Visualization): {' '.join(caption_words)}")
271
+ # plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # Adjust layout to prevent title overlap
272
+
273
+ # if save_path:
274
+ # os.makedirs(os.path.dirname(save_path), exist_ok=True)
275
+ # plt.savefig(save_path, bbox_inches='tight', dpi=150)
276
+ # logger.info(f"Attention visualization saved to {save_path}")
277
+ # else:
278
+ # plt.show()
279
+
280
+ # return ' '.join(caption_words)
281
+
282
+
283
+
284
+
285
+
286
+ import logging
287
+ import sys
288
+ import os
289
+ import shutil
290
+ import matplotlib.pyplot as plt
291
+ import numpy as np
292
+ from PIL import Image
293
+ import torch
294
+ import torchvision.transforms as transforms
295
+
296
# --- Logging Configuration ---
# Module import side effect: configures the process-global ROOT logger once, so
# every module that later calls logging.getLogger(...) inherits the console and
# file handlers attached below via propagation.
root_logger = logging.getLogger()
# Minimum level on the root logger: records at INFO and above from any child
# logger (including those in app.py) are processed by the handlers below.
root_logger.setLevel(logging.INFO)

# Guard against duplicate handlers when this module is imported or executed more
# than once in the same interpreter session (the root logger persists).
if not root_logger.handlers:
    # Console handler: directs log records to standard output.
    c_handler = logging.StreamHandler(sys.stdout)
    c_handler.setLevel(logging.INFO)  # Level for console output

    # File handler: writes log records to output/training.log.
    # Creating the 'output' directory here is a side effect of importing this module.
    log_dir = 'output'
    os.makedirs(log_dir, exist_ok=True)
    f_handler = logging.FileHandler(os.path.join(log_dir, 'training.log'))
    f_handler.setLevel(logging.INFO)  # Level for file output

    # Formatters define each record's layout: timestamp - logger name - level - message.
    c_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    f_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    c_handler.setFormatter(c_format)
    f_handler.setFormatter(f_format)

    # Attach the handlers to the ROOT logger (not a module logger) so that child
    # loggers obtained anywhere in the project propagate into them by default.
    root_logger.addHandler(c_handler)
    root_logger.addHandler(f_handler)
327
+
328
def get_logger(name=__name__):
    """Return a logger wired into the shared root configuration.

    Passing a module's ``__name__`` yields a child logger that propagates its
    records to the console and file handlers attached to the root logger in
    this module, so no per-module handler setup is required.
    """
    child_logger = logging.getLogger(name)
    return child_logger
335
+
336
+
337
+ # --- Image Transformation Utilities ---
338
def get_train_transform():
    """Image preprocessing pipeline for training.

    Resizes to 224x224, applies a random horizontal flip for augmentation,
    converts to a tensor, and normalizes with ImageNet channel statistics.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    pipeline = [
        transforms.Resize((224, 224)),      # fixed input size expected by the encoder
        transforms.RandomHorizontalFlip(),  # augmentation: random left-right flip
        transforms.ToTensor(),              # PIL image / ndarray -> float tensor in [0, 1]
        normalize,                          # ImageNet mean/std normalization
    ]
    return transforms.Compose(pipeline)
347
+
348
def get_eval_transform():
    """Image preprocessing pipeline for evaluation/inference.

    Same as the training transform but deterministic: no random flip.
    Resizes to 224x224, converts to a tensor, and normalizes with ImageNet
    channel statistics.
    """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    pipeline = [
        transforms.Resize((224, 224)),  # fixed input size expected by the encoder
        transforms.ToTensor(),          # PIL image / ndarray -> float tensor in [0, 1]
        normalize,                      # ImageNet mean/std normalization
    ]
    return transforms.Compose(pipeline)
356
+
357
+
358
+ # --- Dataset Analysis Utility (moved from original script for modularity) ---
359
+ class DatasetAnalyzer:
360
+ """Utility class to analyze COCO dataset statistics."""
361
+
362
+ @staticmethod
363
+ def analyze_captions(caption_file, max_samples=None):
364
+ """
365
+ Analyzes caption statistics from a COCO-format JSON file.
366
+ Args:
367
+ caption_file (str): Path to the COCO captions JSON file.
368
+ max_samples (int, optional): Maximum number of captions to analyze.
369
+ Useful for large datasets. Defaults to None (all).
370
+ Returns:
371
+ dict: A dictionary containing various caption statistics.
372
+ """
373
+ try:
374
+ with open(caption_file, 'r') as f:
375
+ data = json.load(f)
376
+ except FileNotFoundError:
377
+ root_logger.error(f"Caption file not found for analysis: {caption_file}")
378
+ return {}
379
+ except json.JSONDecodeError:
380
+ root_logger.error(f"Error decoding JSON from {caption_file}. Ensure it's valid.")
381
+ return {}
382
+
383
+ captions = [ann['caption'] for ann in data['annotations']]
384
+ if max_samples:
385
+ captions = captions[:max_samples]
386
+
387
+ # Basic statistics
388
+ lengths = [len(caption.split()) for caption in captions]
389
+
390
+ stats = {
391
+ 'total_captions': len(captions),
392
+ 'avg_length': np.mean(lengths) if lengths else 0,
393
+ 'std_length': np.std(lengths) if lengths else 0,
394
+ 'min_length': min(lengths) if lengths else 0,
395
+ 'max_length': max(lengths) if lengths else 0,
396
+ 'median_length': np.median(lengths) if lengths else 0
397
+ }
398
+
399
+ # Word frequency
400
+ all_words = []
401
+ from collections import Counter # Import here to avoid circular dependency issues if Counter is used elsewhere
402
+ for caption in captions:
403
+ words = caption.lower().split()
404
+ all_words.extend(words)
405
+
406
+ word_freq = Counter(all_words)
407
+ stats['unique_words'] = len(word_freq)
408
+ stats['most_common_words'] = word_freq.most_common(20)
409
+
410
+ return stats
411
+
412
+ @staticmethod
413
+ def plot_length_distribution(caption_file, max_samples=None, save_path=None):
414
+ """
415
+ Plots the distribution of caption lengths.
416
+ Args:
417
+ caption_file (str): Path to the COCO captions JSON file.
418
+ max_samples (int, optional): Maximum number of captions to plot. Defaults to None (all).
419
+ save_path (str, optional): Path to save the plot. If None, displays the plot.
420
+ """
421
+ try:
422
+ with open(caption_file, 'r') as f:
423
+ data = json.load(f)
424
+ except FileNotFoundError:
425
+ root_logger.error(f"Caption file not found for plotting: {caption_file}")
426
+ return
427
+ except json.JSONDecodeError:
428
+ root_logger.error(f"Error decoding JSON from {caption_file}. Ensure it's valid.")
429
+ return
430
+
431
+ captions = [ann['caption'] for ann in data['annotations']]
432
+ if max_samples:
433
+ captions = captions[:max_samples]
434
+
435
+ lengths = [len(caption.split()) for caption in captions]
436
+
437
+ plt.figure(figsize=(10, 6))
438
+ plt.hist(lengths, bins=50, alpha=0.7, edgecolor='black')
439
+ plt.xlabel('Caption Length (words)')
440
+ plt.ylabel('Frequency')
441
+ plt.title('Distribution of Caption Lengths')
442
+ plt.grid(True, alpha=0.3)
443
+
444
+ if save_path:
445
+ plt.savefig(save_path, bbox_inches='tight', dpi=150)
446
+ root_logger.info(f"Caption length distribution plot saved to {save_path}")
447
+ else:
448
+ plt.show()
449
+
450
+ # Import json here as it's used by DatasetAnalyzer
451
+ import json
452
+
453
+ # --- Attention Visualization Utility ---
454
def visualize_attention(model, image_path, vocabulary, device, save_path=None, max_words_to_show=10):
    """
    Greedily decode a caption for one image and plot each word's attention map.

    Runs the encoder once, then steps the attention decoder word by word while
    recording each step's attention weights, and finally renders one subplot per
    generated word with its attention heat-map overlaid on the input image.

    Args:
        model (ImageCaptioningModel): Trained model exposing ``encoder`` and
            ``decoder`` submodules (attention, embedding, decode_step, fc, f_beta).
        image_path (str): Path to the image file for visualization.
        vocabulary (COCOVocabulary): Vocabulary with ``word2idx`` / ``idx2word``.
        device (torch.device): Device to run the model on (cpu/cuda).
        save_path (str, optional): Path to save the plot. If None, displays the plot.
        max_words_to_show (int): Maximum number of words to visualize attention for.

    Returns:
        str or None: The generated caption as a space-joined string, or None if
        the image could not be loaded or no words were generated.
    """
    logger = get_logger(__name__) # Logger for this function (inherits root handlers)

    model.eval() # Set model to evaluation mode (disable dropout/batchnorm updates)

    # Load and preprocess the image with the deterministic eval transform
    transform = get_eval_transform()

    try:
        image = Image.open(image_path).convert('RGB')
    except FileNotFoundError:
        logger.error(f"Image not found at {image_path} for attention visualization.")
        return
    except Exception as e:
        # Broad catch is deliberate: any PIL decode failure aborts the visualization
        logger.error(f"Error loading image {image_path} for attention visualization: {e}")
        return

    image_tensor = transform(image).unsqueeze(0).to(device) # Add batch dimension

    with torch.no_grad():
        # Encoder output: (1, encoder_dim, H, W) per the permute/view below
        encoder_out = model.encoder(image_tensor)

        # Flatten the spatial grid for attention: (1, num_pixels, encoder_dim)
        encoder_out_reshaped = encoder_out.permute(0, 2, 3, 1).contiguous()
        encoder_out_reshaped = encoder_out_reshaped.view(1, -1, model.encoder_dim)

        # Initialize LSTM hidden/cell states from the encoded image
        h, c = model.decoder.init_hidden_state(encoder_out_reshaped)

        # Greedy decoding starts from the <START> token
        word_idx = vocabulary.word2idx['<START>']
        caption_words = []
        attention_weights = []  # one alpha array per generated word

        # Generate word by word, collecting attention weights each step.
        # NOTE(review): assumes the decoder exposes `max_caption_length_for_inference`
        # — confirm the attribute name against the decoder definition.
        for _ in range(model.decoder.max_caption_length_for_inference):
            # Stop at <END> or once enough words have been visualized
            if word_idx == vocabulary.word2idx['<END>'] or len(caption_words) >= max_words_to_show:
                break

            # Embedding of the current word: (1, embed_dim)
            embeddings = model.decoder.embedding(torch.LongTensor([word_idx]).to(device))

            # Attention-weighted encoding plus weights; alpha: (1, num_pixels)
            awe, alpha = model.decoder.attention(encoder_out_reshaped, h)
            attention_weights.append(alpha.cpu().numpy())

            # Gate the attention-weighted encoding (as in Show, Attend and Tell)
            gate = model.decoder.sigmoid(model.decoder.f_beta(h))
            awe = gate * awe

            # One LSTM step on [embedding ; gated attention context]
            h, c = model.decoder.decode_step(
                torch.cat([embeddings, awe], dim=1),
                (h, c)
            )

            # Greedy pick of the next word: (1, vocab_size) -> argmax index
            scores = model.decoder.fc(h)
            word_idx = scores.argmax(dim=1).item()

            word = vocabulary.idx2word[word_idx]
            caption_words.append(word)

    # Render one subplot per generated word
    num_plots = len(caption_words)
    if num_plots == 0:
        logger.warning("No words generated for attention visualization. Cannot create plot.")
        return

    # Figure width scales with the number of words shown
    fig, axes = plt.subplots(1, num_plots, figsize=(4 * num_plots, 5))
    if num_plots == 1: # plt.subplots returns a bare Axes for a single plot
        axes = [axes]

    for i, (word, alpha) in enumerate(zip(caption_words, attention_weights)):
        # Reshape the flat attention vector back to the encoder's spatial grid.
        # NOTE(review): assumes num_pixels == encoded_image_size**2 — confirm.
        enc_img_size = model.encoder.encoded_image_size
        alpha_img = alpha.reshape(enc_img_size, enc_img_size)

        # Upsample the attention map to the original image size for overlay.
        # NOTE(review): alpha_img*255 is a float array, so PIL uses mode 'F' here
        # — verify the round-trip through fromarray/resize keeps values sensible.
        alpha_img_resized = Image.fromarray(alpha_img * 255).resize(image.size, Image.LANCZOS)
        alpha_img_np = np.array(alpha_img_resized) / 255.0 # back to ~[0, 1]

        axes[i].imshow(image)
        axes[i].imshow(alpha_img_np, alpha=0.6, cmap='jet') # semi-transparent heat-map
        axes[i].set_title(f'Word: {word}')
        axes[i].axis('off')

    plt.suptitle(f"Generated Caption (Attention Visualization): {' '.join(caption_words)}")
    plt.tight_layout(rect=[0, 0.03, 1, 0.95]) # leave room for the suptitle

    if save_path:
        os.makedirs(os.path.dirname(save_path), exist_ok=True)
        plt.savefig(save_path, bbox_inches='tight', dpi=150)
        logger.info(f"Attention visualization saved to {save_path}")
    else:
        plt.show()

    return ' '.join(caption_words)
templates/auth.html ADDED
@@ -0,0 +1,1283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Dynamic Facial Recognition Authentication</title>
7
+ <style>
8
+ * {
9
+ margin: 0;
10
+ padding: 0;
11
+ box-sizing: border-box;
12
+ }
13
+
14
+ body {
15
+ font-family: 'Inter', 'Segoe UI', system-ui, -apple-system, sans-serif;
16
+ background: #0B1121;
17
+ min-height: 100vh;
18
+ overflow-x: hidden;
19
+ position: relative;
20
+ color: #E5E7EB;
21
+ font-weight: 400;
22
+ letter-spacing: -0.01em;
23
+ }
24
+
25
+ /* Animated Background - Subtle particles matching main theme */
26
+ .bg-particles {
27
+ position: fixed;
28
+ top: 0;
29
+ left: 0;
30
+ width: 100%;
31
+ height: 100%;
32
+ pointer-events: none;
33
+ z-index: 1;
34
+ }
35
+
36
+ .particle {
37
+ position: absolute;
38
+ background: rgba(59, 130, 246, 0.1);
39
+ border-radius: 50%;
40
+ animation: float 8s ease-in-out infinite;
41
+ }
42
+
43
+ .particle:nth-child(1) { width: 60px; height: 60px; left: 10%; animation-delay: 0s; }
44
+ .particle:nth-child(2) { width: 80px; height: 80px; left: 20%; animation-delay: 3s; }
45
+ .particle:nth-child(3) { width: 40px; height: 40px; left: 60%; animation-delay: 1s; }
46
+ .particle:nth-child(4) { width: 70px; height: 70px; left: 80%; animation-delay: 4s; }
47
+ .particle:nth-child(5) { width: 30px; height: 30px; left: 70%; animation-delay: 2s; }
48
+
49
+ @keyframes float {
50
+ 0%, 100% { transform: translateY(0px) rotate(0deg); opacity: 0.3; }
51
+ 50% { transform: translateY(-30px) rotate(180deg); opacity: 0.6; }
52
+ }
53
+
54
+ /* Main Container */
55
+ .auth-wrapper {
56
+ min-height: 100vh;
57
+ display: flex;
58
+ align-items: center;
59
+ justify-content: center;
60
+ padding: 20px;
61
+ position: relative;
62
+ z-index: 2;
63
+ background: radial-gradient(circle at 20% 80%, rgba(59, 130, 246, 0.1) 0%, transparent 50%),
64
+ radial-gradient(circle at 80% 20%, rgba(99, 102, 241, 0.1) 0%, transparent 50%);
65
+ }
66
+
67
+ .auth-container {
68
+ background: #1E293B;
69
+ border: 1px solid #334155;
70
+ border-radius: 16px;
71
+ padding: 0;
72
+ box-shadow: 0 10px 25px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
73
+ width: 100%;
74
+ max-width: 450px;
75
+ position: relative;
76
+ overflow: hidden;
77
+ transform: translateY(20px);
78
+ animation: slideUp 0.8s ease-out forwards;
79
+ }
80
+
81
+ @keyframes slideUp {
82
+ to {
83
+ transform: translateY(0);
84
+ opacity: 1;
85
+ }
86
+ }
87
+
88
+ /* Header Section */
89
+ .auth-header {
90
+ background: linear-gradient(135deg, #1E293B 0%, #334155 100%);
91
+ padding: 30px;
92
+ text-align: center;
93
+ position: relative;
94
+ border-bottom: 1px solid #374151;
95
+ }
96
+
97
+ .auth-header::before {
98
+ content: '';
99
+ position: absolute;
100
+ top: 0;
101
+ left: 0;
102
+ right: 0;
103
+ height: 3px;
104
+ background: linear-gradient(90deg, #3B82F6, #6366F1);
105
+ animation: glow 2s ease-in-out infinite alternate;
106
+ }
107
+
108
+ @keyframes glow {
109
+ from { box-shadow: 0 0 5px rgba(59, 130, 246, 0.5); }
110
+ to { box-shadow: 0 0 20px rgba(59, 130, 246, 0.8); }
111
+ }
112
+
113
+ .auth-title {
114
+ color: #FFFFFF;
115
+ font-size: 28px;
116
+ font-weight: 700;
117
+ margin-bottom: 10px;
118
+ letter-spacing: -0.02em;
119
+ transition: all 0.3s ease;
120
+ }
121
+
122
+ .auth-subtitle {
123
+ color: #94A3B8;
124
+ font-size: 14px;
125
+ font-weight: 400;
126
+ }
127
+
128
+ /* Tab Navigation */
129
+ .tab-navigation {
130
+ display: flex;
131
+ background: #111827;
132
+ margin: 0;
133
+ border-bottom: 1px solid #374151;
134
+ }
135
+
136
+ .tab-btn {
137
+ flex: 1;
138
+ padding: 20px;
139
+ background: none;
140
+ border: none;
141
+ color: #94A3B8;
142
+ font-size: 16px;
143
+ font-weight: 600;
144
+ cursor: pointer;
145
+ transition: all 0.3s ease;
146
+ position: relative;
147
+ overflow: hidden;
148
+ font-family: inherit;
149
+ }
150
+
151
+ .tab-btn::before {
152
+ content: '';
153
+ position: absolute;
154
+ bottom: 0;
155
+ left: 50%;
156
+ width: 0;
157
+ height: 3px;
158
+ background: linear-gradient(90deg, #3B82F6, #6366F1);
159
+ transition: all 0.3s ease;
160
+ transform: translateX(-50%);
161
+ }
162
+
163
+ .tab-btn.active {
164
+ color: #FFFFFF;
165
+ background: #1E293B;
166
+ }
167
+
168
+ .tab-btn.active::before {
169
+ width: 100%;
170
+ }
171
+
172
+ .tab-btn:hover {
173
+ background: #1E293B;
174
+ color: #FFFFFF;
175
+ }
176
+
177
+ /* Form Content */
178
+ .form-content {
179
+ padding: 40px;
180
+ background: #1E293B;
181
+ }
182
+
183
+ .form-container {
184
+ position: relative;
185
+ min-height: 300px;
186
+ }
187
+
188
+ .form-panel {
189
+ position: absolute;
190
+ top: 0;
191
+ left: 0;
192
+ right: 0;
193
+ opacity: 0;
194
+ transform: translateX(30px);
195
+ transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
196
+ pointer-events: none;
197
+ }
198
+
199
+ .form-panel.active {
200
+ opacity: 1;
201
+ transform: translateX(0);
202
+ pointer-events: all;
203
+ }
204
+
205
+ .form-group {
206
+ margin-bottom: 25px;
207
+ position: relative;
208
+ }
209
+
210
+ .input-wrapper {
211
+ position: relative;
212
+ overflow: hidden;
213
+ border-radius: 8px;
214
+ }
215
+
216
+ .form-input {
217
+ width: 100%;
218
+ padding: 16px 20px;
219
+ border: 1px solid #374151;
220
+ border-radius: 8px;
221
+ font-size: 16px;
222
+ background: #111827;
223
+ color: #E5E7EB;
224
+ transition: all 0.3s ease;
225
+ font-family: inherit;
226
+ }
227
+
228
+ .form-input::placeholder {
229
+ color: #6B7280;
230
+ }
231
+
232
+ .form-input:focus {
233
+ outline: none;
234
+ border-color: #3B82F6;
235
+ background: #111827;
236
+ box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1);
237
+ transform: translateY(-1px);
238
+ }
239
+
240
+ .input-icon {
241
+ position: absolute;
242
+ right: 15px;
243
+ top: 50%;
244
+ transform: translateY(-50%);
245
+ color: #6B7280;
246
+ font-size: 18px;
247
+ transition: all 0.3s ease;
248
+ }
249
+
250
+ .form-input:focus + .input-icon {
251
+ color: #3B82F6;
252
+ transform: translateY(-50%) scale(1.1);
253
+ }
254
+
255
+ /* Buttons */
256
+ .btn {
257
+ width: 100%;
258
+ padding: 16px;
259
+ border: none;
260
+ border-radius: 8px;
261
+ font-size: 16px;
262
+ font-weight: 600;
263
+ cursor: pointer;
264
+ transition: all 0.2s ease;
265
+ margin-bottom: 15px;
266
+ position: relative;
267
+ overflow: hidden;
268
+ font-family: inherit;
269
+ letter-spacing: -0.01em;
270
+ }
271
+
272
+ .btn::before {
273
+ content: '';
274
+ position: absolute;
275
+ top: 0;
276
+ left: -100%;
277
+ width: 100%;
278
+ height: 100%;
279
+ background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.1), transparent);
280
+ transition: left 0.5s;
281
+ }
282
+
283
+ .btn:hover::before {
284
+ left: 100%;
285
+ }
286
+
287
+ .btn-primary {
288
+ background: #3B82F6;
289
+ color: #FFFFFF;
290
+ box-shadow: 0 4px 14px 0 rgba(59, 130, 246, 0.25);
291
+ }
292
+
293
+ .btn-primary:hover {
294
+ background: #2563EB;
295
+ transform: translateY(-1px);
296
+ box-shadow: 0 6px 20px 0 rgba(59, 130, 246, 0.35);
297
+ }
298
+
299
+ .btn-secondary {
300
+ background: #6366F1;
301
+ color: #FFFFFF;
302
+ box-shadow: 0 4px 14px 0 rgba(99, 102, 241, 0.25);
303
+ }
304
+
305
+ .btn-secondary:hover {
306
+ background: #5B21B6;
307
+ transform: translateY(-1px);
308
+ box-shadow: 0 6px 20px 0 rgba(99, 102, 241, 0.35);
309
+ }
310
+
311
+ .btn-danger {
312
+ background: #EF4444;
313
+ color: #FFFFFF;
314
+ box-shadow: 0 4px 14px 0 rgba(239, 68, 68, 0.25);
315
+ }
316
+
317
+ .btn-danger:hover {
318
+ background: #DC2626;
319
+ transform: translateY(-1px);
320
+ box-shadow: 0 6px 20px 0 rgba(239, 68, 68, 0.35);
321
+ }
322
+
323
+ .btn:disabled {
324
+ opacity: 0.6;
325
+ cursor: not-allowed;
326
+ transform: none;
327
+ }
328
+
329
+ .btn:disabled:hover {
330
+ transform: none;
331
+ }
332
+
333
+ /* Divider */
334
+ .divider {
335
+ display: flex;
336
+ align-items: center;
337
+ margin: 30px 0;
338
+ color: #6B7280;
339
+ }
340
+
341
+ .divider::before,
342
+ .divider::after {
343
+ content: '';
344
+ flex: 1;
345
+ height: 1px;
346
+ background: linear-gradient(90deg, transparent, #374151, transparent);
347
+ }
348
+
349
+ .divider span {
350
+ padding: 0 20px;
351
+ font-size: 14px;
352
+ font-weight: 500;
353
+ background: #1E293B;
354
+ border-radius: 20px;
355
+ border: 1px solid #374151;
356
+ }
357
+
358
+ /* Face Auth Buttons */
359
+ .face-auth-section {
360
+ margin: 20px 0;
361
+ }
362
+
363
+ .face-auth-section button {
364
+
365
+ background:#4079f4;
366
+ }
367
+
368
+ .face-auth-section button:hover {
369
+ background: #2563EB;
370
+ }
371
+
372
+ .face-btn-group {
373
+ display: flex;
374
+ gap: 12px;
375
+ }
376
+
377
+ .face-btn-group .btn {
378
+ flex: 1;
379
+ margin-bottom: 0;
380
+ display: flex;
381
+ align-items: center;
382
+ justify-content: center;
383
+ gap: 8px;
384
+ }
385
+
386
+ .face-icon {
387
+ font-size: 20px;
388
+ animation: pulse 2s infinite;
389
+ }
390
+
391
+ @keyframes pulse {
392
+ 0%, 100% { transform: scale(1); }
393
+ 50% { transform: scale(1.05); }
394
+ }
395
+
396
+ /* Status Messages */
397
+ .status-message {
398
+ padding: 15px 20px;
399
+ border-radius: 8px;
400
+ margin-bottom: 25px;
401
+ text-align: center;
402
+ font-weight: 500;
403
+ transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1);
404
+ display: none;
405
+ animation: slideIn 0.4s ease-out;
406
+ }
407
+
408
+ @keyframes slideIn {
409
+ from {
410
+ opacity: 0;
411
+ transform: translateY(-10px);
412
+ }
413
+ to {
414
+ opacity: 1;
415
+ transform: translateY(0);
416
+ }
417
+ }
418
+
419
+ .status-message.success {
420
+ background: rgba(34, 197, 94, 0.1);
421
+ color: #22C55E;
422
+ border: 1px solid rgba(34, 197, 94, 0.2);
423
+ }
424
+
425
+ .status-message.error {
426
+ background: rgba(239, 68, 68, 0.1);
427
+ color: #EF4444;
428
+ border: 1px solid rgba(239, 68, 68, 0.2);
429
+ }
430
+
431
+ .status-message.info {
432
+ background: rgba(59, 130, 246, 0.1);
433
+ color: #3B82F6;
434
+ border: 1px solid rgba(59, 130, 246, 0.2);
435
+ }
436
+
437
+ /* Webcam Section */
438
+ .webcam-section {
439
+ position: fixed;
440
+ top: 0;
441
+ left: 0;
442
+ width: 100%;
443
+ height: 100%;
444
+ background: rgba(11, 17, 33, 0.95);
445
+ display: none;
446
+ align-items: center;
447
+ justify-content: center;
448
+ z-index: 1000;
449
+ backdrop-filter: blur(20px);
450
+ animation: fadeIn 0.3s ease-out;
451
+ }
452
+
453
+ @keyframes fadeIn {
454
+ from { opacity: 0; }
455
+ to { opacity: 1; }
456
+ }
457
+
458
+ .webcam-section.active {
459
+ display: flex;
460
+ }
461
+
462
+ .webcam-container {
463
+ background: #1E293B;
464
+ border: 1px solid #334155;
465
+ border-radius: 16px;
466
+ padding: 30px;
467
+ text-align: center;
468
+ max-width: 500px;
469
+ width: 90%;
470
+ animation: zoomIn 0.4s cubic-bezier(0.4, 0, 0.2, 1);
471
+ box-shadow: 0 10px 25px -3px rgba(0, 0, 0, 0.3);
472
+ }
473
+
474
+ @keyframes zoomIn {
475
+ from {
476
+ opacity: 0;
477
+ transform: scale(0.8);
478
+ }
479
+ to {
480
+ opacity: 1;
481
+ transform: scale(1);
482
+ }
483
+ }
484
+
485
+ .video-container {
486
+ position: relative;
487
+ margin: 20px 0;
488
+ border-radius: 12px;
489
+ overflow: hidden;
490
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
491
+ }
492
+
493
+ #webcamVideo {
494
+ width: 100%;
495
+ height: auto;
496
+ max-width: 400px;
497
+ display: block;
498
+ margin: 0 auto;
499
+ border-radius: 12px;
500
+ transition: all 0.3s ease;
501
+ }
502
+
503
+ #webcamCanvas {
504
+ display: none;
505
+ }
506
+
507
+ .webcam-controls {
508
+ display: flex;
509
+ gap: 15px;
510
+ justify-content: center;
511
+ margin-top: 25px;
512
+ flex-wrap: wrap;
513
+ }
514
+
515
+ .webcam-controls .btn {
516
+ flex: 1;
517
+ min-width: 140px;
518
+ margin-bottom: 0;
519
+ }
520
+
521
+ /* Loading Spinner */
522
+ .loading-overlay {
523
+ position: fixed;
524
+ top: 0;
525
+ left: 0;
526
+ width: 100%;
527
+ height: 100%;
528
+ background: rgba(11, 17, 33, 0.95);
529
+ display: none;
530
+ align-items: center;
531
+ justify-content: center;
532
+ z-index: 9999;
533
+ backdrop-filter: blur(10px);
534
+ }
535
+
536
+ .loading-overlay.active {
537
+ display: flex;
538
+ }
539
+
540
+ .loading-spinner {
541
+ text-align: center;
542
+ color: #E5E7EB;
543
+ }
544
+
545
+ .spinner {
546
+ width: 60px;
547
+ height: 60px;
548
+ border: 4px solid #374151;
549
+ border-top: 4px solid #3B82F6;
550
+ border-radius: 50%;
551
+ animation: spin 1s linear infinite;
552
+ margin: 0 auto 20px;
553
+ }
554
+
555
+ @keyframes spin {
556
+ 0% { transform: rotate(0deg); }
557
+ 100% { transform: rotate(360deg); }
558
+ }
559
+
560
+ /* Security Note */
561
+ .security-note {
562
+ /* background: #111827; */
563
+ border: 1px solid #374151;
564
+ border-radius: 8px;
565
+ padding: 20px;
566
+ margin-top: 30px;
567
+ font-size: 14px;
568
+ /* color: #94A3B8; */
569
+ text-align: center;
570
+ /* animation: fadeInUp 0.6s ease-out 0.5s both; */
571
+ }
572
+
573
+ @keyframes fadeInUp {
574
+ from {
575
+ opacity: 0;
576
+ transform: translateY(20px);
577
+ }
578
+ to {
579
+ opacity: 1;
580
+ transform: translateY(0);
581
+ }
582
+ }
583
+
584
+ .security-icon {
585
+ color: #3B82F6;
586
+ margin-right: 8px;
587
+ font-size: 16px;
588
+ animation: rotate 6s linear infinite;
589
+ }
590
+
591
+ @keyframes rotate {
592
+ from { transform: rotate(0deg); }
593
+ to { transform: rotate(360deg); }
594
+ }
595
+
596
+ /* Responsive Design */
597
+ @media (max-width: 768px) {
598
+ .auth-container {
599
+ margin: 10px;
600
+ border-radius: 12px;
601
+ }
602
+
603
+ .auth-header {
604
+ padding: 25px 20px;
605
+ }
606
+
607
+ .form-content {
608
+ padding: 30px 25px;
609
+ }
610
+
611
+ .auth-title {
612
+ font-size: 24px;
613
+ }
614
+
615
+ .face-btn-group {
616
+ flex-direction: column;
617
+ }
618
+
619
+ .webcam-controls {
620
+ flex-direction: column;
621
+ }
622
+
623
+ .webcam-controls .btn {
624
+ min-width: auto;
625
+ }
626
+
627
+ .particle {
628
+ opacity: 0.2;
629
+ }
630
+ }
631
+
632
+ @media (max-width: 480px) {
633
+ .auth-container {
634
+ margin: 5px;
635
+ }
636
+
637
+ .tab-btn {
638
+ padding: 15px 10px;
639
+ font-size: 14px;
640
+ }
641
+
642
+ .form-content {
643
+ padding: 25px 20px;
644
+ }
645
+ }
646
+
647
+ /* Custom scrollbar */
648
+ ::-webkit-scrollbar {
649
+ width: 8px;
650
+ }
651
+
652
+ ::-webkit-scrollbar-track {
653
+ background: #1E293B;
654
+ border-radius: 10px;
655
+ }
656
+
657
+ ::-webkit-scrollbar-thumb {
658
+ background: linear-gradient(135deg, #3B82F6 0%, #6366F1 100%);
659
+ border-radius: 10px;
660
+ }
661
+
662
+ ::-webkit-scrollbar-thumb:hover {
663
+ background: linear-gradient(135deg, #2563EB 0%, #5B21B6 100%);
664
+ }
665
+ </style>
666
+ </head>
667
+ <body>
668
+ <!-- Animated Background -->
669
+ <div class="bg-particles">
670
+ <div class="particle"></div>
671
+ <div class="particle"></div>
672
+ <div class="particle"></div>
673
+ <div class="particle"></div>
674
+ <div class="particle"></div>
675
+ </div>
676
+
677
+ <!-- Loading Overlay -->
678
+ <div id="loadingOverlay" class="loading-overlay">
679
+ <div class="loading-spinner">
680
+ <div class="spinner"></div>
681
+ <p>Processing...</p>
682
+ </div>
683
+ </div>
684
+
685
+ <!-- Main Wrapper -->
686
+ <div class="auth-wrapper">
687
+ <div class="auth-container">
688
+ <!-- Header -->
689
+ <div class="auth-header">
690
+ <h1 id="authModeTitle" class="auth-title">Welcome Back</h1>
691
+ <p id="authSubtitle" class="auth-subtitle">Secure authentication with facial recognition</p>
692
+ </div>
693
+
694
+ <!-- Tab Navigation -->
695
+ <div class="tab-navigation">
696
+ <button id="loginTab" class="tab-btn active">
697
+ 🔑 Login
698
+ </button>
699
+ <button id="registerTab" class="tab-btn">
700
+ 👤 Register
701
+ </button>
702
+ </div>
703
+
704
+ <!-- Form Content -->
705
+ <div class="form-content">
706
+ <!-- Flash messages from Flask are handled here -->
707
+ {% with messages = get_flashed_messages(with_categories=true) %}
708
+ {% if messages %}
709
+ <div id="flaskStatusMessage" class="status-message">
710
+ {% for category, message in messages %}
711
+ <div class="flash-message flash-{{ category }}">{{ message }}</div>
712
+ {% endfor %}
713
+ </div>
714
+ {% else %}
715
+ <div id="authStatusMessage" class="status-message"></div>
716
+ {% endif %}
717
+ {% endwith %}
718
+
719
+ <div class="form-container">
720
+ <!-- Login Panel -->
721
+ <div id="loginPanel" class="form-panel active">
722
+ <form id="loginForm">
723
+ <div class="form-group">
724
+ <div class="input-wrapper">
725
+ <input type="email" id="loginEmail" name="email" class="form-input"
726
+ placeholder="Enter your email address" required>
727
+ <div class="input-icon">📧</div>
728
+ </div>
729
+ </div>
730
+ <div class="form-group">
731
+ <div class="input-wrapper">
732
+ <input type="password" id="loginPassword" name="password" class="form-input"
733
+ placeholder="Enter your password" required>
734
+ <div class="input-icon">🔒</div>
735
+ </div>
736
+ </div>
737
+ <button type="submit" class="btn btn-primary">
738
+ 🚀 Sign In
739
+ </button>
740
+ </form>
741
+
742
+ <div class="divider">
743
+ <span>or continue with</span>
744
+ </div>
745
+
746
+ <div class="face-auth-section">
747
+ <button type="button" id="faceLoginBtn" class="btn btn-secondary">
748
+ <span class="face-icon">👁️</span>
749
+ Face Recognition
750
+ </button>
751
+ </div>
752
+ </div>
753
+
754
+ <!-- Register Panel -->
755
+ <div id="registerPanel" class="form-panel">
756
+ <form id="registerForm">
757
+ <div class="form-group">
758
+ <div class="input-wrapper">
759
+ <input type="email" id="registerEmail" name="email" class="form-input"
760
+ placeholder="Enter your email address" required>
761
+ <div class="input-icon">📧</div>
762
+ </div>
763
+ </div>
764
+ <div class="form-group">
765
+ <div class="input-wrapper">
766
+ <input type="password" id="registerPassword" name="password" class="form-input"
767
+ placeholder="Create a strong password" required minlength="6">
768
+ <div class="input-icon">🔐</div>
769
+ </div>
770
+ </div>
771
+ <button type="submit" class="btn btn-primary">
772
+ ✨ Create Account
773
+ </button>
774
+ </form>
775
+
776
+ <div class="divider">
777
+ <span>enhance security with</span>
778
+ </div>
779
+
780
+ <div class="face-auth-section">
781
+ <button type="button" id="faceRegisterBtn" class="btn btn-secondary">
782
+ <span class="face-icon">📸</span>
783
+ Setup Face ID
784
+ </button>
785
+ </div>
786
+ </div>
787
+ </div>
788
+
789
+ <!-- Security Note -->
790
+ <div class="security-note">
791
+ <span class="security-icon"></span>
792
+ <!-- Your facial data is encrypted and stored locally with bank-level security -->
793
+ </div>
794
+ </div>
795
+ </div>
796
+ </div>
797
+
798
+ <!-- Webcam Section -->
799
+ <div id="webcamSection" class="webcam-section">
800
+ <div class="webcam-container">
801
+ <h2 style="color: white; margin-bottom: 20px;">📷 Face Recognition</h2>
802
+ <div class="video-container">
803
+ <video id="webcamVideo" autoplay muted playsinline></video>
804
+ <canvas id="webcamCanvas"></canvas>
805
+ </div>
806
+
807
+ <div class="webcam-controls">
808
+ <button id="captureFaceBtn" class="btn btn-primary" style="display: none;">
809
+ 📸 Capture Face (0/3)
810
+ </button>
811
+ <button id="cancelWebcamBtn" class="btn btn-danger">
812
+ ❌ Cancel
813
+ </button>
814
+ </div>
815
+ </div>
816
+ </div>
817
+
818
+ <script>
819
+ document.addEventListener('DOMContentLoaded', () => {
820
+ // DOM Elements
821
+ const authModeTitle = document.getElementById('authModeTitle');
822
+ const authSubtitle = document.getElementById('authSubtitle');
823
+ const loginTab = document.getElementById('loginTab');
824
+ const registerTab = document.getElementById('registerTab');
825
+ const loginPanel = document.getElementById('loginPanel');
826
+ const registerPanel = document.getElementById('registerPanel');
827
+ const loginForm = document.getElementById('loginForm');
828
+ const registerForm = document.getElementById('registerForm');
829
+ const authStatusMessage = document.getElementById('authStatusMessage'); // JS-driven message
830
+ const flaskStatusMessage = document.getElementById('flaskStatusMessage'); // Flask flash message container
831
+
832
+
833
+ const webcamSection = document.getElementById('webcamSection');
834
+ const webcamVideo = document.getElementById('webcamVideo');
835
+ const webcamCanvas = document.getElementById('webcamCanvas');
836
+ const captureFaceBtn = document.getElementById('captureFaceBtn');
837
+ const cancelWebcamBtn = document.getElementById('cancelWebcamBtn');
838
+ const loadingOverlay = document.getElementById('loadingOverlay');
839
+
840
+ const faceRegisterBtn = document.getElementById('faceRegisterBtn');
841
+ const faceLoginBtn = document.getElementById('faceLoginBtn');
842
+
843
+ let currentStream;
844
+ let captureCount = 0;
845
+ const MAX_CAPTURES = 3;
846
+ let capturedImages = [];
847
+ let currentAuthMode = 'login'; // Track current mode for webcam behavior
848
+
849
+ // --- Utility Functions ---
850
+ const showMessage = (message, type = 'info') => {
851
+ // Clear any existing Flask messages when showing JS messages
852
+ if (flaskStatusMessage) {
853
+ flaskStatusMessage.innerHTML = '';
854
+ flaskStatusMessage.style.display = 'none';
855
+ }
856
+
857
+ if (message) {
858
+ authStatusMessage.textContent = message;
859
+ authStatusMessage.className = `status-message ${type}`;
860
+ authStatusMessage.style.display = 'block';
861
+ } else {
862
+ authStatusMessage.style.display = 'none';
863
+ }
864
+ console.log(`UI Message (${type}): ${message}`);
865
+ };
866
+
867
+ const showLoading = (show) => {
868
+ if (show) {
869
+ loadingOverlay.classList.add('active');
870
+ } else {
871
+ loadingOverlay.classList.remove('active');
872
+ }
873
+ };
874
+
875
+ const resetCapture = () => {
876
+ captureCount = 0;
877
+ capturedImages = [];
878
+ captureFaceBtn.textContent = `📸 Capture Face (0/${MAX_CAPTURES})`;
879
+ };
880
+
881
+ // --- Tab Functions ---
882
+ const showLoginMode = () => {
883
+ currentAuthMode = 'login';
884
+ authModeTitle.textContent = 'Welcome Back';
885
+ authSubtitle.textContent = 'Secure authentication with facial recognition';
886
+
887
+ loginTab.classList.add('active');
888
+ registerTab.classList.remove('active');
889
+
890
+ loginPanel.classList.add('active');
891
+ registerPanel.classList.remove('active');
892
+
893
+ showMessage(''); // Clear JS messages when changing tabs
894
+ hideWebcam();
895
+ };
896
+
897
+ const showRegisterMode = () => {
898
+ currentAuthMode = 'register';
899
+ authModeTitle.textContent = 'Join Us Today';
900
+ authSubtitle.textContent = 'Create your secure account with advanced biometrics';
901
+
902
+ registerTab.classList.add('active');
903
+ loginTab.classList.remove('active');
904
+
905
+ registerPanel.classList.add('active');
906
+ loginPanel.classList.remove('active');
907
+
908
+ showMessage(''); // Clear JS messages when changing tabs
909
+ hideWebcam();
910
+ };
911
+
912
+ // --- Webcam Functions ---
913
+ const startWebcam = async () => {
914
+ try {
915
+ console.log('Starting webcam...');
916
+ resetCapture(); // Reset captures every time webcam starts
917
+
918
+ const stream = await navigator.mediaDevices.getUserMedia({
919
+ video: {
920
+ width: { ideal: 640 },
921
+ height: { ideal: 480 },
922
+ facingMode: 'user'
923
+ }
924
+ });
925
+
926
+ webcamVideo.srcObject = stream;
927
+ currentStream = stream;
928
+ webcamSection.classList.add('active');
929
+
930
+ // Wait for video to be ready
931
+ webcamVideo.onloadedmetadata = () => {
932
+ console.log('Webcam ready');
933
+ if (currentAuthMode === 'register') {
934
+ captureFaceBtn.style.display = 'block';
935
+ showMessage(`Position your face clearly in the camera. You need to capture ${MAX_CAPTURES} images.`, 'info');
936
+ captureFaceBtn.textContent = `📸 Capture Face (0/${MAX_CAPTURES})`; // Reset text
937
+ } else { // Login mode
938
+ showMessage('Position your face clearly in the camera for recognition.', 'info');
939
+ captureFaceBtn.style.display = 'none'; // No manual capture button for login
940
+ // For login, capture automatically after a short delay
941
+ setTimeout(() => {
942
+ captureForLogin();
943
+ }, 2000); // Give user 2 seconds to position face
944
+ }
945
+ };
946
+
947
+ } catch (error) {
948
+ console.error('Error accessing webcam:', error);
949
+ let errorMessage = 'Error accessing camera. Please ensure camera permissions are granted.';
950
+ if (error.name === 'NotAllowedError') {
951
+ errorMessage = 'Camera access denied. Please allow camera access in your browser settings.';
952
+ } else if (error.name === 'NotFoundError') {
953
+ errorMessage = 'No camera found. Please ensure a camera is connected and working.';
954
+ }
955
+ showMessage(errorMessage, 'error');
956
+ }
957
+ };
958
+
959
+ const hideWebcam = () => {
960
+ webcamSection.classList.remove('active');
961
+ captureFaceBtn.style.display = 'none';
962
+ if (currentStream) {
963
+ currentStream.getTracks().forEach(track => track.stop());
964
+ currentStream = null;
965
+ }
966
+ resetCapture(); // Also reset captures when hiding webcam
967
+ };
968
+
969
+ const captureImage = () => {
970
+ const canvas = webcamCanvas;
971
+ const context = canvas.getContext('2d');
972
+ const video = webcamVideo;
973
+
974
+ // Set canvas dimensions to match video
975
+ canvas.width = video.videoWidth;
976
+ canvas.height = video.videoHeight;
977
+
978
+ // Draw current video frame to canvas
979
+ context.drawImage(video, 0, 0, canvas.width, canvas.height);
980
+
981
+ // Get base64 image data
982
+ const imageData = canvas.toDataURL('image/jpeg', 0.8); // 0.8 quality for smaller size
983
+ console.log('Image captured, data length:', imageData.length);
984
+
985
+ return imageData;
986
+ };
987
+
988
+ const captureForRegister = () => {
989
+ if (captureCount >= MAX_CAPTURES) {
990
+ console.log('Max captures already reached for registration.');
991
+ return;
992
+ }
993
+
994
+ const imageData = captureImage();
995
+ if (imageData) {
996
+ capturedImages.push(imageData);
997
+ captureCount++;
998
+ captureFaceBtn.textContent = `📸 Capture Face (${captureCount}/${MAX_CAPTURES})`;
999
+
1000
+ console.log(`Captured image ${captureCount}/${MAX_CAPTURES}`);
1001
+
1002
+ if (captureCount === MAX_CAPTURES) {
1003
+ showMessage('All images captured! Processing registration...', 'info');
1004
+ captureFaceBtn.disabled = true; // Disable button after all captures
1005
+ setTimeout(() => submitFaceRegistration(), 500); // Small delay before submitting
1006
+ } else {
1007
+ showMessage(`Captured image ${captureCount}/${MAX_CAPTURES}. Capture ${MAX_CAPTURES - captureCount} more.`, 'success');
1008
+ }
1009
+ } else {
1010
+ showMessage('Failed to capture image. Please try again.', 'error');
1011
+ }
1012
+ };
1013
+
1014
+ const captureForLogin = () => {
1015
+ const imageData = captureImage();
1016
+ if (imageData) {
1017
+ console.log('Image captured for login');
1018
+ showMessage('Image captured. Processing login...', 'info');
1019
+ submitFaceLogin(imageData);
1020
+ } else {
1021
+ showMessage('Failed to capture image for login. Please try again.', 'error');
1022
+ hideWebcam(); // Close webcam on failure for login
1023
+ }
1024
+ };
1025
+
1026
+ // --- API Functions ---
1027
+ const submitFaceRegistration = async () => {
1028
+ showLoading(true);
1029
+
1030
+ const email = document.getElementById('registerEmail').value;
1031
+ const password = document.getElementById('registerPassword').value;
1032
+
1033
+ if (!email || !password) {
1034
+ showMessage('Please enter email and password before capturing face images.', 'error');
1035
+ showLoading(false);
1036
+ captureFaceBtn.disabled = false; // Re-enable button
1037
+ return;
1038
+ }
1039
+
1040
+ if (password.length < 6) {
1041
+ showMessage('Password must be at least 6 characters long.', 'error');
1042
+ showLoading(false);
1043
+ captureFaceBtn.disabled = false; // Re-enable button
1044
+ return;
1045
+ }
1046
+
1047
+ try {
1048
+ console.log('Submitting face registration with', capturedImages.length, 'images');
1049
+
1050
+ const response = await fetch('/face_register', {
1051
+ method: 'POST',
1052
+ headers: {
1053
+ 'Content-Type': 'application/json',
1054
+ },
1055
+ body: JSON.stringify({
1056
+ email: email,
1057
+ password: password,
1058
+ images: capturedImages
1059
+ })
1060
+ });
1061
+
1062
+ const result = await response.json();
1063
+ console.log('Face registration response:', result);
1064
+
1065
+ if (result.success) {
1066
+ showMessage(result.message, 'success');
1067
+ hideWebcam();
1068
+ setTimeout(() => {
1069
+ showLoginMode(); // Redirect to login after successful registration
1070
+ }, 2000);
1071
+ } else {
1072
+ showMessage(result.message, 'error');
1073
+ resetCapture();
1074
+ captureFaceBtn.disabled = false; // Re-enable button
1075
+ captureFaceBtn.style.display = 'block'; // Show button again
1076
+ }
1077
+ } catch (error) {
1078
+ console.error('Face registration error:', error);
1079
+ showMessage('Network error during face registration. Please try again.', 'error');
1080
+ resetCapture();
1081
+ captureFaceBtn.disabled = false; // Re-enable button
1082
+ captureFaceBtn.style.display = 'block'; // Show button again
1083
+ } finally {
1084
+ showLoading(false);
1085
+ }
1086
+ };
1087
+
1088
+ const submitFaceLogin = async (imageData) => {
1089
+ showLoading(true);
1090
+
1091
+ try {
1092
+ console.log('Submitting face login');
1093
+
1094
+ const response = await fetch('/face_login', {
1095
+ method: 'POST',
1096
+ headers: {
1097
+ 'Content-Type': 'application/json',
1098
+ },
1099
+ body: JSON.stringify({
1100
+ image: imageData
1101
+ })
1102
+ });
1103
+
1104
+ const result = await response.json();
1105
+ console.log('Face login response:', result);
1106
+
1107
+ if (result.success) {
1108
+ showMessage(result.message, 'success');
1109
+ hideWebcam();
1110
+ setTimeout(() => {
1111
+ window.location.href = '/main_app'; // Redirect to main app page after login
1112
+ }, 1500);
1113
+ } else {
1114
+ showMessage(result.message, 'error');
1115
+ hideWebcam();
1116
+ // No need to restart webcam, user can click Face Recognition again
1117
+ }
1118
+ } catch (error) {
1119
+ console.error('Face login error:', error);
1120
+ showMessage('Network error during face login. Please try again.', 'error');
1121
+ hideWebcam();
1122
+ } finally {
1123
+ showLoading(false);
1124
+ }
1125
+ };
1126
+
1127
+ const submitLogin = async (formData) => {
1128
+ showLoading(true);
1129
+
1130
+ try {
1131
+ const response = await fetch('/login', {
1132
+ method: 'POST',
1133
+ body: formData
1134
+ });
1135
+
1136
+ const result = await response.json();
1137
+
1138
+ if (result.success) {
1139
+ showMessage(result.message, 'success');
1140
+ setTimeout(() => {
1141
+ window.location.href = '/main_app'; // Redirect to main app page after login
1142
+ }, 1500);
1143
+ } else {
1144
+ showMessage(result.message, 'error');
1145
+ }
1146
+ } catch (error) {
1147
+ console.error('Login error:', error);
1148
+ showMessage('Network error during login. Please try again.', 'error');
1149
+ } finally {
1150
+ showLoading(false);
1151
+ }
1152
+ };
1153
+
1154
+ const submitRegister = async (formData) => {
1155
+ showLoading(true);
1156
+
1157
+ try {
1158
+ const response = await fetch('/register', {
1159
+ method: 'POST',
1160
+ body: formData
1161
+ });
1162
+
1163
+ const result = await response.json();
1164
+
1165
+ if (result.success) {
1166
+ showMessage(result.message, 'success');
1167
+ setTimeout(() => {
1168
+ showLoginMode(); // Redirect to login after successful registration
1169
+ }, 2000);
1170
+ } else {
1171
+ showMessage(result.message, 'error');
1172
+ }
1173
+ } catch (error) {
1174
+ console.error('Registration error:', error);
1175
+ showMessage('Network error during registration. Please try again.', 'error');
1176
+ } finally {
1177
+ showLoading(false);
1178
+ }
1179
+ };
1180
+
1181
+ // --- Event Listeners ---
1182
+ loginTab.addEventListener('click', showLoginMode);
1183
+ registerTab.addEventListener('click', showRegisterMode);
1184
+
1185
+ loginForm.addEventListener('submit', (e) => {
1186
+ e.preventDefault();
1187
+ const formData = new FormData(e.target);
1188
+ submitLogin(formData);
1189
+ });
1190
+
1191
+ registerForm.addEventListener('submit', (e) => {
1192
+ e.preventDefault();
1193
+ const formData = new FormData(e.target);
1194
+ submitRegister(formData);
1195
+ });
1196
+
1197
+ faceRegisterBtn.addEventListener('click', () => {
1198
+ const email = document.getElementById('registerEmail').value;
1199
+ const password = document.getElementById('registerPassword').value;
1200
+
1201
+ if (!email || !password) {
1202
+ showMessage('Please enter email and password before setting up face registration.', 'error');
1203
+ return;
1204
+ }
1205
+
1206
+ if (password.length < 6) {
1207
+ showMessage('Password must be at least 6 characters long.', 'error');
1208
+ return;
1209
+ }
1210
+
1211
+ startWebcam(); // Start webcam for capturing multiple images
1212
+ });
1213
+
1214
+ faceLoginBtn.addEventListener('click', () => {
1215
+ startWebcam(); // Start webcam for a single login attempt
1216
+ });
1217
+
1218
+ captureFaceBtn.addEventListener('click', () => {
1219
+ if (currentAuthMode === 'register') {
1220
+ captureForRegister();
1221
+ }
1222
+ // No action needed for login on this button, as login capture is automatic
1223
+ });
1224
+
1225
+ cancelWebcamBtn.addEventListener('click', () => {
1226
+ hideWebcam();
1227
+ showMessage('Camera cancelled.', 'info');
1228
+ });
1229
+
1230
+ // Add input focus animations
1231
+ const formInputs = document.querySelectorAll('.form-input');
1232
+ formInputs.forEach(input => {
1233
+ input.addEventListener('focus', () => {
1234
+ input.parentElement.style.transform = 'translateY(-2px)';
1235
+ });
1236
+
1237
+ input.addEventListener('blur', () => {
1238
+ input.parentElement.style.transform = 'translateY(0)';
1239
+ });
1240
+ });
1241
+
1242
+ // Add button click animations
1243
+ const allButtons = document.querySelectorAll('.btn');
1244
+ allButtons.forEach(button => {
1245
+ button.addEventListener('click', () => {
1246
+ button.style.transform = 'scale(0.98)';
1247
+ setTimeout(() => {
1248
+ button.style.transform = '';
1249
+ }, 150);
1250
+ });
1251
+ });
1252
+
1253
+ // Initialize the page
1254
+ console.log('Dynamic Facial Recognition Authentication System Loaded');
1255
+
1256
+ // Check for Flask flash messages on page load and display them
1257
+ if (flaskStatusMessage && flaskStatusMessage.children.length > 0) {
1258
+ flaskStatusMessage.style.display = 'block';
1259
+ // Hide JS-driven message if Flask messages are present
1260
+ authStatusMessage.style.display = 'none';
1261
+ // Set a timeout to fade out Flask messages if desired, similar to JS messages
1262
+ setTimeout(() => {
1263
+ flaskStatusMessage.style.opacity = '0';
1264
+ setTimeout(() => flaskStatusMessage.style.display = 'none', 300); // Allow fade out
1265
+ }, 5000); // Hide after 5 seconds
1266
+ } else {
1267
+ showMessage(''); // Clear initial message if no Flask messages
1268
+ }
1269
+
1270
+ // Add some dynamic particle movement
1271
+ const particles = document.querySelectorAll('.particle');
1272
+ particles.forEach((particle, index) => {
1273
+ setInterval(() => {
1274
+ const randomX = Math.random() * 100;
1275
+ const randomY = Math.random() * 100;
1276
+ particle.style.left = randomX + '%';
1277
+ particle.style.top = randomY + '%';
1278
+ }, 8000 + index * 1000);
1279
+ });
1280
+ });
1281
+ </script>
1282
+ </body>
1283
+ </html>
templates/index - Copy.txt ADDED
@@ -0,0 +1,1343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>VisionCraft AI - Custom Image Captioning & Segmentation</title>
7
+ <style>
8
+ /* All original CSS from website_for_image_captioning.txt */
9
+ * {
10
+ margin: 0;
11
+ padding: 0;
12
+ box-sizing: border-box;
13
+ }
14
+
15
+ body {
16
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
17
+ line-height: 1.6;
18
+ color: #333;
19
+ overflow-x: hidden;
20
+ background-color: #f0f2f5; /* Added a light background for overall page */
21
+ }
22
+
23
+ /* Hero Section */
24
+ .hero {
25
+ height: 100vh;
26
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
27
+ display: flex;
28
+ align-items: center;
29
+ justify-content: center;
30
+ position: relative;
31
+ overflow: hidden;
32
+ }
33
+
34
+ .hero::before {
35
+ content: '';
36
+ position: absolute;
37
+ width: 200%;
38
+ height: 200%;
39
+ background: url('data:image/svg+xml,<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 100 100"><circle cx="25" cy="25" r="1" fill="rgba(255,255,255,0.1)"/><circle cx="75" cy="75" r="1" fill="rgba(255,255,255,0.1)"/><circle cx="50" cy="50" r="0.5" fill="rgba(255,255,255,0.2)"/></svg>') repeat;
40
+ animation: float 20s infinite linear;
41
+ }
42
+
43
+ @keyframes float {
44
+ 0% { transform: translate(-50%, -50%); }
45
+ 100% { transform: translate(-30%, -30%); }
46
+ }
47
+
48
+ .hero-content {
49
+ text-align: center;
50
+ z-index: 2;
51
+ position: relative;
52
+ }
53
+
54
+ .hero h1 {
55
+ font-size: 4rem;
56
+ font-weight: 700;
57
+ margin-bottom: 1rem;
58
+ background: linear-gradient(45deg, #fff, #f0f8ff);
59
+ -webkit-background-clip: text;
60
+ -webkit-text-fill-color: transparent;
61
+ background-clip: text;
62
+ animation: glow 2s ease-in-out infinite alternate;
63
+ }
64
+
65
+ @keyframes glow {
66
+ from { text-shadow: 0 0 20px rgba(255,255,255,0.5); }
67
+ to { text-shadow: 0 0 30px rgba(255,255,255,0.8); }
68
+ }
69
+
70
+ .hero p {
71
+ font-size: 1.5rem;
72
+ color: rgba(255,255,255,0.9);
73
+ margin-bottom: 2rem;
74
+ max-width: 800px;
75
+ }
76
+
77
+ .cta-buttons {
78
+ display: flex;
79
+ gap: 2rem;
80
+ justify-content: center;
81
+ flex-wrap: wrap;
82
+ }
83
+
84
+ .btn {
85
+ padding: 1rem 2rem;
86
+ border: none;
87
+ border-radius: 50px;
88
+ font-size: 1.1rem;
89
+ font-weight: 600;
90
+ cursor: pointer;
91
+ transition: all 0.3s ease;
92
+ text-decoration: none;
93
+ display: inline-flex;
94
+ align-items: center;
95
+ gap: 0.5rem;
96
+ }
97
+
98
+ .btn-primary {
99
+ background: linear-gradient(45deg, #ff6b6b, #feca57);
100
+ color: white;
101
+ box-shadow: 0 10px 30px rgba(255,107,107,0.3);
102
+ }
103
+
104
+ .btn-primary:hover {
105
+ transform: translateY(-3px);
106
+ box-shadow: 0 15px 40px rgba(255,107,107,0.4);
107
+ }
108
+
109
+ .btn-secondary {
110
+ background: rgba(255,255,255,0.1);
111
+ color: white;
112
+ border: 2px solid rgba(255,255,255,0.3);
113
+ backdrop-filter: blur(10px);
114
+ }
115
+
116
+ .btn-secondary:hover {
117
+ background: rgba(255,255,255,0.2);
118
+ transform: translateY(-3px);
119
+ }
120
+
121
+ /* Navigation */
122
+ .navbar {
123
+ position: fixed;
124
+ top: 0;
125
+ width: 100%;
126
+ background: rgba(255,255,255,0.1);
127
+ backdrop-filter: blur(20px);
128
+ z-index: 1000;
129
+ padding: 1rem 0;
130
+ transition: all 0.3s ease;
131
+ }
132
+
133
+ .nav-container {
134
+ max-width: 1200px;
135
+ margin: 0 auto;
136
+ display: flex;
137
+ justify-content: space-between;
138
+ align-items: center;
139
+ padding: 0 2rem;
140
+ }
141
+
142
+ .logo {
143
+ font-size: 1.5rem;
144
+ font-weight: 700;
145
+ color: white;
146
+ }
147
+
148
+ .nav-links {
149
+ display: flex;
150
+ list-style: none;
151
+ gap: 2rem;
152
+ }
153
+
154
+ .nav-links a {
155
+ color: white;
156
+ text-decoration: none;
157
+ font-weight: 500;
158
+ transition: color 0.3s ease;
159
+ }
160
+
161
+ .nav-links a:hover {
162
+ color: #feca57;
163
+ }
164
+
165
+ /* Demo Section */
166
+ .demo-section {
167
+ padding: 5rem 0;
168
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
169
+ }
170
+
171
+ .container {
172
+ max-width: 1200px;
173
+ margin: 0 auto;
174
+ padding: 0 2rem;
175
+ }
176
+
177
+ .section-title {
178
+ text-align: center;
179
+ font-size: 3rem;
180
+ font-weight: 700;
181
+ margin-bottom: 3rem;
182
+ background: linear-gradient(45deg, #667eea, #764ba2);
183
+ -webkit-background-clip: text;
184
+ -webkit-text-fill-color: transparent;
185
+ background-clip: text;
186
+ }
187
+
188
+ .demo-container {
189
+ display: grid;
190
+ grid-template-columns: 1fr 1fr;
191
+ gap: 3rem;
192
+ align-items: start;
193
+ }
194
+
195
+ .upload-area {
196
+ background: white;
197
+ border-radius: 20px;
198
+ padding: 3rem;
199
+ box-shadow: 0 20px 60px rgba(0,0,0,0.1);
200
+ text-align: center;
201
+ border: 3px dashed #ddd;
202
+ transition: all 0.3s ease;
203
+ position: relative;
204
+ overflow: hidden;
205
+ }
206
+
207
+ .upload-area:hover {
208
+ border-color: #667eea;
209
+ transform: translateY(-5px);
210
+ box-shadow: 0 25px 80px rgba(0,0,0,0.15);
211
+ }
212
+
213
+ .upload-area::before {
214
+ content: '';
215
+ position: absolute;
216
+ top: 0;
217
+ left: -100%;
218
+ width: 100%;
219
+ height: 100%;
220
+ background: linear-gradient(90deg, transparent, rgba(102,126,234,0.1), transparent);
221
+ transition: left 0.5s;
222
+ }
223
+
224
+ .upload-area:hover::before {
225
+ left: 100%;
226
+ }
227
+
228
+ .upload-icon {
229
+ font-size: 4rem;
230
+ color: #667eea;
231
+ margin-bottom: 1rem;
232
+ }
233
+
234
+ .results-panel {
235
+ background: white;
236
+ border-radius: 20px;
237
+ padding: 2rem;
238
+ box-shadow: 0 20px 60px rgba(0,0,0,0.1);
239
+ min-height: 400px;
240
+ }
241
+
242
+ .tabs {
243
+ display: flex;
244
+ border-bottom: 2px solid #f0f0f0;
245
+ margin-bottom: 2rem;
246
+ }
247
+
248
+ .tab {
249
+ padding: 1rem 2rem;
250
+ cursor: pointer;
251
+ font-weight: 600;
252
+ color: #666;
253
+ border-bottom: 3px solid transparent;
254
+ transition: all 0.3s ease;
255
+ }
256
+
257
+ .tab.active {
258
+ color: #667eea;
259
+ border-color: #667eea;
260
+ }
261
+
262
+ .tab-content {
263
+ display: none;
264
+ }
265
+
266
+ .tab-content.active {
267
+ display: block;
268
+ animation: fadeIn 0.5s ease;
269
+ }
270
+
271
+ @keyframes fadeIn {
272
+ from { opacity: 0; transform: translateY(20px); }
273
+ to { opacity: 1; transform: translateY(0); }
274
+ }
275
+
276
+ /* Features Section */
277
+ .features-section {
278
+ padding: 5rem 0;
279
+ background: #fff;
280
+ }
281
+
282
+ .features-grid {
283
+ display: grid;
284
+ grid-template-columns: repeat(auto-fit, minmax(350px, 1fr));
285
+ gap: 2rem;
286
+ margin-top: 3rem;
287
+ }
288
+
289
+ .feature-card {
290
+ background: linear-gradient(135deg, #667eea, #764ba2);
291
+ border-radius: 20px;
292
+ padding: 2.5rem;
293
+ color: white;
294
+ position: relative;
295
+ overflow: hidden;
296
+ transition: transform 0.3s ease;
297
+ }
298
+
299
+ .feature-card:hover {
300
+ transform: translateY(-10px) scale(1.02);
301
+ }
302
+
303
+ .feature-card::before {
304
+ content: '';
305
+ position: absolute;
306
+ top: 0;
307
+ right: -50%;
308
+ width: 100%;
309
+ height: 100%;
310
+ background: rgba(255,255,255,0.1);
311
+ transform: skewX(-15deg);
312
+ transition: right 0.5s ease;
313
+ }
314
+
315
+ .feature-card:hover::before {
316
+ right: 100%;
317
+ }
318
+
319
+ .feature-icon {
320
+ font-size: 3rem;
321
+ margin-bottom: 1rem;
322
+ display: block;
323
+ }
324
+
325
+ .feature-card h3 {
326
+ font-size: 1.5rem;
327
+ margin-bottom: 1rem;
328
+ }
329
+
330
+ /* Architecture Section */
331
+ .architecture-section {
332
+ padding: 5rem 0;
333
+ background: linear-gradient(135deg, #1e3c72 0%, #2a5298 100%);
334
+ color: white;
335
+ }
336
+
337
+ .architecture-viz {
338
+ background: rgba(255,255,255,0.1);
339
+ border-radius: 20px;
340
+ padding: 3rem;
341
+ margin-top: 3rem;
342
+ backdrop-filter: blur(10px);
343
+ border: 1px solid rgba(255,255,255,0.2);
344
+ }
345
+
346
+ .network-diagram {
347
+ display: flex;
348
+ justify-content: space-between;
349
+ align-items: center;
350
+ margin: 2rem 0;
351
+ flex-wrap: wrap;
352
+ gap: 2rem;
353
+ }
354
+
355
+ .network-node {
356
+ background: rgba(255,255,255,0.2);
357
+ border-radius: 15px;
358
+ padding: 1.5rem;
359
+ text-align: center;
360
+ backdrop-filter: blur(5px);
361
+ border: 1px solid rgba(255,255,255,0.3);
362
+ transition: all 0.3s ease;
363
+ cursor: pointer;
364
+ min-width: 150px;
365
+ }
366
+
367
+ .network-node:hover {
368
+ background: rgba(255,255,255,0.3);
369
+ transform: scale(1.05);
370
+ }
371
+
372
+ .arrow {
373
+ font-size: 2rem;
374
+ color: #feca57;
375
+ }
376
+
377
+ /* Metrics Section */
378
+ .metrics-section {
379
+ padding: 5rem 0;
380
+ background: #f8f9fa;
381
+ }
382
+
383
+ .metrics-grid {
384
+ display: grid;
385
+ grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
386
+ gap: 2rem;
387
+ margin-top: 3rem;
388
+ }
389
+
390
+ .metric-card {
391
+ background: white;
392
+ border-radius: 15px;
393
+ padding: 2rem;
394
+ text-align: center;
395
+ box-shadow: 0 10px 30px rgba(0,0,0,0.1);
396
+ transition: transform 0.3s ease;
397
+ }
398
+
399
+ .metric-card:hover {
400
+ transform: translateY(-5px);
401
+ }
402
+
403
+ .metric-value {
404
+ font-size: 3rem;
405
+ font-weight: 700;
406
+ color: #667eea;
407
+ margin-bottom: 0.5rem;
408
+ }
409
+
410
+ .metric-label {
411
+ color: #666;
412
+ font-weight: 600;
413
+ }
414
+
415
+ /* Research Section */
416
+ .research-section {
417
+ padding: 5rem 0;
418
+ background: linear-gradient(135deg, #2c3e50 0%, #34495e 100%);
419
+ color: white;
420
+ }
421
+
422
+ .research-grid {
423
+ display: grid;
424
+ grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
425
+ gap: 2rem;
426
+ margin-top: 3rem;
427
+ }
428
+
429
+ .research-card {
430
+ background: rgba(255,255,255,0.1);
431
+ border-radius: 15px;
432
+ padding: 2rem;
433
+ backdrop-filter: blur(10px);
434
+ border: 1px solid rgba(255,255,255,0.2);
435
+ transition: all 0.3s ease;
436
+ }
437
+
438
+ .research-card:hover {
439
+ background: rgba(255,255,255,0.2);
440
+ transform: translateY(-5px);
441
+ }
442
+
443
+ /* Footer */
444
+ .footer {
445
+ background: #1a1a1a;
446
+ color: white;
447
+ padding: 3rem 0;
448
+ text-align: center;
449
+ }
450
+
451
+ .footer-content {
452
+ display: grid;
453
+ grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
454
+ gap: 2rem;
455
+ margin-bottom: 2rem;
456
+ }
457
+
458
+ .footer-section h3 {
459
+ margin-bottom: 1rem;
460
+ color: #feca57;
461
+ }
462
+
463
+ .social-links {
464
+ display: flex;
465
+ justify-content: center;
466
+ gap: 1rem;
467
+ margin-top: 2rem;
468
+ }
469
+
470
+ .social-link {
471
+ display: inline-flex;
472
+ align-items: center;
473
+ justify-content: center;
474
+ width: 50px;
475
+ height: 50px;
476
+ background: linear-gradient(45deg, #667eea, #764ba2);
477
+ border-radius: 50%;
478
+ color: white;
479
+ text-decoration: none;
480
+ font-size: 1.5rem;
481
+ transition: transform 0.3s ease;
482
+ }
483
+
484
+ .social-link:hover {
485
+ transform: scale(1.1) rotate(360deg);
486
+ }
487
+
488
+ /* Responsive Design */
489
+ @media (max-width: 768px) {
490
+ .hero h1 {
491
+ font-size: 2.5rem;
492
+ }
493
+
494
+ .demo-container {
495
+ grid-template-columns: 1fr;
496
+ }
497
+
498
+ .nav-links {
499
+ display: none;
500
+ }
501
+
502
+ .features-grid {
503
+ grid-template-columns: 1fr;
504
+ }
505
+
506
+ .network-diagram {
507
+ flex-direction: column;
508
+ }
509
+
510
+ .arrow {
511
+ transform: rotate(90deg);
512
+ }
513
+ }
514
+
515
+ /* Animations */
516
+ @keyframes pulse {
517
+ 0%, 100% { transform: scale(1); }
518
+ 50% { transform: scale(1.05); }
519
+ }
520
+
521
+ .pulse {
522
+ animation: pulse 2s infinite;
523
+ }
524
+
525
+ /* Processing Animation */
526
+ .processing {
527
+ display: inline-block;
528
+ width: 40px;
529
+ height: 40px;
530
+ border: 4px solid #f3f3f3;
531
+ border-top: 4px solid #667eea;
532
+ border-radius: 50%;
533
+ animation: spin 1s linear infinite;
534
+ }
535
+
536
+ @keyframes spin {
537
+ 0% { transform: rotate(0deg); }
538
+ 100% { transform: rotate(360deg); }
539
+ }
540
+
541
+ /* Progress Bar */
542
+ .progress-bar {
543
+ width: 100%;
544
+ height: 8px;
545
+ background: #f0f0f0;
546
+ border-radius: 4px;
547
+ overflow: hidden;
548
+ margin: 1rem 0;
549
+ }
550
+
551
+ .progress-fill {
552
+ height: 100%;
553
+ background: linear-gradient(45deg, #667eea, #764ba2);
554
+ width: 0%;
555
+ transition: width 0.3s ease;
556
+ border-radius: 4px;
557
+ }
558
+
559
+ /* --- NEW: Custom Styles for Flask Integration to blend with existing design --- */
560
+ /* These styles override/add to existing ones to make the Flask form look like part of the UI */
561
+ .flask-form-container {
562
+ background: white; /* Match demo-container's background */
563
+ border-radius: 20px; /* Match demo-container's border-radius */
564
+ padding: 3rem; /* Match demo-container's padding */
565
+ box-shadow: 0 20px 60px rgba(0,0,0,0.1); /* Match demo-container's shadow */
566
+ text-align: center;
567
+ border: 3px dashed #ddd; /* Match upload-area's dashed border */
568
+ transition: all 0.3s ease;
569
+ position: relative;
570
+ overflow: hidden;
571
+ }
572
+ /* Match hover effects of original upload-area */
573
+ .flask-form-container:hover {
574
+ border-color: #667eea;
575
+ transform: translateY(-5px);
576
+ box-shadow: 0 25px 80px rgba(0,0,0,0.15);
577
+ }
578
+ .flask-form-container::before {
579
+ content: '';
580
+ position: absolute;
581
+ top: 0;
582
+ left: -100%;
583
+ width: 100%;
584
+ height: 100%;
585
+ background: linear-gradient(90deg, transparent, rgba(102,126,234,0.1), transparent);
586
+ transition: left 0.5s;
587
+ }
588
+ .flask-form-container:hover::before {
589
+ left: 100%;
590
+ }
591
+
592
+ .flask-form-container h3 {
593
+ font-size: 1.8rem; /* Consistent heading size */
594
+ color: #333; /* Darker text for contrast */
595
+ margin-bottom: 1.5rem;
596
+ }
597
+
598
+ .flask-file-input {
599
+ display: block; /* Ensure it takes full width */
600
+ width: 100%;
601
+ padding: 0.8rem;
602
+ border: 1px solid #ddd;
603
+ border-radius: 5px;
604
+ margin-bottom: 1.5rem;
605
+ font-size: 1rem;
606
+ cursor: pointer;
607
+ background-color: #fff; /* White background */
608
+ }
609
+ /* Style the file input button (browser-specific styling) */
610
+ .flask-file-input::file-selector-button {
611
+ background-color: #667eea;
612
+ color: white;
613
+ border: none;
614
+ padding: 0.5rem 1rem;
615
+ border-radius: 5px;
616
+ cursor: pointer;
617
+ margin-right: 1rem;
618
+ transition: background-color 0.3s ease;
619
+ }
620
+ .flask-file-input::file-selector-button:hover {
621
+ background-color: #5567d4;
622
+ }
623
+
624
+ .flask-submit-btn {
625
+ /* Using btn-primary styles for consistency */
626
+ padding: 1rem 2rem;
627
+ border: none;
628
+ border-radius: 50px;
629
+ font-size: 1.1rem;
630
+ font-weight: 600;
631
+ cursor: pointer;
632
+ transition: all 0.3s ease;
633
+ text-decoration: none;
634
+ display: inline-flex;
635
+ align-items: center;
636
+ gap: 0.5rem;
637
+ background: linear-gradient(45deg, #ff6b6b, #feca57);
638
+ color: white;
639
+ box-shadow: 0 10px 30px rgba(255,107,107,0.3);
640
+ }
641
+ .flask-submit-btn:hover {
642
+ transform: translateY(-3px);
643
+ box-shadow: 0 15px 40px rgba(255,107,107,0.4);
644
+ }
645
+
646
+ .flask-result-box {
647
+ background: #f8f9fa; /* Light background for results */
648
+ padding: 1.5rem;
649
+ border-radius: 10px;
650
+ border-left: 4px solid #667eea; /* Accent border */
651
+ margin-top: 2rem;
652
+ text-align: left; /* Align text to left for readability */
653
+ box-shadow: 0 5px 15px rgba(0,0,0,0.05);
654
+ }
655
+
656
+ .flask-result-box h3 {
657
+ font-size: 1.6rem;
658
+ color: #667eea;
659
+ margin-bottom: 1rem;
660
+ text-align: center; /* Center heading for results */
661
+ }
662
+
663
+ .flask-result-box p {
664
+ font-size: 1.1rem;
665
+ color: #333;
666
+ line-height: 1.5;
667
+ word-wrap: break-word; /* Ensure long captions wrap */
668
+ }
669
+
670
+ .flask-uploaded-image {
671
+ max-width: 100%;
672
+ height: auto;
673
+ border-radius: 10px;
674
+ margin-top: 1.5rem;
675
+ margin-bottom: 1.5rem;
676
+ box-shadow: 0 4px 8px rgba(0,0,0,0.1);
677
+ }
678
+
679
+ /* Flash Messages */
680
+ .flash-message {
681
+ padding: 0.75rem 1.25rem;
682
+ margin-bottom: 1rem;
683
+ border: 1px solid transparent;
684
+ border-radius: 0.375rem;
685
+ font-weight: 500;
686
+ text-align: center;
687
+ }
688
+ .flash-success {
689
+ background-color: #d1e7dd; /* Light green */
690
+ color: #0f5132; /* Dark green */
691
+ border-color: #badbcc;
692
+ }
693
+ .flash-error {
694
+ background-color: #f8d7da; /* Light red */
695
+ color: #721c24; /* Dark red */
696
+ border-color: #f5c2c7;
697
+ }
698
+
699
+ /* Video Captioning Section (New) */
700
+ .video-captioning-section {
701
+ padding: 5rem 0;
702
+ background: linear-gradient(135deg, #e0f2f7 0%, #a7d9eb 100%); /* Light blue gradient */
703
+ text-align: center;
704
+ margin-top: 3rem; /* Space from previous section */
705
+ border-radius: 20px;
706
+ box-shadow: 0 20px 60px rgba(0,0,0,0.1);
707
+ }
708
+ .video-captioning-section h2 {
709
+ font-size: 3rem;
710
+ font-weight: 700;
711
+ margin-bottom: 1rem;
712
+ color: #1e3c72; /* Dark blue from your architecture section */
713
+ }
714
+ .video-captioning-section p {
715
+ font-size: 1.2rem;
716
+ color: #333;
717
+ margin-bottom: 2rem;
718
+ max-width: 800px;
719
+ margin-left: auto;
720
+ margin-right: auto;
721
+ }
722
+ .video-cta-btn {
723
+ /* Using btn-primary styles for consistency */
724
+ padding: 1rem 2.5rem;
725
+ border: none;
726
+ border-radius: 50px;
727
+ font-size: 1.1rem;
728
+ font-weight: 600;
729
+ cursor: pointer;
730
+ transition: all 0.3s ease;
731
+ text-decoration: none;
732
+ display: inline-flex;
733
+ align-items: center;
734
+ gap: 0.5rem;
735
+ background: linear-gradient(45deg, #2196F3, #667eea); /* Blue gradient */
736
+ color: white;
737
+ box-shadow: 0 10px 30px rgba(33,150,243,0.3);
738
+ }
739
+ .video-cta-btn:hover {
740
+ transform: translateY(-3px);
741
+ box-shadow: 0 15px 40px rgba(33,150,243,0.4);
742
+ }
743
+
744
+ </style>
745
+ <!-- FontAwesome for icons -->
746
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
747
+ </head>
748
+ <body>
749
+ <!-- Navigation -->
750
+ <nav class="navbar">
751
+ <div class="nav-container">
752
+ <div class="logo">VisionCraft AI</div>
753
+ <ul class="nav-links">
754
+ <li><a href="#demo">Live Demo</a></li>
755
+ <li><a href="#features">Features</a></li>
756
+ <li><a href="#architecture">Architecture</a></li>
757
+ <li><a href="#metrics">Performance</a></li>
758
+ <li><a href="#research">Research</a></li>
759
+ </ul>
760
+ </div>
761
+ </nav>
762
+
763
+ <!-- Hero Section -->
764
+ <section class="hero">
765
+ <div class="hero-content">
766
+ <h1>VisionCraft AI</h1>
767
+ <p>Revolutionary Image Captioning & Segmentation built from scratch using custom neural architectures. No pretrained models, pure innovation.</p>
768
+ <div class="cta-buttons">
769
+ <a href="#demo" class="btn btn-primary">
770
+ 🚀 Try Live Demo
771
+ </a>
772
+ <a href="#architecture" class="btn btn-secondary">
773
+ 🧠 Explore Architecture
774
+ </a>
775
+ </div>
776
+ </div>
777
+ </section>
778
+
779
+ <!-- Demo Section (Image Captioning & Segmentation) -->
780
+ <section id="demo" class="demo-section">
781
+ <div class="container">
782
+ <h2 class="section-title">Interactive AI Demo</h2>
783
+ <div class="demo-container">
784
+ <!-- Flask Image Captioning Form -->
785
+ <div class="upload-area flask-form-container">
786
+ <h3>Upload Your Image for Captioning</h3>
787
+ <p>Drag & drop an image or click to browse</p>
788
+
789
+ <!-- Flash Messages -->
790
+ {% with messages = get_flashed_messages(with_categories=true) %}
791
+ {% if messages %}
792
+ <div class="mb-4">
793
+ {% for category, message in messages %}
794
+ <div class="flash-message flash-{{ category }}">
795
+ {{ message }}
796
+ </div>
797
+ {% endfor %}
798
+ </div>
799
+ {% endif %}
800
+ {% endwith %}
801
+
802
+ <form action="/predict" method="post" enctype="multipart/form-data">
803
+ <input id="imageInput" name="file" type="file" accept="image/*" required class="flask-file-input">
804
+ <p style="font-size: 0.9em; color: #777; margin-top: 0.5rem; margin-bottom: 1rem;">PNG, JPG, JPEG, GIF formats allowed.</p>
805
+ <button type="submit" class="flask-submit-btn">
806
+ Generate Caption
807
+ </button>
808
+ </form>
809
+ </div>
810
+
811
+ <!-- Results Panel -->
812
+ <div class="results-panel">
813
+ <div class="tabs">
814
+ <div class="tab active" data-tab="caption">📝 Caption</div>
815
+ <div class="tab" data-tab="segment">🎯 Segmentation</div>
816
+ <div class="tab" data-tab="analysis">📊 Analysis</div>
817
+ </div>
818
+
819
+ <div class="tab-content active" id="caption">
820
+ <h3>Generated Caption</h3>
821
+ <div id="captionResult">
822
+ {% if caption %}
823
+ <div class="flask-result-box">
824
+ <h3>Your Uploaded Image:</h3>
825
+ {% if uploaded_image_url %}
826
+ <img src="{{ uploaded_image_url }}" alt="Uploaded Image" class="flask-uploaded-image">
827
+ {% endif %}
828
+ <h3>Generated Caption:</h3>
829
+ <p>"{{ caption }}"</p>
830
+ <!-- Placeholder for confidence score if you integrate it later from Flask -->
831
+ <!-- <div style="margin-top: 1rem; text-align: center;">Confidence: XX.X%</div> -->
832
+ </div>
833
+ {% else %}
834
+ <p style="color: #666; font-style: italic;">Upload an image to see the AI-generated caption...</p>
835
+ {% endif %}
836
+ </div>
837
+ <div id="confidenceScore" style="margin-top: 1rem;"></div>
838
+ </div>
839
+
840
+ <div class="tab-content" id="segment">
841
+ <h3>Segmentation Results (Partner's Work)</h3>
842
+ <div id="segmentResult">
843
+ <p style="color: #666; font-style: italic;">Segmentation masks will appear here once integrated.</p>
844
+ <div style="text-align: center; margin-top: 1rem;">
845
+ <img src="https://placehold.co/400x250/cccccc/333333?text=Segmentation+Preview" alt="Segmentation Placeholder" class="flask-uploaded-image">
846
+ <p style="margin-top: 0.5rem; font-size: 0.9em; color: #777;">Placeholder image until live segmentation is ready.</p>
847
+ </div>
848
+ </div>
849
+ </div>
850
+
851
+ <div class="tab-content" id="analysis">
852
+ <h3>Technical Analysis</h3>
853
+ <div id="analysisResult">
854
+ <p style="color: #666; font-style: italic;">Detailed analysis coming soon from backend...</p>
855
+ </div>
856
+ </div>
857
+ </div>
858
+ </div>
859
+ </div>
860
+ </section>
861
+
862
+ <!-- NEW: Video Captioning Section -->
863
+ <section id="video-captioning" class="video-captioning-section">
864
+ <div class="container">
865
+ <h2 class="section-title">Real-time Video Captioning</h2>
866
+ <p>Experience live descriptions of video streams, providing instant understanding of dynamic scenes. Click the button below to explore our dedicated video captioning demo.</p>
867
+ <a href="#" class="video-cta-btn" onclick="alert('Video captioning feature will be integrated soon!')">
868
+ Go to Video Captioning 🎥
869
+ </a>
870
+ </div>
871
+ </section>
872
+ <!-- END NEW: Video Captioning Section -->
873
+
874
+ <!-- Features Section (Existing) -->
875
+ <section id="features" class="features-section">
876
+ <div class="container">
877
+ <h2 class="section-title">Exceptional Features</h2>
878
+ <div class="features-grid">
879
+ <div class="feature-card">
880
+ <span class="feature-icon">🧠</span>
881
+ <h3>Custom Neural Architecture</h3>
882
+ <p>Built entirely from scratch without any pretrained models. Custom CNN+LSTM for captioning and U-Net variant for segmentation.</p>
883
+ </div>
884
+
885
+ <div class="feature-card">
886
+ <span class="feature-icon">⚡</span>
887
+ <h3>Real-time Processing</h3>
888
+ <p>Optimized inference pipeline capable of processing images in under 2 seconds with efficient memory management.</p>
889
+ </div>
890
+
891
+ <div class="feature-card">
892
+ <span class="feature-icon">🎯</span>
893
+ <h3>Dual Task Mastery</h3>
894
+ <p>Seamlessly integrated captioning and segmentation with shared feature extraction for enhanced performance.</p>
895
+ </div>
896
+
897
+ <div class="feature-card">
898
+ <span class="feature-icon">📊</span>
899
+ <h3>Advanced Analytics</h3>
900
+ <p>Comprehensive performance metrics, attention visualization, and detailed error analysis capabilities.</p>
901
+ </div>
902
+
903
+ <div class="feature-card">
904
+ <span class="feature-icon">🔧</span>
905
+ <h3>Interactive Tools</h3>
906
+ <p>Web-based interface with real-time processing, batch operations, and detailed result visualization.</p>
907
+ </div>
908
+
909
+ <div class="feature-card">
910
+ <span class="feature-icon">🚀</span>
911
+ <h3>Production Ready</h3>
912
+ <p>Dockerized deployment, REST API, comprehensive testing, and scalable cloud infrastructure.</p>
913
+ </div>
914
+ </div>
915
+ </div>
916
+ </section>
917
+
918
+ <!-- Architecture Section -->
919
+ <section id="architecture" class="architecture-section">
920
+ <div class="container">
921
+ <h2 class="section-title">Custom Architecture</h2>
922
+ <div class="architecture-viz">
923
+ <h3>Neural Network Flow</h3>
924
+ <div class="network-diagram">
925
+ <div class="network-node" data-info="Custom CNN feature extractor with attention mechanism">
926
+ <h4>Feature Extraction</h4>
927
+ <p>Custom CNN</p>
928
+ </div>
929
+ <div class="arrow">→</div>
930
+ <div class="network-node" data-info="Shared feature maps for both tasks">
931
+ <h4>Shared Features</h4>
932
+ <p>Multi-scale</p>
933
+ </div>
934
+ <div class="arrow">→</div>
935
+ <div class="network-node" data-info="LSTM with attention for caption generation">
936
+ <h4>Caption Branch</h4>
937
+ <p>LSTM + Attention</p>
938
+ </div>
939
+ </div>
940
+
941
+ <div class="network-diagram">
942
+ <div class="network-node" data-info="U-Net inspired architecture for segmentation">
943
+ <h4>Segmentation Branch</h4>
944
+ <p>Custom U-Net</p>
945
+ </div>
946
+ <div class="arrow">→</div>
947
+ <div class="network-node" data-info="Multi-task loss function optimization">
948
+ <h4>Loss Integration</h4>
949
+ <p>Multi-task Loss</p>
950
+ </div>
951
+ <div class="arrow">→</div>
952
+ <div class="network-node" data-info="Final output processing and confidence scoring">
953
+ <h4>Output Processing</h4>
954
+ <p>Post-processing</p>
955
+ </div>
956
+ </div>
957
+
958
+ <div style="text-align: center; margin-top: 2rem;">
959
+ <button class="btn btn-primary" onclick="showArchitectureDetails()">
960
+ 🔍 Explore 3D Architecture
961
+ </button>
962
+ </div>
963
+ </div>
964
+ </div>
965
+ </section>
966
+
967
+ <!-- Metrics Section -->
968
+ <section id="metrics" class="metrics-section">
969
+ <div class="container">
970
+ <h2 class="section-title">Performance Metrics</h2>
971
+ <div class="metrics-grid">
972
+ <div class="metric-card">
973
+ <div class="metric-value" id="bleuScore">78.4</div>
974
+ <div class="metric-label">BLEU-4 Score</div>
975
+ </div>
976
+
977
+ <div class="metric-card">
978
+ <div class="metric-value" id="miouScore">82.1</div>
979
+ <div class="metric-label">mIoU Score (%)</div>
980
+ </div>
981
+
982
+ <div class="metric-card">
983
+ <div class="metric-value" id="inferenceTime">1.8</div>
984
+ <div class="metric-label">Inference Time (s)</div>
985
+ </div>
986
+
987
+ <div class="metric-card">
988
+ <div class="metric-value" id="modelSize">45.2</div>
989
+ <div class="metric-label">Model Size (MB)</div>
990
+ </div>
991
+
992
+ <div class="metric-card">
993
+ <div class="metric-value" id="accuracy">85.7</div>
994
+ <div class="metric-label">Overall Accuracy (%)</div>
995
+ </div>
996
+
997
+ <div class="metric-card">
998
+ <div class="metric-value" id="fps">12</div>
999
+ <div class="metric-label">Processing FPS</div>
1000
+ </div>
1001
+ </div>
1002
+ </div>
1003
+ </section>
1004
+
1005
+ <!-- Research Section -->
1006
+ <section id="research" class="research-section">
1007
+ <div class="container">
1008
+ <h2 class="section-title">Research & Innovation</h2>
1009
+ <div class="research-grid">
1010
+ <div class="research-card">
1011
+ <h3>📚 Technical Documentation</h3>
1012
+ <p>Complete research paper with mathematical formulations, architecture details, and experimental results.</p>
1013
+ <button class="btn btn-primary" style="margin-top: 1rem;">Read Paper</button>
1014
+ </div>
1015
+
1016
+ <div class="research-card">
1017
+ <h3>🔬 Ablation Studies</h3>
1018
+ <p>Comprehensive analysis of different architectural choices and their impact on model performance.</p>
1019
+ <button class="btn btn-primary" style="margin-top: 1rem;">View Studies</button>
1020
+ </div>
1021
+
1022
+ <div class="research-card">
1023
+ <h3>💻 Code Repository</h3>
1024
+ <p>Open-source implementation with detailed comments, training scripts, and deployment guides.</p>
1025
+ <button class="btn btn-primary" style="margin-top: 1rem;">GitHub Repo</button>
1026
+ </div>
1027
+
1028
+ <div class="research-card">
1029
+ <h3>📊 Training Insights</h3>
1030
+ <p>Interactive dashboard showing training progress, loss curves, and hyperparameter optimization results.</p>
1031
+ <button class="btn btn-primary" style="margin-top: 1rem;">Training Dashboard</button>
1032
+ </div>
1033
+ </div>
1034
+ </div>
1035
+ </section>
1036
+
1037
+ <!-- Footer -->
1038
+ <footer class="footer">
1039
+ <div class="container">
1040
+ <div class="footer-content">
1041
+ <div class="footer-section">
1042
+ <h3>VisionCraft AI</h3>
1043
+ <p>Revolutionary computer vision solutions built from scratch.</p>
1044
+ </div>
1045
+
1046
+ <div class="footer-section">
1047
+ <h3>Quick Links</h3>
1048
+ <p><a href="#demo" style="color: #ccc; text-decoration: none;">Live Demo</a></p>
1049
+ <p><a href="#architecture" style="color: #ccc; text-decoration: none;">Architecture</a></p>
1050
+ <p><a href="#research" style="color: #ccc; text-decoration: none;">Research</a></p>
1051
+ </div>
1052
+
1053
+ <div class="footer-section">
1054
+ <h3>Developer</h3>
1055
+ <p>Built with passion during internship at Zidio Development</p>
1056
+ <p>Contact: your.email@example.com</p>
1057
+ </div>
1058
+ </div>
1059
+
1060
+ <div class="social-links">
1061
+ <a href="#" class="social-link">📧</a>
1062
+ <a href="#" class="social-link">💼</a>
1063
+ <a href="#" class="social-link">🐙</a>
1064
+ <a href="#" class="social-link">🐦</a>
1065
+ </div>
1066
+
1067
+ <p style="margin-top: 2rem; padding-top: 2rem; border-top: 1px solid #333; color: #ccc;">
1068
+ © 2024 VisionCraft AI. Built with ❤️ for innovation.
1069
+ </p>
1070
+ </div>
1071
+ </footer>
1072
+
1073
+ <script>
1074
+ // Tab functionality (Existing, Modified to clear for Flask output)
1075
+ document.querySelectorAll('.tab').forEach(tab => {
1076
+ tab.addEventListener('click', function() {
1077
+ // Remove active class from all tabs and contents
1078
+ document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
1079
+ document.querySelectorAll('.tab-content').forEach(c => c.classList.remove('active'));
1080
+
1081
+ // Add active class to clicked tab
1082
+ this.classList.add('active');
1083
+
1084
+ // Show corresponding content
1085
+ const tabId = this.getAttribute('data-tab');
1086
+ document.getElementById(tabId).classList.add('active');
1087
+
1088
+ // Clear previous results when switching tabs, except for the Flask-driven caption tab
1089
+ if (tabId !== 'caption') {
1090
+ // Reset segmentation and analysis tabs if they were showing old data
1091
+ document.getElementById('segmentResult').innerHTML = '<p style="color: #666; font-style: italic;">Segmentation masks will appear here once integrated.</p><div style="text-align: center; margin-top: 1rem;"><img src="https://placehold.co/400x250/cccccc/333333?text=Segmentation+Preview" alt="Segmentation Placeholder" class="flask-uploaded-image"><p style="margin-top: 0.5rem; font-size: 0.9em; color: #777;">Placeholder image until live segmentation is ready.</p></div>';
1092
+ document.getElementById('analysisResult').innerHTML = '<p style="color: #666; font-style: italic;">Detailed analysis coming soon from backend...</p>';
1093
+ }
1094
+ });
1095
+ });
1096
+
1097
+ // The original JavaScript for file upload simulation (processImage, showResults, drag/drop)
1098
+ // is REMOVED as Flask handles the actual file upload and rendering.
1099
+ // The HTML form now directly submits to Flask.
1100
+
1101
+ // Smooth scrolling for navigation links (Existing)
1102
+ document.querySelectorAll('a[href^="#"]').forEach(anchor => {
1103
+ anchor.addEventListener('click', function (e) {
1104
+ e.preventDefault();
1105
+ const target = document.querySelector(this.getAttribute('href'));
1106
+ if (target) {
1107
+ target.scrollIntoView({
1108
+ behavior: 'smooth',
1109
+ block: 'start'
1110
+ });
1111
+ }
1112
+ });
1113
+ });
1114
+
1115
+ // Navbar scroll effect (Existing)
1116
+ window.addEventListener('scroll', function() {
1117
+ const navbar = document.querySelector('.navbar');
1118
+ if (window.scrollY > 100) {
1119
+ navbar.style.background = 'rgba(0,0,0,0.9)';
1120
+ } else {
1121
+ navbar.style.background = 'rgba(255,255,255,0.1)';
1122
+ }
1123
+ });
1124
+
1125
+ // Animate metrics when in view (Existing)
1126
+ function animateMetrics() {
1127
+ const metrics = document.querySelectorAll('.metric-value');
1128
+ const observer = new IntersectionObserver((entries) => {
1129
+ entries.forEach(entry => {
1130
+ if (entry.isIntersecting) {
1131
+ const target = entry.target;
1132
+ const finalValue = parseFloat(target.textContent);
1133
+ let currentValue = 0;
1134
+ const increment = finalValue / 50;
1135
+
1136
+ const timer = setInterval(() => {
1137
+ currentValue += increment;
1138
+ if (currentValue >= finalValue) {
1139
+ currentValue = finalValue;
1140
+ clearInterval(timer);
1141
+ }
1142
+ target.textContent = currentValue.toFixed(1);
1143
+ }, 30);
1144
+
1145
+ observer.unobserve(target);
1146
+ }
1147
+ });
1148
+ });
1149
+
1150
+ metrics.forEach(metric => observer.observe(metric));
1151
+ }
1152
+
1153
+ // Architecture details modal (Existing)
1154
+ function showArchitectureDetails() {
1155
+ const modal = document.createElement('div');
1156
+ modal.style.cssText = `
1157
+ position: fixed;
1158
+ top: 0;
1159
+ left: 0;
1160
+ width: 100%;
1161
+ height: 100%;
1162
+ background: rgba(0,0,0,0.8);
1163
+ display: flex;
1164
+ align-items: center;
1165
+ justify-content: center;
1166
+ z-index: 9999;
1167
+ backdrop-filter: blur(10px);
1168
+ `;
1169
+
1170
+ modal.innerHTML = `
1171
+ <div style="background: white; border-radius: 20px; padding: 3rem; max-width: 800px; max-height: 80vh; overflow-y: auto; position: relative;">
1172
+ <button onclick="this.closest('.modal').remove()" style="position: absolute; top: 1rem; right: 1rem; border: none; background: none; font-size: 2rem; cursor: pointer; color: #666;">×</button>
1173
+
1174
+ <h2 style="color: #667eea; margin-bottom: 2rem; text-align: center;">3D Architecture Visualization</h2>
1175
+
1176
+ <div style="text-align: center; margin-bottom: 2rem;">
1177
+ <div style="width: 300px; height: 200px; background: linear-gradient(45deg, #667eea, #764ba2); border-radius: 15px; margin: 0 auto; display: flex; align-items: center; justify-content: center; color: white; font-size: 1.2rem; transform-style: preserve-3d; animation: rotate3d 4s infinite linear;">
1178
+ 🧠 Neural Network<br>3D Visualization
1179
+ </div>
1180
+ </div>
1181
+
1182
+ <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 2rem; margin-top: 2rem;">
1183
+ <div>
1184
+ <h3 style="color: #667eea;">Caption Branch</h3>
1185
+ <ul style="padding-left: 1.5rem; color: #666;">
1186
+ <li>CNN Feature Extractor (5 layers)</li>
1187
+ <li>Attention Mechanism</li>
1188
+ <li>LSTM Decoder (2 layers)</li>
1189
+ <li>Vocabulary: 10,000 words</li>
1190
+ </ul>
1191
+ </div>
1192
+ <div>
1193
+ <h3 style="color: #667eea;">Segmentation Branch</h3>
1194
+ <ul style="padding-left: 1.5rem; color: #666;">
1195
+ <li>U-Net Architecture</li>
1196
+ <li>Skip Connections</li>
1197
+ <li>Multi-scale Features</li>
1198
+ <li>21 Object Classes</li>
1199
+ </ul>
1200
+ </div>
1201
+ </div>
1202
+
1203
+ <div style="background: #f8f9fa; padding: 2rem; border-radius: 10px; margin-top: 2rem;">
1204
+ <h3 style="color: #667eea; margin-bottom: 1rem;">Key Innovations</h3>
1205
+ <p style="color: #666; line-height: 1.8;">
1206
+ Our custom architecture implements shared feature extraction with dual-head processing,
1207
+ attention-based caption generation, and efficient multi-task learning with weighted loss optimization.
1208
+ </p>
1209
+ </div>
1210
+ </div>
1211
+ `;
1212
+
1213
+ modal.className = 'modal';
1214
+ document.body.appendChild(modal);
1215
+
1216
+ // Add rotation animation
1217
+ const style = document.createElement('style');
1218
+ style.textContent = `
1219
+ @keyframes rotate3d {
1220
+ 0% { transform: rotateY(0deg) rotateX(10deg); }
1221
+ 100% { transform: rotateY(360deg) rotateX(10deg); }
1222
+ }
1223
+ `;
1224
+ document.head.appendChild(style);
1225
+ }
1226
+
1227
+ // Network node hover effects (Existing)
1228
+ document.querySelectorAll('.network-node').forEach(node => {
1229
+ node.addEventListener('mouseenter', function() {
1230
+ const info = this.getAttribute('data-info');
1231
+ if (info) {
1232
+ const tooltip = document.createElement('div');
1233
+ tooltip.style.cssText = `
1234
+ position: absolute;
1235
+ background: rgba(0,0,0,0.9);
1236
+ color: white;
1237
+ padding: 1rem;
1238
+ border-radius: 8px;
1239
+ font-size: 0.9rem;
1240
+ max-width: 200px;
1241
+ z-index: 1000;
1242
+ top: -60px;
1243
+ left: 50%;
1244
+ transform: translateX(-50%);
1245
+ pointer-events: none;
1246
+ `;
1247
+ tooltip.textContent = info;
1248
+ tooltip.className = 'tooltip';
1249
+ this.style.position = 'relative';
1250
+ this.appendChild(tooltip);
1251
+ }
1252
+ });
1253
+
1254
+ node.addEventListener('mouseleave', function() {
1255
+ const tooltip = this.querySelector('.tooltip');
1256
+ if (tooltip) {
1257
+ tooltip.remove();
1258
+ }
1259
+ });
1260
+ });
1261
+
1262
+ // Initialize animations (Existing)
1263
+ document.addEventListener('DOMContentLoaded', function() {
1264
+ animateMetrics();
1265
+
1266
+ // Add pulse animation to CTA buttons
1267
+ document.querySelectorAll('.btn-primary').forEach(btn => {
1268
+ setInterval(() => {
1269
+ btn.classList.add('pulse');
1270
+ setTimeout(() => btn.classList.remove('pulse'), 1000);
1271
+ }, 5000);
1272
+ });
1273
+ });
1274
+
1275
+ // Add particle animation to hero section (Existing)
1276
+ function createParticles() {
1277
+ const hero = document.querySelector('.hero');
1278
+ for (let i = 0; i < 50; i++) {
1279
+ const particle = document.createElement('div');
1280
+ particle.style.cssText = `
1281
+ position: absolute;
1282
+ width: 2px;
1283
+ height: 2px;
1284
+ background: rgba(255,255,255,0.5);
1285
+ border-radius: 50%;
1286
+ left: ${Math.random() * 100}%;
1287
+ top: ${Math.random() * 100}%;
1288
+ animation: float ${5 + Math.random() * 10}s infinite linear;
1289
+ pointer-events: none;
1290
+ `;
1291
+ hero.appendChild(particle);
1292
+ }
1293
+ }
1294
+
1295
+ // Initialize particle animation (Existing)
1296
+ createParticles();
1297
+
1298
+ // Add typing effect to hero text (Existing)
1299
+ function typeWriter(element, text, speed = 50) {
1300
+ let i = 0;
1301
+ element.innerHTML = '';
1302
+ function typing() {
1303
+ if (i < text.length) {
1304
+ element.innerHTML += text.charAt(i);
1305
+ i++;
1306
+ setTimeout(typing, speed);
1307
+ }
1308
+ }
1309
+ typing();
1310
+ }
1311
+
1312
+ // Mobile menu toggle (if needed) (Existing)
1313
+ function toggleMobileMenu() {
1314
+ const navLinks = document.querySelector('.nav-links');
1315
+ navLinks.classList.toggle('active');
1316
+ }
1317
+
1318
+ // Add mobile styles (already in your original HTML, moved to style tag) (Existing)
1319
+ const mobileStyles = document.createElement('style');
1320
+ mobileStyles.textContent = `
1321
+ @media (max-width: 768px) {
1322
+ .nav-links.active {
1323
+ display: flex;
1324
+ flex-direction: column;
1325
+ position: absolute;
1326
+ top: 100%;
1327
+ left: 0;
1328
+ width: 100%;
1329
+ background: rgba(0,0,0,0.9);
1330
+ padding: 2rem;
1331
+ backdrop-filter: blur(20px);
1332
+ }
1333
+
1334
+ .nav-links.active a {
1335
+ padding: 1rem 0;
1336
+ border-bottom: 1px solid rgba(255,255,255,0.1);
1337
+ }
1338
+ }
1339
+ `;
1340
+ document.head.appendChild(mobileStyles);
1341
+ </script>
1342
+ </body>
1343
+ </html>
templates/index.html ADDED
@@ -0,0 +1,1766 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Perceptra AI: Intelligent Vision. Secure Insights. Real-time Understanding.</title>
7
+ <style>
8
+ /* Global Reset and Base Styles */
9
+ * {
10
+ margin: 0;
11
+ padding: 0;
12
+ box-sizing: border-box;
13
+ }
14
+
15
+ body {
16
+ font-family: 'Inter', 'Segoe UI', system-ui, -apple-system, sans-serif;
17
+ line-height: 1.6;
18
+ color: #E5E7EB;
19
+ overflow-x: hidden;
20
+ background: #0B1121;
21
+ font-weight: 400;
22
+ letter-spacing: -0.01em;
23
+ }
24
+
25
+ /* Hero Section */
26
+ .hero {
27
+ height: 100vh;
28
+ background: linear-gradient(135deg, #0B1121 0%, #1E293B 50%, #334155 100%);
29
+ display: flex;
30
+ align-items: center;
31
+ justify-content: center;
32
+ position: relative;
33
+ overflow: hidden;
34
+ }
35
+
36
+ .hero::before {
37
+ content: '';
38
+ position: absolute;
39
+ top: 0;
40
+ left: 0;
41
+ right: 0;
42
+ bottom: 0;
43
+ background: radial-gradient(circle at 20% 80%, rgba(59, 130, 246, 0.1) 0%, transparent 50%),
44
+ radial-gradient(circle at 80% 20%, rgba(99, 102, 241, 0.1) 0%, transparent 50%);
45
+ pointer-events: none;
46
+ }
47
+
48
+ .hero-content {
49
+ text-align: center;
50
+ z-index: 2;
51
+ position: relative;
52
+ max-width: 1000px;
53
+ padding: 0 2rem;
54
+ }
55
+
56
+ .hero h1 {
57
+ font-size: clamp(2.5rem, 5vw, 4.5rem);
58
+ font-weight: 700;
59
+ margin-bottom: 1.5rem;
60
+ color: #FFFFFF;
61
+ line-height: 1.1;
62
+ letter-spacing: -0.02em;
63
+ }
64
+
65
+ .hero p {
66
+ font-size: clamp(1.1rem, 2vw, 1.3rem);
67
+ color: #94A3B8;
68
+ margin-bottom: 3rem;
69
+ max-width: 700px;
70
+ margin-left: auto;
71
+ margin-right: auto;
72
+ font-weight: 400;
73
+ line-height: 1.7;
74
+ }
75
+
76
+ .cta-buttons {
77
+ display: flex;
78
+ gap: 1.5rem;
79
+ justify-content: center;
80
+ flex-wrap: wrap;
81
+ }
82
+
83
+ .btn {
84
+ padding: 1rem 2rem;
85
+ border: none;
86
+ border-radius: 8px;
87
+ font-size: 1rem;
88
+ font-weight: 600;
89
+ cursor: pointer;
90
+ transition: all 0.2s ease;
91
+ text-decoration: none;
92
+ display: inline-flex;
93
+ align-items: center;
94
+ gap: 0.5rem;
95
+ font-family: inherit;
96
+ letter-spacing: -0.01em;
97
+ }
98
+
99
+ .btn-primary {
100
+ background: #3B82F6;
101
+ color: #FFFFFF;
102
+ box-shadow: 0 4px 14px 0 rgba(59, 130, 246, 0.25);
103
+ }
104
+
105
+ .btn-primary:hover {
106
+ background: #2563EB;
107
+ transform: translateY(-1px);
108
+ box-shadow: 0 6px 20px 0 rgba(59, 130, 246, 0.35);
109
+ }
110
+
111
+ .btn-secondary {
112
+ background: transparent;
113
+ color: #E5E7EB;
114
+ border: 1px solid #374151;
115
+ }
116
+
117
+ .btn-secondary:hover {
118
+ background: #374151;
119
+ border-color: #4B5563;
120
+ transform: translateY(-1px);
121
+ }
122
+
123
+ /* Navigation */
124
+ .navbar {
125
+ position: fixed;
126
+ top: 0;
127
+ width: 100%;
128
+ background: rgba(11, 17, 33, 0.8);
129
+ backdrop-filter: blur(20px);
130
+ z-index: 1000;
131
+ padding: 1rem 0;
132
+ border-bottom: 1px solid rgba(255, 255, 255, 0.08);
133
+ }
134
+
135
+ .nav-container {
136
+ max-width: 1200px;
137
+ margin: 0 auto;
138
+ display: flex;
139
+ justify-content: space-between;
140
+ align-items: center;
141
+ padding: 0 2rem;
142
+ }
143
+
144
+ .logo {
145
+ font-size: 1.5rem;
146
+ font-weight: 700;
147
+ color: #FFFFFF;
148
+ letter-spacing: -0.02em;
149
+ }
150
+
151
+ .nav-links {
152
+ display: flex;
153
+ list-style: none;
154
+ gap: 2rem;
155
+ }
156
+
157
+ .nav-links a {
158
+ color: #94A3B8;
159
+ text-decoration: none;
160
+ font-weight: 500;
161
+ transition: color 0.2s ease;
162
+ font-size: 0.95rem;
163
+ }
164
+
165
+ .nav-links a:hover {
166
+ color: #FFFFFF;
167
+ }
168
+
169
+ /* Demo Section */
170
+ .demo-section, .features-section, .metrics-section, .research-section {
171
+ padding: 6rem 0;
172
+ background: #0B1121;
173
+ }
174
+
175
+ .container {
176
+ max-width: 1200px;
177
+ margin: 0 auto;
178
+ padding: 0 2rem;
179
+ }
180
+
181
+ .section-title {
182
+ text-align: center;
183
+ font-size: clamp(2rem, 4vw, 3rem);
184
+ font-weight: 700;
185
+ margin-bottom: 1rem;
186
+ color: #FFFFFF;
187
+ letter-spacing: -0.02em;
188
+ }
189
+
190
+ .section-subtitle {
191
+ text-align: center;
192
+ font-size: 1.1rem;
193
+ color: #94A3B8;
194
+ margin-bottom: 4rem;
195
+ max-width: 600px;
196
+ margin-left: auto;
197
+ margin-right: auto;
198
+ }
199
+
200
+ .demo-container {
201
+ display: grid;
202
+ grid-template-columns: 1fr 1fr;
203
+ gap: 3rem;
204
+ align-items: start;
205
+ }
206
+
207
+ /* Cards */
208
+ .upload-area.flask-form-container h3 {
209
+ color:#feca57;
210
+ }
211
+
212
+ .upload-area.flask-form-container, .results-panel, .feature-card, .metric-card, .research-card {
213
+ background: #1E293B;
214
+ border: 1px solid #334155;
215
+ border-radius: 12px;
216
+ padding: 2.5rem;
217
+ transition: all 0.2s ease;
218
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
219
+ }
220
+
221
+ /* .upload-area.flask-form-container:hover, .results-panel:hover, .feature-card:hover, .research-card:hover {
222
+ border-color: #475569;
223
+ transform: translateY(-2px);
224
+ box-shadow: 0 10px 25px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
225
+ } */
226
+ .upload-area.flask-form-container:hover, .results-panel:hover, .feature-card:hover, .research-card:hover {
227
+ transform: translateY(-7px); /* Consistent lift */
228
+ border-color: #3B82F6; /* Accent blue border */
229
+ box-shadow:
230
+ 0 12px 30px -5px rgba(0, 0, 0, 0.2), /* Main lift shadow */
231
+ 0 0 20px rgba(59, 130, 246, 0.4); /* Subtle blue glow */ /* NEW */
232
+ }
233
+ .flask-form-container h3, .results-panel h3 {
234
+ font-size: 1.5rem;
235
+ color: #FFFFFF;
236
+ margin-bottom: 1rem;
237
+ font-weight: 600;
238
+ text-align: center;
239
+ }
240
+
241
+ .flask-form-container p, .results-panel p {
242
+ color: #94A3B8;
243
+ text-align: center;
244
+ margin-bottom: 2rem;
245
+ }
246
+
247
+ .flask-file-input {
248
+ display: block;
249
+ width: 100%;
250
+ padding: 0.75rem;
251
+ border: 1px solid #374151;
252
+ border-radius: 8px;
253
+ margin-bottom: 1.5rem;
254
+ font-size: 1rem;
255
+ cursor: pointer;
256
+ background-color: #111827;
257
+ color: #E5E7EB;
258
+ transition: border-color 0.2s ease;
259
+ }
260
+
261
+ .flask-file-input:hover {
262
+ border-color: #4B5563;
263
+ }
264
+
265
+ .flask-file-input::file-selector-button {
266
+ background-color: #3B82F6;
267
+ color: #FFFFFF;
268
+ border: none;
269
+ padding: 0.5rem 1rem;
270
+ border-radius: 6px;
271
+ cursor: pointer;
272
+ margin-right: 1rem;
273
+ font-weight: 500;
274
+ transition: background-color 0.2s ease;
275
+ }
276
+
277
+ .flask-file-input::file-selector-button:hover {
278
+ background-color: #2563EB;
279
+ }
280
+
281
+ .flask-submit-btn {
282
+ width: 100%;
283
+ padding: 0.875rem 1.5rem;
284
+ border: none;
285
+ border-radius: 8px;
286
+ font-size: 1rem;
287
+ font-weight: 600;
288
+ cursor: pointer;
289
+ transition: all 0.2s ease;
290
+ background: #3B82F6;
291
+ color: #FFFFFF;
292
+ box-shadow: 0 4px 14px 0 rgba(59, 130, 246, 0.25);
293
+ }
294
+
295
+ .flask-submit-btn:hover {
296
+ background: #2563EB;
297
+ transform: translateY(-1px);
298
+ box-shadow: 0 6px 20px 0 rgba(59, 130, 246, 0.35);
299
+ }
300
+
301
+ .flask-result-box {
302
+ background: #111827;
303
+ padding: 1.5rem;
304
+ border-radius: 8px;
305
+ border-left: 4px solid #3B82F6;
306
+ margin-top: 2rem;
307
+ text-align: left;
308
+ }
309
+
310
+ .flask-result-box h3 {
311
+ font-size: 1.25rem;
312
+ color: #FFFFFF;
313
+ margin-bottom: 1rem;
314
+ text-align: center;
315
+ font-weight: 600;
316
+ }
317
+
318
+ .flask-result-box p {
319
+ font-size: 1rem;
320
+ color: #D1D5DB;
321
+ line-height: 1.6;
322
+ word-wrap: break-word;
323
+ }
324
+
325
+ .flask-uploaded-image {
326
+ max-width: 100%;
327
+ height: auto;
328
+ border-radius: 8px;
329
+ margin: 1.5rem 0;
330
+ box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1);
331
+ }
332
+
333
+ /* Flash Messages */
334
+ .flash-message {
335
+ padding: 1rem 1.5rem;
336
+ margin-bottom: 1rem;
337
+ border-radius: 8px;
338
+ font-weight: 500;
339
+ text-align: center;
340
+ }
341
+
342
+ .flash-success {
343
+ background-color: rgba(34, 197, 94, 0.1);
344
+ color: #22C55E;
345
+ border: 1px solid rgba(34, 197, 94, 0.2);
346
+ }
347
+
348
+ .flash-error {
349
+ background-color: rgba(239, 68, 68, 0.1);
350
+ color: #EF4444;
351
+ border: 1px solid rgba(239, 68, 68, 0.2);
352
+ }
353
+
354
+ /* Tabs */
355
+ .tabs {
356
+ display: flex;
357
+ border-bottom: 1px solid #334155;
358
+ margin-bottom: 2rem;
359
+ }
360
+
361
+ .tab {
362
+ padding: 1rem 1.5rem;
363
+ cursor: pointer;
364
+ font-weight: 500;
365
+ color: #94A3B8;
366
+ border-bottom: 2px solid transparent;
367
+ transition: all 0.2s ease;
368
+ font-size: 0.95rem;
369
+ }
370
+
371
+ .tab.active {
372
+ color: #3B82F6;
373
+ border-color: #3B82F6;
374
+ }
375
+
376
+ .tab:hover:not(.active) {
377
+ color: #FFFFFF;
378
+ }
379
+
380
+ .tab-content {
381
+ display: none;
382
+ }
383
+
384
+ .tab-content.active {
385
+ display: block;
386
+ animation: fadeIn 0.3s ease;
387
+ }
388
+
389
+ @keyframes fadeIn {
390
+ from { opacity: 0; transform: translateY(10px); }
391
+ to { opacity: 1; transform: translateY(0); }
392
+ }
393
+
394
+ /* Video Captioning Section */
395
+ .video-captioning-section {
396
+ padding: 6rem 0;
397
+ background: linear-gradient(135deg, #1E293B 0%, #334155 100%);
398
+ text-align: center;
399
+ margin: 4rem 0;
400
+ border-radius: 16px;
401
+ }
402
+
403
+ .video-captioning-section h2 {
404
+ font-size: clamp(2rem, 4vw, 3rem);
405
+ font-weight: 700;
406
+ margin-bottom: 1rem;
407
+ color: #FFFFFF;
408
+ letter-spacing: -0.02em;
409
+ }
410
+
411
+ .video-captioning-section p {
412
+ font-size: 1.1rem;
413
+ color: #94A3B8;
414
+ margin-bottom: 2rem;
415
+ max-width: 600px;
416
+ margin-left: auto;
417
+ margin-right: auto;
418
+ line-height: 1.7;
419
+ }
420
+
421
+ .video-cta-btn {
422
+ padding: 1rem 2rem;
423
+ border: none;
424
+ border-radius: 8px;
425
+ font-size: 1rem;
426
+ font-weight: 600;
427
+ cursor: pointer;
428
+ transition: all 0.2s ease;
429
+ text-decoration: none;
430
+ display: inline-flex;
431
+ align-items: center;
432
+ gap: 0.5rem;
433
+ background: #3B82F6;
434
+ color: #FFFFFF;
435
+ box-shadow: 0 4px 14px 0 rgba(59, 130, 246, 0.25);
436
+ }
437
+
438
+ .video-cta-btn:hover {
439
+ background: #2563EB;
440
+ transform: translateY(-1px);
441
+ box-shadow: 0 6px 20px 0 rgba(59, 130, 246, 0.35);
442
+ }
443
+
444
+ /* Features Grid */
445
+ .features-grid {
446
+ display: grid;
447
+ grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
448
+ gap: 2rem;
449
+ margin-top: 3rem;
450
+ }
451
+
452
+ .feature-icon {
453
+ font-size: 2.5rem;
454
+ margin-bottom: 1.5rem;
455
+ display: block;
456
+ color: #3B82F6;
457
+ text-align: center;
458
+ }
459
+
460
+ .feature-card h3 {
461
+ font-size: 1.25rem;
462
+ color: #feca57;
463
+ margin-bottom: 1rem;
464
+ font-weight: 600;
465
+ text-align: center;
466
+ }
467
+
468
+ .feature-card p {
469
+ color: whitesmoke;
470
+ text-align: center;
471
+ line-height: 1.6;
472
+ }
473
+
474
+ /* Architecture Section */
475
+ .architecture-section {
476
+ padding: 6rem 0;
477
+ background: #111827;
478
+ }
479
+
480
+ .architecture-viz {
481
+ background: #1E293B;
482
+ border: 1px solid #334155;
483
+ border-radius: 12px;
484
+ padding: 3rem;
485
+ margin-top: 3rem;
486
+ }
487
+
488
+ .network-diagram-group {
489
+ margin-bottom: 3rem;
490
+ padding: 2rem;
491
+ border: 1px solid #374151;
492
+ border-radius: 12px;
493
+ background: #111827;
494
+ }
495
+
496
+ .network-diagram-group h4 {
497
+ color: #3B82F6;
498
+ margin-bottom: 2rem;
499
+ font-size: 1.5rem;
500
+ text-align: center;
501
+ font-weight: 600;
502
+ }
503
+
504
+ .network-diagram {
505
+ display: flex;
506
+ justify-content: space-between;
507
+ align-items: center;
508
+ margin: 1.5rem 0;
509
+ flex-wrap: wrap;
510
+ gap: 1.5rem;
511
+ }
512
+
513
+ .network-node {
514
+ background: #1E293B;
515
+ border: 1px solid #334155;
516
+ border-radius: 8px;
517
+ padding: 1.5rem;
518
+ text-align: center;
519
+ transition: all 0.2s ease;
520
+ cursor: pointer;
521
+ min-width: 150px;
522
+ flex-grow: 1;
523
+ }
524
+
525
+ .network-node:hover {
526
+ background: #334155;
527
+ border-color: #475569;
528
+ transform: translateY(-2px);
529
+ }
530
+
531
+ .network-node h4 {
532
+ font-size: 1rem;
533
+ margin-bottom: 0.5rem;
534
+ color: #feca57;
535
+ font-weight: 600;
536
+ }
537
+
538
+ .network-node p {
539
+ font-size: 0.875rem;
540
+ color: whitesmoke;
541
+ }
542
+
543
+ .arrow {
544
+ font-size: 1.5rem;
545
+ color: #3B82F6;
546
+ flex-shrink: 0;
547
+ margin: 0 0.5rem;
548
+ }
549
+
550
+ /* Metrics Grid */
551
+ .metrics-grid {
552
+ display: grid;
553
+ grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
554
+ gap: 2rem;
555
+ margin-top: 3rem;
556
+ }
557
+
558
+ .metric-card {
559
+ text-align: center;
560
+ padding: 2.5rem 2rem;
561
+ }
562
+
563
+ .metric-value {
564
+ font-size: 2.5rem;
565
+ font-weight: 700;
566
+ color: #3B82F6;
567
+ margin-bottom: 0.5rem;
568
+ line-height: 1;
569
+ }
570
+
571
+ .metric-label {
572
+ color: #feca57;
573
+ font-weight: 500;
574
+ font-size: 0.95rem;
575
+ }
576
+
577
+ /* Research Grid */
578
+ .research-grid {
579
+ display: grid;
580
+ grid-template-columns: repeat(auto-fit, minmax(320px, 1fr));
581
+ gap: 2rem;
582
+ margin-top: 3rem;
583
+ }
584
+
585
+ .research-card h3 {
586
+ color: #feca57;
587
+ font-size: 1.25rem;
588
+ margin-bottom: 1rem;
589
+ font-weight: 700;
590
+
591
+ }
592
+
593
+ .research-card p {
594
+ color: whitesmoke;
595
+ margin-bottom: 1.5rem;
596
+ line-height: 1.6;
597
+ }
598
+
599
+ .research-card .btn {
600
+ background: #3B82F6;
601
+ color: #FFFFFF;
602
+ box-shadow: 0 4px 14px 0 rgba(59, 130, 246, 0.25);
603
+ }
604
+
605
+ .research-card .btn:hover {
606
+ background: #2563EB;
607
+ box-shadow: 0 6px 20px 0 rgba(59, 130, 246, 0.35);
608
+ }
609
+
610
+ /* Footer */
611
+ .footer {
612
+ background: #111827;
613
+ color: white;
614
+ padding: 3rem 0;
615
+ text-align: center;
616
+ }
617
+
618
+ .footer-content {
619
+ display: grid;
620
+ grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
621
+ gap: 2rem;
622
+ margin-bottom: 2rem;
623
+ }
624
+
625
+ .footer-section h3 {
626
+ margin-bottom: 1rem;
627
+ color: #feca57;
628
+ font-weight: 700;
629
+ font-size: 1.3rem;
630
+
631
+ }
632
+
633
+ /* Footer links */
634
+ .footer-section p a {
635
+ color: white; /* Muted grey for text, consistent with other footer text */
636
+ text-decoration: none;
637
+ transition: color 0.3s ease; /* Smooth transition for hover effect */
638
+ }
639
+
640
+ .footer-section p a:hover {
641
+ color: #E74C3C; /* Professional red on hover */
642
+ }
643
+
644
+ .social-links {
645
+ display: flex;
646
+ flex-direction: column; /* Stacks the h3 and the new div vertically */
647
+ align-items: center;
648
+ gap: 1rem;
649
+ margin-top: 2rem;
650
+
651
+ }
652
+ .social-links h3 {
653
+ color: #feca57;
654
+ font-weight: 700;
655
+ font-size: 1.3rem;
656
+ }
657
+
658
+ .social-link {
659
+ display: inline-flex;
660
+ align-items: center;
661
+ justify-content: center;
662
+ width: 50px;
663
+ height: 50px;
664
+ background: #3B82F6;
665
+ border-radius: 50%;
666
+ color: white;
667
+ text-decoration: none;
668
+ font-size: 1.5rem;
669
+ transition: transform 0.3s ease;
670
+ }
671
+
672
+ .social-icons-row {
673
+ display: flex; /* Makes the icons themselves display in a row */
674
+ justify-content: center; /* Centers the icons within their new div */
675
+ gap: 1rem; /* Space between the icons */
676
+ }
677
+ .social-link:hover {
678
+ transform: scale(1.1) rotate(360deg);
679
+ }
680
+
681
+ /* Responsive Design */
682
+ @media (max-width: 768px) {
683
+ .hero h1 {
684
+ font-size: 2.5rem;
685
+ }
686
+
687
+ .demo-container {
688
+ grid-template-columns: 1fr;
689
+ }
690
+
691
+ .nav-links {
692
+ display: none;
693
+ }
694
+
695
+ .features-grid {
696
+ grid-template-columns: 1fr;
697
+ }
698
+
699
+ .network-diagram {
700
+ flex-direction: column;
701
+ }
702
+
703
+ .arrow {
704
+ transform: rotate(90deg);
705
+ }
706
+ }
707
+
708
+ /* Animations */
709
+ @keyframes pulse {
710
+ 0%, 100% { transform: scale(1); }
711
+ 50% { transform: scale(1.05); }
712
+ }
713
+
714
+ .pulse {
715
+ animation: pulse 2s infinite;
716
+ }
717
+
718
+ /* Processing Animation */
719
+ .processing {
720
+ display: inline-block;
721
+ width: 40px;
722
+ height: 40px;
723
+ border: 3px solid #374151;
724
+ border-top: 3px solid #3B82F6;
725
+ border-radius: 50%;
726
+ animation: spin 1s linear infinite;
727
+ }
728
+
729
+ @keyframes spin {
730
+ 0% { transform: rotate(0deg); }
731
+ 100% { transform: rotate(360deg); }
732
+ }
733
+
734
+ /* Progress Bar */
735
+ .progress-bar {
736
+ width: 100%;
737
+ height: 8px;
738
+ background: #f0f0f0;
739
+ border-radius: 4px;
740
+ overflow: hidden;
741
+ margin: 1rem 0;
742
+ }
743
+
744
+ .progress-fill {
745
+ height: 100%;
746
+ background: linear-gradient(45deg, #667eea, #764ba2);
747
+ width: 0%;
748
+ transition: width 0.3s ease;
749
+ border-radius: 4px;
750
+ }
751
+
752
+ /* Modal and 3D content */
753
+ .modal-content-3d {
754
+ width: 100%;
755
+ height: 500px;
756
+ background-color: #111827;
757
+ border-radius: 8px;
758
+ overflow: hidden;
759
+ }
760
+
761
+ .modal-content-3d canvas {
762
+ display: block;
763
+ width: 100%;
764
+ height: 100%;
765
+ }
766
+
767
+ /* Segmentation results */
768
+ .segmentation-results-display {
769
+ background: #111827;
770
+ padding: 1.5rem;
771
+ border-radius: 8px;
772
+ border-left: 4px solid #3B82F6;
773
+ margin-top: 2rem;
774
+ text-align: left;
775
+ }
776
+
777
+ .segmentation-results-display h4 {
778
+ font-size: 1.25rem;
779
+ color: #FFFFFF;
780
+ margin-bottom: 1rem;
781
+ text-align: center;
782
+ font-weight: 600;
783
+ }
784
+
785
+ .segmentation-results-display ul {
786
+ list-style-type: none;
787
+ padding: 0;
788
+ margin-top: 1rem;
789
+ }
790
+
791
+ .segmentation-results-display li {
792
+ background-color: rgba(59, 130, 246, 0.1);
793
+ margin-bottom: 0.5rem;
794
+ padding: 0.75rem 1rem;
795
+ border-radius: 6px;
796
+ font-size: 0.95rem;
797
+ color: #E5E7EB;
798
+ display: flex;
799
+ justify-content: space-between;
800
+ align-items: center;
801
+ border: 1px solid rgba(59, 130, 246, 0.2);
802
+ }
803
+
804
+ .segmentation-results-display li span {
805
+ font-weight: 600;
806
+ color: #FFFFFF;
807
+ }
808
+
809
+ /* Utility classes */
810
+ .text-center { text-align: center; }
811
+ .text-left { text-align: left; }
812
+ .text-right { text-align: right; }
813
+ .mb-1 { margin-bottom: 0.25rem; }
814
+ .mb-2 { margin-bottom: 0.5rem; }
815
+ .mb-3 { margin-bottom: 0.75rem; }
816
+ .mb-4 { margin-bottom: 1rem; }
817
+ .mt-1 { margin-top: 0.25rem; }
818
+ .mt-2 { margin-top: 0.5rem; }
819
+ .mt-3 { margin-top: 0.75rem; }
820
+ .mt-4 { margin-top: 1rem; }
821
+ </style>
822
+
823
+
824
+ <!-- FontAwesome for icons -->
825
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
826
+
827
+ <!-- Three.js Library -->
828
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js"></script>
829
+ <!-- OrbitControls for camera interaction -->
+ <!-- NOTE(review): cdnjs does not appear to host OrbitControls under the three.js r128 package
+ (it lives under examples/js/controls in the three.js distribution), and when loaded that way
+ the global is THREE.OrbitControls, not bare OrbitControls — verify this URL and the usage below. -->
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/controls/OrbitControls.min.js"></script>
831
+ </head>
832
+ <body>
833
+ <!-- Navigation -->
834
+ <nav class="navbar">
835
+ <div class="nav-container">
836
+ <div class="logo">Perceptra AI</div>
837
+ <ul class="nav-links">
838
+ <li><a href="#demo">Live Demo</a></li>
839
+ <li><a href="#features">Features</a></li>
840
+ <li><a href="#architecture">Architecture</a></li>
841
+ <li><a href="#metrics">Performance</a></li>
842
+ <li><a href="#research">Research</a></li>
843
+ <li><a href="/logout">Logout</a></li> {# Added Logout link #}
844
+ </ul>
845
+ </div>
846
+ </nav>
847
+
848
+ <!-- Hero Section -->
849
+ <section class="hero">
850
+ <div class="hero-content">
851
+ <h1>Perceptra AI: Unlocking Visual Intelligence</h1>
852
+ <p >Transforming static images and live video streams into actionable insights.
853
+ Experience custom-built AI for intelligent captioning, precise segmentation, and real-time visual understanding,
854
+ all powered by advanced, secure technology.</p>
855
+ <div class="cta-buttons">
856
+ <a href="#demo" class="btn btn-primary">
857
+ 🚀 Try Live Demo
858
+ </a>
859
+ <a href="#architecture" class="btn btn-secondary">
860
+ 🧠 Explore Architecture
861
+ </a>
862
+ </div>
863
+ </div>
864
+ </section>
865
+
866
+ <!-- Demo Section (Image Captioning & Segmentation) -->
867
+ <section id="demo" class="demo-section">
868
+ <div class="container">
869
+ <h2 class="section-title">Visual Intelligence Studio</h2>
870
+ <div class="demo-container">
871
+ <!-- Flask Image Captioning Form -->
872
+ <div class="upload-area flask-form-container">
873
+ <h3>Upload Your Image for Analysis</h3>
874
+ <p>Drag & drop an image or click to browse</p>
875
+
876
+ <!-- Flash Messages -->
877
+ {% with messages = get_flashed_messages(with_categories=true) %}
878
+ {% if messages %}
879
+ <div class="mb-4">
880
+ {% for category, message in messages %}
881
+ <div class="flash-message flash-{{ category }}">
882
+ {{ message }}
883
+ </div>
884
+ {% endfor %}
885
+ </div>
886
+ {% endif %}
887
+ {% endwith %}
888
+
889
+ <form action="/predict" method="post" enctype="multipart/form-data">
890
+ <input id="imageInput" name="file" type="file" accept="image/*" required class="flask-file-input">
891
+ <p style="font-size: 0.9em; color: #777; margin-top: 0.5rem; margin-bottom: 1rem;">PNG, JPG, JPEG, GIF formats allowed.</p>
892
+ <button type="submit" class="flask-submit-btn">
893
+ Analyze Image
894
+ </button>
895
+ </form>
896
+ </div>
897
+
898
+ <!-- Results Panel -->
899
+ <div class="results-panel">
900
+ <div class="tabs">
901
+ <div class="tab {% if not segmentation_image_url %}active{% endif %}" data-tab="caption">📝 Caption</div>
902
+ <div class="tab {% if segmentation_image_url %}active{% endif %}" data-tab="segment">🎯 Segmentation</div>
903
+
904
+ </div>
905
+
906
+ <div class="tab-content {% if not segmentation_image_url %}active{% endif %}" id="caption">
907
+ <h3>Generated Caption</h3>
908
+ <div id="captionResult">
909
+ {% if caption %}
910
+ <div class="flask-result-box">
911
+ <h3>Your Uploaded Image:</h3>
912
+ {% if uploaded_image_url %}
913
+ <img src="{{ uploaded_image_url }}" alt="Uploaded Image" class="flask-uploaded-image">
914
+ {% endif %}
915
+ <h3>Generated Caption:</h3>
916
+ <p>"{{ caption }}"</p>
917
+ </div>
918
+ {% else %}
919
+ <p style="color: #666; font-style: italic;">Upload an image to see the AI-generated caption...</p>
920
+ {% endif %}
921
+ </div>
922
+ </div>
923
+
924
+ <div class="tab-content {% if segmentation_image_url %}active{% endif %}" id="segment">
925
+ <h3>Segmentation Results</h3>
926
+ <div id="segmentResult">
927
+ {% if segmentation_image_url %}
928
+ <div class="segmentation-results-display">
929
+ <h4>Segmented Image:</h4>
930
+ <img src="{{ segmentation_image_url }}" alt="Segmented Image" class="flask-uploaded-image">
931
+ {% if segmentation_metrics.num_objects is defined %}
932
+ <h4>Detected Objects ({{ segmentation_metrics.num_objects }}):</h4>
933
+ <ul>
934
+ {% for obj in segmentation_metrics.detected_objects %}
935
+ <li><span>{{ obj }}</span></li>
936
+ {% endfor %}
937
+ {% if segmentation_metrics.error %}
938
+ <li style="color: red;">Error: {{ segmentation_metrics.error }}</li>
939
+ {% endif %}
940
+ </ul>
941
+ {% elif segmentation_metrics.status %}
942
+ <p style="color: #666;">{{ segmentation_metrics.status }}</p>
943
+ {% else %}
944
+ <p style="color: #666; font-style: italic;">No segmentation results available. Upload an image to analyze.</p>
945
+ {% endif %}
946
+ </div>
947
+ {% else %}
948
+ <p style="color: #666; font-style: italic;">Segmentation masks will appear here after image analysis.</p>
949
+ <div style="text-align: center; margin-top: 1rem;">
950
+ <img src="https://placehold.co/400x250/cccccc/333333?text=Segmentation+Preview" alt="Segmentation Placeholder" class="flask-uploaded-image">
951
+ <p style="margin-top: 0.5rem; font-size: 0.9em; color: #777;">Placeholder image until live segmentation is ready.</p>
952
+ </div>
953
+ {% endif %}
954
+ </div>
955
+ </div>
956
+ </div>
957
+ </div>
958
+ </div>
959
+ </section>
960
+
961
+ <!-- Dedicated LiveSense AI: Real-time Video Captioning Section -->
962
+ <section id="video-captioning" class="video-captioning-section">
963
+ <div class="container">
964
+ <h2 class="section-title">LiveSense AI: Real-time Video Understanding</h2>
965
+ <p>Step into the future of dynamic vision. Our dedicated LiveSense AI platform offers instant, intelligent descriptions of live video feeds, transforming real-world events into actionable insights.</p>
966
+ <a href="#" target="_blank" class="video-cta-btn"> <!-- TODO: replace '#' with the deployed LiveSense AI app URL -->
967
+ Launch LiveSense AI Application 🚀
968
+ </a>
969
+ </div>
970
+ </section>
971
+ <!-- END Dedicated LiveSense AI: Real-time Video Captioning Section -->
972
+
973
+
974
+ <!-- Features Section -->
975
+ <section id="features" class="features-section">
976
+ <div class="container">
977
+ <h2 class="section-title">Core Capabilities & Innovation</h2>
978
+ <div class="features-grid">
979
+ <div class="feature-card">
980
+ <span class="feature-icon">👁️</span>
981
+ <h3>Intelligent Image Captioning</h3>
982
+ <p>Our custom-built deep learning model accurately describes the content of static images, transforming visual data into rich, human-like narratives.</p>
983
+ </div>
984
+
985
+ <div class="feature-card">
986
+ <span class="feature-icon">🎯</span>
987
+ <h3>Precision Image Segmentation</h3>
988
+ <p>Leveraging advanced techniques, we precisely identify and segment objects within images, providing detailed insights into scene composition and object boundaries.</p>
989
+ </div>
990
+
991
+ <div class="feature-card">
992
+ <span class="feature-icon">⚡</span>
993
+ <h3>Real-time Dynamic Vision</h3>
994
+ <p>Experience instantaneous understanding of live video streams. Our optimized AI processes webcam feeds in real-time, providing continuous, intelligent descriptions and tracking of evolving scenes as they happen.</p>
995
+ </div>
996
+
997
+ <div class="feature-card">
998
+ <span class="feature-icon">🔐</span>
999
+ <h3>Robust Biometric Security</h3>
1000
+ <p>Safeguard access to sensitive AI capabilities with our multi-layered authentication. Featuring secure facial recognition and traditional email/password login, we ensure unparalleled user protection and data integrity.</p>
1001
+ </div>
1002
+
1003
+ <div class="feature-card">
1004
+ <span class="feature-icon">🧠</span>
1005
+ <h3>Proprietary Deep Learning Engine</h3>
1006
+ <p>Driven by custom-engineered neural architectures, including bespoke CNN-LSTM for captioning and advanced segmentation networks. Developed entirely from scratch for optimized performance and unique insights.</p>
1007
+ </div>
1008
+
1009
+ <div class="feature-card">
1010
+ <span class="feature-icon">📊</span>
1011
+ <h3>Performance & Operational Intelligence</h3>
1012
+ <p>Designed for high-throughput and low-latency operations, our system features adaptive processing, intelligent caching, and comprehensive performance analytics, ensuring scalable and reliable AI service delivery.</p>
1013
+ </div>
1014
+ </div>
1015
+ </div>
1016
+ </section>
1017
+
1018
+ <!-- Architecture Section -->
1019
+ <section id="architecture" class="architecture-section">
1020
+ <div class="container">
1021
+ <h2 class="section-title">Perceptra AI: Integrated Vision & Security Architecture</h2>
1022
+ <div class="architecture-viz">
1023
+ <h3>Core AI & System Components Overview</h3>
1024
+
1025
+ <div class="network-diagram-group">
1026
+ <h4>1. Static Image Analysis Pipeline</h4>
1027
+ <div class="network-diagram">
1028
+ <div class="network-node" data-info="The entry point for static images uploaded by users.">
1029
+ <h4>Image Input</h4>
1030
+ <p>Files/URLs</p>
1031
+ </div>
1032
+ <div class="arrow">→</div>
1033
+ <div class="network-node" data-info="Our custom-built model: ResNet50 Encoder extracts features, fed into an LSTM Decoder with Attention for generating descriptive captions.">
1034
+ <h4>Image Captioning Module</h4>
1035
+ <p>ResNet50-LSTM-Attention</p>
1036
+ </div>
1037
+ <div class="arrow">→</div>
1038
+ <div class="network-node" data-info="Integration of the powerful YOLOv8x-seg model for accurate object detection and precise instance segmentation.">
1039
+ <h4>Image Segmentation Module</h4>
1040
+ <p>YOLOv8x-seg</p>
1041
+ </div>
1042
+ <div class="arrow">→</div>
1043
+ <div class="network-node" data-info="Structured JSON outputs containing generated captions, identified objects, and segmentation masks.">
1044
+ <h4>Analyzed Output</h4>
1045
+ <p>Captions & Masks</p>
1046
+ </div>
1047
+ </div>
1048
+ </div>
1049
+
1050
+ <div class="network-diagram-group" style="margin-top: 3rem;">
1051
+ <h4>2. Real-time Video Intelligence (LiveSense AI)</h4>
1052
+ <div class="network-diagram">
1053
+ <div class="network-node" data-info="Captures live video streams directly from the user's webcam for instant processing.">
1054
+ <h4>Webcam Input</h4>
1055
+ <p>Live Stream</p>
1056
+ </div>
1057
+ <div class="arrow">→</div>
1058
+ <div class="network-node" data-info="Utilizes the BLIP model for real-time video understanding, enhanced by adaptive frame sampling, batch processing, and intelligent caching.">
1059
+ <h4>Dynamic Vision Core</h4>
1060
+ <p>BLIP & Optimizations</p>
1061
+ </div>
1062
+ <div class="arrow">→</div>
1063
+ <div class="network-node" data-info="Provides continuous, contextually rich descriptions of evolving scenes, displayed instantly in the UI.">
1064
+ <h4>Live Caption Stream</h4>
1065
+ <p>Real-time Output</p>
1066
+ </div>
1067
+ </div>
1068
+ </div>
1069
+
1070
+ <div class="network-diagram-group" style="margin-top: 3rem;">
1071
+ <h4>3. Secure Identity & Application Layer</h4>
1072
+ <div class="network-diagram">
1073
+ <div class="network-node" data-info="Supports user authentication via email/password and advanced facial recognition, capturing biometric data securely.">
1074
+ <h4>User Inputs</h4>
1075
+ <p>Biometrics & Passwords</p>
1076
+ </div>
1077
+ <div class="arrow">→</div>
1078
+ <div class="network-node" data-info="The core Flask backend orchestrates all AI services, handles API requests, and manages data flow.">
1079
+ <h4>Backend Orchestration</h4>
1080
+ <p>Flask API & Logic</p>
1081
+ </div>
1082
+ <div class="arrow">→</div>
1083
+ <div class="network-node" data-info="Securely stores user credentials, face encodings, and manages authentication states using SQLAlchemy.">
1084
+ <h4>User Database</h4>
1085
+ <p>SQLite/SQLAlchemy</p>
1086
+ </div>
1087
+ <div class="arrow">→</div>
1088
+ <div class="network-node" data-info="The user-facing web interface built with HTML, CSS, and JavaScript, providing interactive demos and real-time displays.">
1089
+ <h4>Frontend Interface</h4>
1090
+ <p>UI/UX</p>
1091
+ </div>
1092
+ </div>
1093
+ </div>
1094
+
1095
+ <div style="text-align: center; margin-top: 4rem;">
1096
+ <!-- <button class="btn btn-primary" onclick="showArchitectureDetails()">
1097
+ 🔍 Explore Conceptual 3D Model
1098
+ </button> -->
1099
+ <p style="color: rgba(255,255,255,0.7); font-size: 0.9em; margin-top: 1rem;">
1100
+ Hover over nodes for details. The 3D model provides a conceptual visualization of a core AI pipeline within our system.
1101
+ </p>
1102
+ </div>
1103
+ </div>
1104
+ </div>
1105
+ </section>
1106
+
1107
+
1108
+ <!-- Metrics Section -->
1110
+ <section id="metrics" class="metrics-section">
1111
+ <div class="container">
1112
+ <h2 class="section-title">Performance Metrics</h2>
1113
+ <div class="metrics-grid">
1114
+ <div class="metric-card">
1115
+ <div class="metric-value" id="bleuScore">10.49%</div>
1116
+ <div class="metric-label">BLEU-4 Score</div>
1117
+ <p class="metric-subtext">For custom, scratch-built model</p>
1118
+ </div>
1119
+
1120
+ <div class="metric-card">
1121
+ <div class="metric-value" id="ciderScore">1.03</div>
1122
+ <div class="metric-label">CIDEr Score</div>
1123
+ <p class="metric-subtext">Measures agreement with human captions</p>
1124
+ </div>
1125
+
1126
+ <div class="metric-card">
1127
+ <div class="metric-value" id="meteorScore">31.58%</div>
1128
+ <div class="metric-label">METEOR Score</div>
1129
+ <p class="metric-subtext">Balances precision and recall of unigrams</p>
1130
+ </div>
1131
+
1132
+ <div class="metric-card">
1133
+ <div class="metric-value" id="inferenceLatency">27.7 ms</div>
1134
+ <div class="metric-label">Avg. Inference Latency</div>
1135
+ <p class="metric-subtext">Time to process one image</p>
1136
+ </div>
1137
+
1138
+ <div class="metric-card">
1139
+ <div class="metric-value" id="processingFps">36.1 FPS</div>
1140
+ <div class="metric-label">Processing Throughput</div>
1141
+ <p class="metric-subtext">Frames processed per second for live image</p>
1142
+ </div>
1143
+
1144
+ <div class="metric-card">
1145
+ <div class="metric-value" id="perplexity" style="font-size: 2rem;">12.43</div>
1146
+ <div class="metric-label">Perplexity</div>
1147
+ <p class="metric-subtext">Lower indicates better language model prediction</p>
1148
+ </div>
1149
+ </div>
1150
+
1151
+ </div>
1152
+ </section>
1153
+
1154
+ <!-- Research Section -->
1155
+ <section id="research" class="research-section">
1156
+ <div class="container">
1157
+ <h2 class="section-title">Research & Innovation</h2>
1158
+ <div class="research-grid">
1159
+ <div class="research-card">
1160
+ <h3>📚 Technical Documentation</h3>
1161
+ <p>Complete research paper with mathematical formulations, architecture details, and experimental results.</p>
1162
+ <button class="btn btn-primary" style="margin-top: 1rem;">Read Paper</button>
1163
+ </div>
1164
+
1165
+ <!-- <div class="research-card">
1166
+ <h3>🔬 Ablation Studies</h3>
1167
+ <p>Comprehensive analysis of different architectural choices and their impact on model performance.</p>
1168
+ <button class="btn btn-primary" style="margin-top: 1rem;">View Studies</button>
1169
+ </div> -->
1170
+
1171
+ <div class="research-card">
1172
+ <h3>💻 Code Repository</h3>
1173
+ <p>Open-source implementation with detailed comments, training scripts, and deployment guides.</p>
1174
+ <button class="btn btn-primary" style="margin-top: 1rem;">GitHub Repo</button>
1175
+ </div>
1176
+
1177
+ <div class="research-card">
1178
+ <h3>📊 Training Insights</h3>
1179
+ <p>Interactive dashboard showing training progress, loss curves, and hyperparameter optimization results.</p>
1180
+ <button class="btn btn-primary" style="margin-top: 1rem;">Training Dashboard</button>
1181
+ </div>
1182
+ </div>
1183
+ </div>
1184
+ </section>
1185
+
1186
+ <!-- Footer -->
1187
+ <footer class="footer">
1188
+ <div class="container">
1189
+ <div class="footer-content">
1190
+ <div class="footer-section">
1191
+ <h3>Perceptra AI</h3> {# Updated Brand Name #}
1192
+ <p style="color: #ccc; margin-top: 0.5rem; font-weight: 500;"><b>Intelligent Vision. Secure Insights. Real-time Understanding.</b></p> {# Added Tagline with styling #}
1193
+ </div>
1194
+
1195
+ <div class="footer-section">
1196
+ <h3>Quick Links</h3>
1197
+ <p><a href="#demo"><b>Live Demo</b></a></p>
1198
+ <p><a href="#architecture"><b>Architecture</b></a></p>
1199
+ <p><a href="#research"><b>Research</b></a></p>
1200
+ <p><a href="/main_app"><b>App Home</b></a></p> {# Added link to main app #}
1201
+ </div>
1202
+
1203
+ <div class="footer-section">
1204
+ <h3>Developer</h3>
1205
+ <p><b>Varsha Dewangan</b></p> {# Your Name #}
1206
+ <p><b>Data Scientist</b></p> {# Specific role and context #}
1207
+ </div>
1208
+
1209
+ </div>
1210
+
1211
+ <div class="social-links">
1212
+ <h3>Connect with me!</h3>
1213
+ <div class="social-icons-row"> {# This new div will hold your icons in a row #}
1214
+ <a href="mailto:varshadewangan1605@gmail.com" class="social-link">📧</a> {# Link to your email #}
1215
+ <a href="https://www.linkedin.com/in/varsha-dewangan-197983256/" target="_blank" class="social-link">💼</a> {# Link to your LinkedIn #}
1216
+ <a href="https://github.com/Varsha-1605" target="_blank" class="social-link">🐙</a> {# Link to your GitHub #}
1217
+ <a href="https://www.instagram.com/varshadewangan454/" target="_blank" class="social-link">📸</a> {# Replaced Twitter with Instagram #}
1218
+ </div>
1219
+ </div>
1220
+
1221
+ <p style="margin-top: 2rem; padding-top: 2rem; border-top: 1px solid #333; color: #ccc;">
1222
+ <b>© 2024 Perceptra AI. All rights reserved.</b>
1223
+ </p>
1224
+ </div>
1225
+ </footer>
1226
+
1227
+ <script>
1228
// Tab switching: toggles the `active` class on tab headers and their matching
// content panes (pane id === the header's data-tab value). Flask-rendered
// caption/segment results persist across switches; only other (future) tabs
// get their panel reset to placeholder text.
document.querySelectorAll('.tab').forEach(tab => {
    tab.addEventListener('click', function () {
        // Deactivate every tab header and content pane.
        document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
        document.querySelectorAll('.tab-content').forEach(c => c.classList.remove('active'));

        // Activate the clicked tab and its corresponding pane.
        this.classList.add('active');
        const tabId = this.getAttribute('data-tab');
        document.getElementById(tabId).classList.add('active');

        // Reset placeholder text for any non-Flask-driven tab.
        // BUG FIX: #analysisResult does not exist in the current markup, so the
        // previous unguarded innerHTML assignment would throw a TypeError if a
        // third tab were ever added and clicked. Guard the missing element.
        if (tabId !== 'caption' && tabId !== 'segment') {
            const analysisEl = document.getElementById('analysisResult');
            if (analysisEl) {
                analysisEl.innerHTML = '<p style="color: #666; font-style: italic;">Detailed analysis coming soon from backend...</p>';
            }
        }
    });
});
1250
+
1251
// On first render, decide which tab should start active based on what the
// Flask template actually produced: prefer segmentation results, then a
// caption, otherwise keep the default set in the HTML (caption tab).
document.addEventListener('DOMContentLoaded', function () {
    // Jinja may render the literal string 'None' when the variable is unset.
    const segmentationImageUrl = "{{ segmentation_image_url }}";
    const captionText = document.getElementById('captionResult').innerText.trim();
    const captionContentExists =
        captionText !== 'Upload an image to see the AI-generated caption...' &&
        captionText !== '';

    // Deactivate whatever is active, then activate the named tab + pane.
    const activate = (tabName) => {
        document.querySelector('.tab.active')?.classList.remove('active');
        document.querySelector('.tab-content.active')?.classList.remove('active');
        document.querySelector(`.tab[data-tab="${tabName}"]`).classList.add('active');
        document.getElementById(tabName).classList.add('active');
    };

    if (segmentationImageUrl && segmentationImageUrl !== 'None') {
        activate('segment');
    } else if (captionContentExists) {
        activate('caption');
    }
    // Neither present: the default active tab in the HTML applies unchanged.
});
1275
+
1276
+
1277
+ // The original JavaScript for file upload simulation (processImage, showResults, drag/drop)
1278
+ // is REMOVED as Flask handles the actual file upload and rendering.
1279
+ // The HTML form now directly submits to Flask.
1280
+
1281
// Smooth-scroll in-page navigation: intercept clicks on any anchor whose
// href starts with '#' and scroll its target section into view.
document.querySelectorAll('a[href^="#"]').forEach(anchor => {
    anchor.addEventListener('click', function (e) {
        e.preventDefault();
        const target = document.querySelector(this.getAttribute('href'));
        if (!target) {
            return; // No matching section: swallow the click, same as before.
        }
        target.scrollIntoView({ behavior: 'smooth', block: 'start' });
    });
});
1294
+
1295
// Darken the navbar background once the page has scrolled past 100px,
// restoring the translucent light background when back near the top.
window.addEventListener('scroll', function () {
    const navbar = document.querySelector('.navbar');
    const pastThreshold = window.scrollY > 100;
    navbar.style.background = pastThreshold
        ? 'rgba(0,0,0,0.9)'
        : 'rgba(255,255,255,0.1)';
});
1304
+
1305
// Animate each .metric-value from 0 up to its rendered number the first time
// it scrolls into view, then restore the element's exact original text.
// BUG FIX: the old version discarded unit suffixes ('%', ' ms', ' FPS') and
// forced one decimal place, so e.g. '10.49%' ended up displayed as '10.5'.
// It also had no guard for non-numeric metric text (parseFloat -> NaN).
function animateMetrics() {
    const metrics = document.querySelectorAll('.metric-value');
    const observer = new IntersectionObserver((entries) => {
        entries.forEach(entry => {
            if (!entry.isIntersecting) return;
            const target = entry.target;
            const originalText = target.textContent;
            const finalValue = parseFloat(originalText);
            observer.unobserve(target); // Animate each metric at most once.
            if (isNaN(finalValue)) {
                return; // Non-numeric metric text: leave it untouched.
            }
            // Everything after the leading number (e.g. '%', ' ms', ' FPS').
            const numMatch = originalText.match(/^[^0-9]*[\d.,]+/);
            const suffix = numMatch ? originalText.slice(numMatch[0].length) : '';
            let currentValue = 0;
            const increment = finalValue / 50; // ~50 steps over ~1.5s.
            const timer = setInterval(() => {
                currentValue += increment;
                if (currentValue >= finalValue) {
                    clearInterval(timer);
                    // Restore the exact server-rendered text (full precision
                    // and unit suffix) rather than a rounded approximation.
                    target.textContent = originalText;
                } else {
                    target.textContent = currentValue.toFixed(1) + suffix;
                }
            }, 30);
        });
    });
    metrics.forEach(metric => observer.observe(metric));
}
1332
+
1333
+ // Architecture details modal (UPDATED for 3D)
1334
+ let scene, camera, renderer, controls, animationFrameId;
1335
+
1336
+ function showArchitectureDetails() {
1337
+ // Prevent multiple modals if clicked rapidly
1338
+ if (document.querySelector('.modal')) {
1339
+ return;
1340
+ }
1341
+
1342
+ const modal = document.createElement('div');
1343
+ modal.className = 'modal'; // Add a class for styling
1344
+ modal.style.cssText = `
1345
+ position: fixed;
1346
+ top: 0;
1347
+ left: 0;
1348
+ width: 100%;
1349
+ height: 100%;
1350
+ background: rgba(0,0,0,0.8);
1351
+ display: flex;
1352
+ align-items: center;
1353
+ justify-content: center;
1354
+ z-index: 9999;
1355
+ backdrop-filter: blur(10px);
1356
+ transition: opacity 0.3s ease;
1357
+ opacity: 0; /* Start hidden for fade-in */
1358
+ `;
1359
+
1360
+ modal.innerHTML = `
1361
+ <div style="background: white; border-radius: 20px; padding: 2rem; max-width: 90%; max-height: 90vh; overflow: hidden; position: relative; display: flex; flex-direction: column;">
1362
+ <button id="closeModalBtn" style="position: absolute; top: 1rem; right: 1rem; border: none; background: none; font-size: 2rem; cursor: pointer; color: #666;">×</button>
1363
+
1364
+ <h2 style="color: #667eea; margin-bottom: 1.5rem; text-align: center; font-size: 2rem;">3D Architecture Visualization</h2>
1365
+
1366
+ <div id="architectureCanvasContainer" class="modal-content-3d" style="flex-grow: 1; min-height: 300px; display: flex; justify-content: center; align-items: center;">
1367
+ <canvas id="architectureCanvas" style="display: block;"></canvas>
1368
+ <p id="loadingText" style="color: #ccc; text-align: center; position: absolute;">Loading 3D model...</p>
1369
+ </div>
1370
+
1371
+ <div style="display: flex; flex-wrap: wrap; justify-content: space-around; gap: 1rem; margin-top: 1.5rem; color: #666;">
1372
+ <div style="flex: 1; min-width: 250px;">
1373
+ <h3 style="color: #667eea; margin-bottom: 0.5rem;">Caption Branch</h3>
1374
+ <ul style="padding-left: 1.5rem; list-style-type: disc;">
1375
+ <li>CNN Feature Extractor</li>
1376
+ <li>Attention Mechanism</li>
1377
+ <li>LSTM Decoder</li>
1378
+ </ul>
1379
+ </div>
1380
+ <div style="flex: 1; min-width: 250px;">
1381
+ <h3 style="color: #667eea; margin-bottom: 0.5rem;">Segmentation Branch</h3>
1382
+ <ul style="padding-left: 1.5rem; list-style-type: disc;">
1383
+ <li>U-Net Architecture</li>
1384
+ <li>Skip Connections</li>
1385
+ <li>Multi-scale Features</li>
1386
+ </ul>
1387
+ </div>
1388
+ </div>
1389
+ <p style="text-align: center; margin-top: 1rem; font-size: 0.9em; color: #555;">
1390
+ Click and drag to rotate the 3D model. Scroll to zoom.
1391
+ </p>
1392
+ </div>
1393
+ `;
1394
+
1395
+ document.body.appendChild(modal);
1396
+
1397
+ // Fade in modal
1398
+ setTimeout(() => modal.style.opacity = '1', 10);
1399
+
1400
+ // Setup 3D scene after modal is in DOM
1401
+ const canvasContainer = document.getElementById('architectureCanvasContainer');
1402
+ const canvas = document.getElementById('architectureCanvas');
1403
+ const loadingText = document.getElementById('loadingText');
1404
+
1405
+
1406
+ // 1. Scene
1407
+ scene = new THREE.Scene();
1408
+ scene.background = new THREE.Color(0x1a1a1a); // Dark background for the 3D space
1409
+
1410
+ // 2. Camera
1411
+ camera = new THREE.PerspectiveCamera(75, 1, 0.1, 1000); // Set initial aspect to 1, will be updated
1412
+ camera.position.set(0, 0, 10); // Adjust camera position for better view
1413
+
1414
+ // 3. Renderer
1415
+ renderer = new THREE.WebGLRenderer({ canvas: canvas, antialias: true });
1416
+ renderer.setPixelRatio(window.devicePixelRatio);
1417
+ // setCanvasSize(); // Initial size setup - call it after objects are added for more reliable dimensions
1418
+
1419
+ // 4. Lights
1420
+ const ambientLight = new THREE.AmbientLight(0x404040); // soft white light
1421
+ scene.add(ambientLight);
1422
+ const directionalLight1 = new THREE.DirectionalLight(0xffffff, 0.7);
1423
+ directionalLight1.position.set(5, 5, 5).normalize();
1424
+ scene.add(directionalLight1);
1425
+ const directionalLight2 = new THREE.DirectionalLight(0xffffff, 0.5);
1426
+ directionalLight2.position.set(-5, -5, -5).normalize();
1427
+ scene.add(directionalLight2);
1428
+
1429
+ // 5. Controls
1430
+ controls = new OrbitControls(camera, renderer.domElement);
1431
+ controls.enableDamping = true; // an animation loop is required when damping is enabled
1432
+ controls.dampingFactor = 0.05;
1433
+ controls.screenSpacePanning = false;
1434
+ controls.minDistance = 5;
1435
+ controls.maxDistance = 20;
1436
+
1437
+ // --- Create Model Architecture Elements ---
1438
+
1439
+ // Colors
1440
+ const encoderColor = 0x667eea; // Blue
1441
+ const attentionColor = 0xfeca57; // Yellow
1442
+ const decoderColor = 0x764ba2; // Purple
1443
+ const segmentationColor = 0x4ecdc4; // Teal
1444
+ const arrowColor = 0xcccccc; // Light grey
1445
+
1446
+ const boxGeometry = new THREE.BoxGeometry(2, 2, 2);
1447
+ const cylinderGeometry = new THREE.CylinderGeometry(0.5, 0.5, 2, 32);
1448
+ const coneGeometry = new THREE.ConeGeometry(0.7, 1.5, 32);
1449
+
1450
+ // Encoder (CNN)
1451
+ const encoderMaterial = new THREE.MeshPhongMaterial({ color: encoderColor });
1452
+ const encoder = new THREE.Mesh(boxGeometry, encoderMaterial);
1453
+ encoder.position.set(-4, 0, 0);
1454
+ scene.add(encoder);
1455
+
1456
+ // Attention Mechanism
1457
+ const attentionMaterial = new THREE.MeshPhongMaterial({ color: attentionColor, emissive: attentionColor, emissiveIntensity: 0.3 });
1458
+ const attention = new THREE.Mesh(coneGeometry, attentionMaterial);
1459
+ attention.rotation.z = -Math.PI / 2; // Point towards decoder
1460
+ attention.position.set(-1.5, 0, 0); // Between encoder and decoder
1461
+ scene.add(attention);
1462
+
1463
+ // Decoder (LSTM)
1464
+ const decoderMaterial = new THREE.MeshPhongMaterial({ color: decoderColor });
1465
+ const decoder = new THREE.Mesh(boxGeometry, decoderMaterial);
1466
+ decoder.position.set(1.5, 1, 0); // Position slightly up for caption branch
1467
+ scene.add(decoder);
1468
+
1469
+ // Segmentation Branch (U-Net inspired - another block)
1470
+ const segmentation = new THREE.Mesh(boxGeometry, new THREE.MeshPhongMaterial({ color: segmentationColor }));
1471
+ segmentation.position.set(1.5, -1, 0); // Position slightly down for segmentation branch
1472
+ scene.add(segmentation);
1473
+
1474
+ // Arrows (data flow)
1475
+ const arrowMaterial = new THREE.MeshBasicMaterial({ color: arrowColor });
1476
+ const arrow1 = new THREE.Mesh(new THREE.BoxGeometry(2.5, 0.2, 0.2), arrowMaterial); // Encoder to Attention
1477
+ arrow1.position.set(-2.75, 0, 0);
1478
+ scene.add(arrow1);
1479
+
1480
+ const arrow2 = new THREE.Mesh(new THREE.BoxGeometry(2.5, 0.2, 0.2), arrowMaterial); // Attention to Decoder
1481
+ arrow2.position.set(0, 1, 0);
1482
+ arrow2.rotation.z = Math.PI / 4; // Angle it towards decoder
1483
+ scene.add(arrow2);
1484
+
1485
+ const arrow3 = new THREE.Mesh(new THREE.BoxGeometry(2.5, 0.2, 0.2), arrowMaterial); // Shared Feature to Segmentation
1486
+ arrow3.position.set(0, -1, 0);
1487
+ arrow3.rotation.z = -Math.PI / 4; // Angle it towards segmentation
1488
+ scene.add(arrow3);
1489
+
1490
+ // Output Caption Arrow/Block
1491
+ const outputCaption = new THREE.Mesh(new THREE.BoxGeometry(1, 0.8, 0.8), decoderMaterial);
1492
+ outputCaption.position.set(4, 1, 0);
1493
+ scene.add(outputCaption);
1494
+ const arrow4 = new THREE.Mesh(new THREE.BoxGeometry(2.5, 0.2, 0.2), arrowMaterial);
1495
+ arrow4.position.set(2.75, 1, 0);
1496
+ scene.add(arrow4);
1497
+
1498
+ // Output Segmentation Arrow/Block
1499
+ const outputSegmentation = new THREE.Mesh(new THREE.BoxGeometry(1, 0.8, 0.8), new THREE.MeshPhongMaterial({ color: segmentationColor }));
1500
+ outputSegmentation.position.set(4, -1, 0);
1501
+ scene.add(outputSegmentation);
1502
+ const arrow5 = new THREE.Mesh(new THREE.BoxGeometry(2.5, 0.2, 0.2), arrowMaterial);
1503
+ arrow5.position.set(2.75, -1, 0);
1504
+ scene.add(arrow5);
1505
+
1506
// --- Simple Text Labels (using CanvasTexture for better readability) ---
// Rasterises `message` onto an off-screen canvas and wraps it in a THREE.Sprite
// so the label always faces the camera.
// Fix: the original also set fillStyle/textAlign/textBaseline before resizing
// the canvas — resizing resets all 2D-context state, so those early assignments
// were dead code. Only the font matters before measureText().
function createTextSprite(message, color, fontSize = 60) {
    const canvas = document.createElement('canvas');
    const context = canvas.getContext('2d');

    // The font must be set before measuring so the metrics match the final text.
    context.font = `${fontSize}px Arial`;
    const metrics = context.measureText(message);
    canvas.width = metrics.width + 20;   // small padding around the text
    canvas.height = fontSize + 20;

    // Resizing the canvas reset the context, so (re)apply the drawing state now.
    context.font = `${fontSize}px Arial`;
    context.fillStyle = color;
    context.textAlign = 'center';
    context.textBaseline = 'middle';
    context.fillText(message, canvas.width / 2, canvas.height / 2);

    const texture = new THREE.CanvasTexture(canvas);
    const spriteMaterial = new THREE.SpriteMaterial({ map: texture });
    const sprite = new THREE.Sprite(spriteMaterial);
    // Adjust scale based on text length and desired size in 3D
    sprite.scale.set(canvas.width * 0.01, canvas.height * 0.01, 1);
    return sprite;
}
1530
+
1531
// Float a text sprite near each block so the diagram is self-describing.
// Labels on the caption (top) branch sit above their block; labels on the
// segmentation (bottom) branch sit below.
// NOTE(review): encoder/attention/decoder are meshes created earlier in the
// enclosing function, above this excerpt.
const encoderLabel = createTextSprite("Encoder", "#ffffff");
encoderLabel.position.set(encoder.position.x, encoder.position.y + 1.5, encoder.position.z);
scene.add(encoderLabel);

const attentionLabel = createTextSprite("Attention", "#ffffff");
attentionLabel.position.set(attention.position.x, attention.position.y + 1.2, attention.position.z);
scene.add(attentionLabel);

const decoderLabel = createTextSprite("Decoder", "#ffffff");
decoderLabel.position.set(decoder.position.x, decoder.position.y + 1.5, decoder.position.z);
scene.add(decoderLabel);

const segmentationLabel = createTextSprite("Segmentation", "#ffffff");
segmentationLabel.position.set(segmentation.position.x, segmentation.position.y - 1.5, segmentation.position.z);
scene.add(segmentationLabel);

const outputCaptionLabel = createTextSprite("Caption", "#ffffff");
outputCaptionLabel.position.set(outputCaption.position.x, outputCaption.position.y + 0.8, outputCaption.position.z);
scene.add(outputCaptionLabel);

const outputSegmentationLabel = createTextSprite("Masks", "#ffffff");
outputSegmentationLabel.position.set(outputSegmentation.position.x, outputSegmentation.position.y - 0.8, outputSegmentation.position.z);
scene.add(outputSegmentationLabel);
1554
+
1555
// Size the canvas and hide the loading text once the modal has rendered.
// The 100ms delay gives the browser a chance to lay the modal out so the
// container reports non-zero dimensions before we size the renderer.
setTimeout(() => {
    // Ensure the container is ready and has dimensions
    const canvasContainer = document.getElementById('architectureCanvasContainer');
    const loadingText = document.getElementById('loadingText');

    if (canvasContainer && canvasContainer.clientWidth > 0) {
        const width = canvasContainer.clientWidth;
        const height = canvasContainer.clientHeight;

        // NOTE(review): renderer/camera are closure variables created earlier
        // in the enclosing function.
        renderer.setSize(width, height);
        camera.aspect = width / height;
        camera.updateProjectionMatrix();

        // Hide the loading text now that the canvas is sized
        if (loadingText) {
            loadingText.style.display = 'none';
        }

        // Start the animation loop
        animate();
    } else {
        // Fallback: the modal never laid out, so surface an error
        // instead of leaving a blank canvas behind the loading text.
        console.error("3D canvas container not ready or has no size.");
        if (loadingText) {
            loadingText.innerText = "Error loading 3D model.";
        }
    }
}, 100); // 100ms delay to allow the DOM to render the modal
1586
+
1587
+
1588
// 6. Animation Loop — renders continuously; the stored frame id lets
// closeModal() cancel the loop when the modal is dismissed.
function animate() {
    animationFrameId = requestAnimationFrame(animate);
    controls.update(); // only required if controls.enableDamping or controls.autoRotate are set to true
    renderer.render(scene, camera);
}
1594
+
1595
// Cleanup function for modal close: stops rendering, releases DOM/GPU
// resources, then nulls the closure state so the scene can be garbage-collected.
const closeModal = () => {
    cancelAnimationFrame(animationFrameId); // Stop the animation loop
    if (controls) controls.dispose(); // Dispose controls to free up event listeners
    if (renderer) renderer.dispose(); // Dispose renderer resources
    modal.remove(); // Remove modal from DOM
    window.removeEventListener('resize', setCanvasSize); // Remove resize listener
    // Walk the scene and free geometry/material resources explicitly —
    // three.js does not release them just because the objects are dereferenced.
    scene.traverse(object => {
        if (!object.isMesh) return;
        object.geometry.dispose();
        if (object.material.isMaterial) {
            cleanMaterial(object.material);
        } else {
            // an array of materials
            for (const material of object.material) cleanMaterial(material);
        }
    });
    // Drop all references held by the closure.
    scene = null;
    camera = null;
    renderer = null;
    controls = null;
    animationFrameId = null;
};
1619
+
1620
// Wire up both ways of dismissing the modal: the explicit close button and
// a click on the backdrop outside the inner content.
document.getElementById('closeModalBtn').addEventListener('click', closeModal);
modal.addEventListener('click', (e) => {
    if (e.target === modal) { // Close when clicking outside the inner content
        closeModal();
    }
});

// Keep the renderer and camera in sync with the container on window resize.
// NOTE(review): `canvasContainer` here must come from the enclosing function's
// scope — the one declared inside the setTimeout above is block-scoped and not
// visible here. Confirm it is defined earlier in the function.
const setCanvasSize = () => {
    if (canvasContainer && camera && renderer) {
        const width = canvasContainer.clientWidth;
        const height = canvasContainer.clientHeight;
        renderer.setSize(width, height);
        camera.aspect = width / height;
        camera.updateProjectionMatrix();
    }
};
window.addEventListener('resize', setCanvasSize);
1637
+ }
1638
+
1639
// Helper to dispose materials (for cleanup).
// Frees every disposable resource hanging off the material (textures, render
// targets, ...) and then disposes the material itself.
// Fix: the original released the sub-resources but never called
// material.dispose(), leaking the material's own GPU program.
function cleanMaterial(material) {
    for (const key of Object.keys(material)) {
        const value = material[key];
        if (value && typeof value === 'object' && 'dispose' in value) {
            value.dispose();
        }
    }
    // Dispose the material itself (guarded: dispose() is idempotent in three.js,
    // so shared materials cleaned more than once at teardown are fine).
    if (typeof material.dispose === 'function') {
        material.dispose();
    }
}
1648
+
1649
+
1650
// Network node hover effects (Existing): while the pointer is over a node,
// show a small tooltip populated from the node's data-info attribute; remove
// it again on mouseleave.
document.querySelectorAll('.network-node').forEach(node => {
    node.addEventListener('mouseenter', function() {
        const info = this.getAttribute('data-info');
        if (info) {
            const tooltip = document.createElement('div');
            tooltip.style.cssText = `
                position: absolute;
                background: rgba(0,0,0,0.9);
                color: white;
                padding: 1rem;
                border-radius: 8px;
                font-size: 0.9rem;
                max-width: 200px;
                z-index: 1000;
                top: -60px;
                left: 50%;
                transform: translateX(-50%);
                pointer-events: none;
            `;
            tooltip.textContent = info;
            tooltip.className = 'tooltip';
            // Anchor the absolutely-positioned tooltip to the node itself.
            this.style.position = 'relative';
            this.appendChild(tooltip);
        }
    });

    node.addEventListener('mouseleave', function() {
        const tooltip = this.querySelector('.tooltip');
        if (tooltip) {
            tooltip.remove();
        }
    });
});
1684
+
1685
// Initialize animations (Existing): run the metric counters and start a
// periodic pulse on every primary call-to-action button once the DOM is ready.
document.addEventListener('DOMContentLoaded', () => {
    animateMetrics();

    // Every 5s, add the pulse class for 1s to draw attention to the CTAs.
    for (const btn of document.querySelectorAll('.btn-primary')) {
        setInterval(() => {
            btn.classList.add('pulse');
            setTimeout(() => btn.classList.remove('pulse'), 1000);
        }, 5000);
    }
});
1697
+
1698
// Add particle animation to hero section (Existing).
// Sprinkles 50 small, slowly floating dots across the hero banner.
// Fix: guard against a missing .hero element — the original would throw a
// TypeError on appendChild when this script runs on a page without a hero.
function createParticles() {
    const hero = document.querySelector('.hero');
    if (!hero) return; // no hero section on this page; nothing to decorate
    for (let i = 0; i < 50; i++) {
        const particle = document.createElement('div');
        particle.style.cssText = `
            position: absolute;
            width: 2px;
            height: 2px;
            background: rgba(255,255,255,0.5);
            border-radius: 50%;
            left: ${Math.random() * 100}%;
            top: ${Math.random() * 100}%;
            animation: float ${5 + Math.random() * 10}s infinite linear;
            pointer-events: none;
        `;
        hero.appendChild(particle);
    }
}
1717
+
1718
+ // Initialize particle animation (Existing)
1719
+ createParticles();
1720
+
1721
// Add typing effect to hero text (Existing).
// Clears `element` and reveals `text` inside it one character at a time,
// `speed` milliseconds apart. The first character appears immediately.
function typeWriter(element, text, speed = 50) {
    element.innerHTML = '';
    let index = 0;
    (function step() {
        if (index >= text.length) return; // finished typing
        element.innerHTML += text.charAt(index);
        index++;
        setTimeout(step, speed);
    })();
}
1734
+
1735
// Mobile menu toggle (if needed) (Existing).
// Flips the .active class on the nav-links container; the injected media-query
// styles below make the menu visible while the class is present.
function toggleMobileMenu() {
    document.querySelector('.nav-links').classList.toggle('active');
}
1740
+
1741
// Add mobile styles (already in your original HTML, moved to style tag) (Existing).
// Injected at runtime so the collapsed nav menu gets its expanded styling on
// small screens whenever toggleMobileMenu() adds the .active class.
const mobileStyles = document.createElement('style');
mobileStyles.textContent = `
    @media (max-width: 768px) {
        .nav-links.active {
            display: flex;
            flex-direction: column;
            position: absolute;
            top: 100%;
            left: 0;
            width: 100%;
            background: rgba(0,0,0,0.9);
            padding: 2rem;
            backdrop-filter: blur(20px);
        }

        .nav-links.active a {
            padding: 1rem 0;
            border-bottom: 1px solid rgba(255,255,255,0.1);
        }
    }
`;
document.head.appendChild(mobileStyles);
1764
+ </script>
1765
+ </body>
1766
+ </html>
text_files/bleu_metrics.csv ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Epoch,BLEU-4
2
+ 1,0.0836
3
+ 2,0.0923
4
+ 3,0.0986
5
+ 4,0.0996
6
+ 5,0.1011
7
+ 6,0.1009
8
+ 7,0.1028
9
+ 8,0.1029
10
+ 10,0.1032
11
+ 11,0.1024
12
+ 12,0.1011
13
+ 13,0.1028
14
+ 14,0.1034
15
+ 15,0.1042
16
+ 16,0.1047
17
+ 17,0.1035
18
+ 18,0.1054
19
+ 19,0.1053
20
+ 20,0.1058
21
+ 21,0.105
22
+ 22,0.1049
23
+ 23,0.1051
24
+ 24,0.1056
25
+ 25,0.1028
26
+ 26,0.1031
27
+ 27,0.1046
28
+ 28,0.1039
29
+ 29,0.1032
30
+ 30,0.1039
text_files/file.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Parse training log files and export Loss/Perplexity metrics to CSV.

Scans each known log file for lines of the form
``Epoch [X/Y], Step [A/B], Loss: V, Perplexity: W`` and writes the extracted
values, sorted chronologically, to ``training_metrics.csv``.
"""
import os
import re

import pandas as pd

# Log file names to scan, in the order the training runs were produced.
log_files = [
    'training (2).txt',
    'training_log_1_18.txt',
    'training_log_17_27.txt',
    'training_log_21_30.txt',
]

# Accumulates one dict per matched log line.
parsed_data = []

# Captures Epoch, Step, Loss and Perplexity from lines such as:
#   Epoch [1/9], Step [100/3125], Loss: 5.4706, Perplexity: 237.6090
log_pattern = re.compile(
    r"Epoch\s\[(\d+)/\d+\],\sStep\s\[(\d+)/\d+\],\sLoss:\s([\d.]+),\sPerplexity:\s([\d.]+)"
)

print("Starting log parsing...")

for file_name in log_files:
    if not os.path.exists(file_name):
        print(f"Warning: File not found - {file_name}. Skipping.")
        continue

    print(f"Processing {file_name}...")
    # Logs may contain non-ASCII characters, so decode explicitly as UTF-8.
    with open(file_name, 'r', encoding='utf-8') as f:
        for line in f:
            match = log_pattern.search(line)
            if match:
                # Group 1: Epoch, 2: Step, 3: Loss, 4: Perplexity.
                parsed_data.append({
                    'Epoch': int(match.group(1)),
                    'Step': int(match.group(2)),
                    'Loss': float(match.group(3)),
                    'Perplexity': float(match.group(4)),
                })

if not parsed_data:
    # Guard: sorting an empty, column-less DataFrame by column name would
    # raise KeyError, so bail out with a clear message instead of crashing.
    print("No metrics found in any log file; nothing to save.")
else:
    df = pd.DataFrame(parsed_data)

    # Sort by Epoch and Step to ensure correct chronological order.
    df_sorted = df.sort_values(by=['Epoch', 'Step']).reset_index(drop=True)

    output_csv_file = 'training_metrics.csv'
    df_sorted.to_csv(output_csv_file, index=False)

    print(f"\nSuccessfully parsed logs and saved data to {output_csv_file}")
    print("You can now import this CSV file into Power BI to create your visualizations.")
    print("\nFirst few rows of the generated CSV:")
    print(df_sorted.head())
text_files/scores.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Parse training logs and export per-epoch validation BLEU-4 scores to CSV.

Tracks the most recently seen ``Epoch [X/Y]`` marker while scanning each log,
attributes every ``Validation BLEU-4: S`` line to that epoch, de-duplicates
(keeping the last score per epoch) and writes the result to ``bleu_metrics.csv``.
"""
import os
import re

import pandas as pd

# Log file names to scan, in the order the training runs were produced.
log_files = [
    'training (2).txt',
    'training_log_1_18.txt',
    'training_log_17_27.txt',
    'training_log_21_30.txt',
]

# Accumulates one {'Epoch': ..., 'BLEU-4': ...} dict per score line found.
bleu_data = []

# Captures the epoch number from training progress lines, e.g. "Epoch [3/9], ..."
epoch_pattern = re.compile(r"Epoch\s\[(\d+)/\d+],")

# Captures the score from lines such as "Validation BLEU-4: 0.0986"
bleu_pattern = re.compile(r"Validation BLEU-4:\s([\d.]+)")

current_epoch = None  # Most recent epoch seen; BLEU lines are attributed to it.

print("Starting BLEU score parsing...")

for file_name in log_files:
    if not os.path.exists(file_name):
        print(f"Warning: File not found - {file_name}. Skipping.")
        continue

    print(f"Processing {file_name} for BLEU scores...")
    with open(file_name, 'r', encoding='utf-8') as f:  # Use UTF-8 encoding
        for line in f:
            # Progress lines update the epoch context before any BLEU line appears.
            epoch_match = epoch_pattern.search(line)
            if epoch_match:
                current_epoch = int(epoch_match.group(1))

            bleu_match = bleu_pattern.search(line)
            if bleu_match:
                bleu_score = float(bleu_match.group(1))
                # Only record scores that can be attributed to an epoch.
                if current_epoch is not None:
                    bleu_data.append({
                        'Epoch': current_epoch,
                        'BLEU-4': bleu_score,
                    })
                else:
                    print(f"Warning: Found BLEU score ({bleu_score}) without a preceding epoch in {file_name}. Skipping this entry.")

if not bleu_data:
    # Guard: drop_duplicates/sort_values on an empty, column-less DataFrame
    # would raise KeyError, so bail out with a clear message instead.
    print("No BLEU scores found in any log file; nothing to save.")
else:
    df_bleu = pd.DataFrame(bleu_data)

    # If validation logged several scores for one epoch, keep the last one.
    df_bleu_unique = df_bleu.drop_duplicates(subset=['Epoch'], keep='last')

    # Sort the data by Epoch.
    df_bleu_sorted = df_bleu_unique.sort_values(by=['Epoch']).reset_index(drop=True)

    output_csv_file = 'bleu_metrics.csv'
    df_bleu_sorted.to_csv(output_csv_file, index=False)

    print(f"\nSuccessfully parsed BLEU scores and saved data to {output_csv_file}")
    print("You can now import this CSV file into Power BI to create your BLEU score visualizations.")
    print("\nFirst few rows of the generated BLEU CSV:")
    print(df_bleu_sorted.head())
text_files/training (2).txt ADDED
@@ -0,0 +1,335 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-05 09:14:36,398 - __main__ - INFO -
2
+ --- Starting Model Training ---
3
+ 2025-06-05 09:14:36,399 - __main__ - INFO - Starting training process...
4
+ 2025-06-05 09:14:36,400 - __main__ - INFO - Using device: cuda
5
+ 2025-06-05 09:14:36,405 - __main__ - INFO - Initializing training dataset for vocabulary building...
6
+ 2025-06-05 09:14:37,522 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_train2017.json
7
+ 2025-06-05 09:17:31,012 - __main__ - INFO -
8
+ --- Starting Model Training ---
9
+ 2025-06-05 09:17:31,013 - __main__ - INFO - Starting training process...
10
+ 2025-06-05 09:17:31,014 - __main__ - INFO - Using device: cuda
11
+ 2025-06-05 09:17:31,088 - __main__ - INFO - Loaded vocabulary from /kaggle/input/vocabulary_s/pytorch/default/1/vocabulary.pkl
12
+ 2025-06-05 09:17:32,171 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_train2017.json
13
+ 2025-06-05 09:23:17,287 - __main__ - INFO - Using subset of 200000 samples for the dataset.
14
+ 2025-06-05 09:23:17,288 - __main__ - INFO - Dataset size after filtering: 200000 samples.
15
+ 2025-06-05 09:23:17,386 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_val2017.json
16
+ 2025-06-05 09:23:33,589 - __main__ - INFO - Dataset size after filtering: 25014 samples.
17
+ 2025-06-05 09:23:33,590 - __main__ - INFO - Training dataset size: 200000
18
+ 2025-06-05 09:23:33,591 - __main__ - INFO - Validation dataset size: 25014
19
+ 2025-06-05 09:23:34,957 - __main__ - INFO - ResNet encoder base layers are fine-tuning enabled.
20
+ 2025-06-05 09:23:35,415 - __main__ - INFO - No checkpoint found. Starting training from scratch.
21
+ 2025-06-05 09:25:12,222 - __main__ - INFO - Epoch [1/9], Step [100/3125], Loss: 5.4706, Perplexity: 237.6090
22
+ 2025-06-05 09:26:45,054 - __main__ - INFO - Epoch [1/9], Step [200/3125], Loss: 5.1889, Perplexity: 179.2651
23
+ 2025-06-05 09:28:17,121 - __main__ - INFO - Epoch [1/9], Step [300/3125], Loss: 4.7951, Perplexity: 120.9148
24
+ 2025-06-05 09:29:49,844 - __main__ - INFO - Epoch [1/9], Step [400/3125], Loss: 4.1438, Perplexity: 63.0419
25
+ 2025-06-05 09:31:22,479 - __main__ - INFO - Epoch [1/9], Step [500/3125], Loss: 4.0644, Perplexity: 58.2316
26
+ 2025-06-05 09:32:55,263 - __main__ - INFO - Epoch [1/9], Step [600/3125], Loss: 4.1386, Perplexity: 62.7179
27
+ 2025-06-05 09:34:28,639 - __main__ - INFO - Epoch [1/9], Step [700/3125], Loss: 4.4148, Perplexity: 82.6655
28
+ 2025-06-05 09:36:01,562 - __main__ - INFO - Epoch [1/9], Step [800/3125], Loss: 4.0775, Perplexity: 58.9954
29
+ 2025-06-05 09:37:34,470 - __main__ - INFO - Epoch [1/9], Step [900/3125], Loss: 3.8928, Perplexity: 49.0485
30
+ 2025-06-05 09:39:07,144 - __main__ - INFO - Epoch [1/9], Step [1000/3125], Loss: 3.7069, Perplexity: 40.7267
31
+ 2025-06-05 09:40:39,832 - __main__ - INFO - Epoch [1/9], Step [1100/3125], Loss: 3.7165, Perplexity: 41.1201
32
+ 2025-06-05 09:42:12,245 - __main__ - INFO - Epoch [1/9], Step [1200/3125], Loss: 3.6987, Perplexity: 40.3960
33
+ 2025-06-05 09:43:44,664 - __main__ - INFO - Epoch [1/9], Step [1300/3125], Loss: 3.8443, Perplexity: 46.7262
34
+ 2025-06-05 09:45:16,983 - __main__ - INFO - Epoch [1/9], Step [1400/3125], Loss: 3.7788, Perplexity: 43.7654
35
+ 2025-06-05 09:46:49,549 - __main__ - INFO - Epoch [1/9], Step [1500/3125], Loss: 3.9427, Perplexity: 51.5586
36
+ 2025-06-05 09:48:22,306 - __main__ - INFO - Epoch [1/9], Step [1600/3125], Loss: 3.9204, Perplexity: 50.4209
37
+ 2025-06-05 09:49:55,016 - __main__ - INFO - Epoch [1/9], Step [1700/3125], Loss: 3.6418, Perplexity: 38.1600
38
+ 2025-06-05 09:51:27,127 - __main__ - INFO - Epoch [1/9], Step [1800/3125], Loss: 3.5172, Perplexity: 33.6913
39
+ 2025-06-05 09:52:59,489 - __main__ - INFO - Epoch [1/9], Step [1900/3125], Loss: 3.6409, Perplexity: 38.1244
40
+ 2025-06-05 09:54:31,921 - __main__ - INFO - Epoch [1/9], Step [2000/3125], Loss: 3.2969, Perplexity: 27.0292
41
+ 2025-06-05 09:56:04,383 - __main__ - INFO - Epoch [1/9], Step [2100/3125], Loss: 3.4517, Perplexity: 31.5544
42
+ 2025-06-05 09:57:36,919 - __main__ - INFO - Epoch [1/9], Step [2200/3125], Loss: 3.4920, Perplexity: 32.8529
43
+ 2025-06-05 09:59:10,022 - __main__ - INFO - Epoch [1/9], Step [2300/3125], Loss: 3.7028, Perplexity: 40.5603
44
+ 2025-06-05 10:00:42,264 - __main__ - INFO - Epoch [1/9], Step [2400/3125], Loss: 3.6794, Perplexity: 39.6238
45
+ 2025-06-05 10:02:14,437 - __main__ - INFO - Epoch [1/9], Step [2500/3125], Loss: 3.4977, Perplexity: 33.0382
46
+ 2025-06-05 10:03:47,338 - __main__ - INFO - Epoch [1/9], Step [2600/3125], Loss: 3.4883, Perplexity: 32.7311
47
+ 2025-06-05 10:05:20,077 - __main__ - INFO - Epoch [1/9], Step [2700/3125], Loss: 3.4596, Perplexity: 31.8036
48
+ 2025-06-05 10:06:52,363 - __main__ - INFO - Epoch [1/9], Step [2800/3125], Loss: 3.4043, Perplexity: 30.0920
49
+ 2025-06-05 10:08:24,721 - __main__ - INFO - Epoch [1/9], Step [2900/3125], Loss: 3.1842, Perplexity: 24.1488
50
+ 2025-06-05 10:09:56,919 - __main__ - INFO - Epoch [1/9], Step [3000/3125], Loss: 3.2843, Perplexity: 26.6914
51
+ 2025-06-05 10:11:29,066 - __main__ - INFO - Epoch [1/9], Step [3100/3125], Loss: 3.1950, Perplexity: 24.4100
52
+ 2025-06-05 10:11:52,259 - __main__ - INFO - Epoch 1 Training finished. Avg Loss: 3.8943, Time: 2896.84s
53
+ 2025-06-05 10:24:31,901 - __main__ - INFO - Validation Avg Loss: 3.1313, Perplexity: 22.9038
54
+ 2025-06-05 10:24:44,857 - __main__ - INFO - Validation BLEU-4: 0.0836
55
+ 2025-06-05 10:24:45,632 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.0836.pth
56
+ 2025-06-05 10:24:45,633 - __main__ - INFO - Performing memory optimization after epoch...
57
+ 2025-06-05 10:24:45,924 - __main__ - INFO - CUDA cache emptied.
58
+ 2025-06-05 10:24:46,199 - __main__ - INFO - Python garbage collector run.
59
+ 2025-06-05 10:26:22,213 - __main__ - INFO - Epoch [2/9], Step [100/3125], Loss: 3.1401, Perplexity: 23.1065
60
+ 2025-06-05 10:27:55,157 - __main__ - INFO - Epoch [2/9], Step [200/3125], Loss: 3.0563, Perplexity: 21.2493
61
+ 2025-06-05 10:29:27,968 - __main__ - INFO - Epoch [2/9], Step [300/3125], Loss: 3.2782, Perplexity: 26.5282
62
+ 2025-06-05 10:31:00,270 - __main__ - INFO - Epoch [2/9], Step [400/3125], Loss: 3.1834, Perplexity: 24.1295
63
+ 2025-06-05 10:32:33,440 - __main__ - INFO - Epoch [2/9], Step [500/3125], Loss: 3.3472, Perplexity: 28.4244
64
+ 2025-06-05 10:34:06,422 - __main__ - INFO - Epoch [2/9], Step [600/3125], Loss: 3.1609, Perplexity: 23.5928
65
+ 2025-06-05 10:35:39,261 - __main__ - INFO - Epoch [2/9], Step [700/3125], Loss: 2.9984, Perplexity: 20.0538
66
+ 2025-06-05 10:37:11,706 - __main__ - INFO - Epoch [2/9], Step [800/3125], Loss: 3.2494, Perplexity: 25.7758
67
+ 2025-06-05 10:38:44,368 - __main__ - INFO - Epoch [2/9], Step [900/3125], Loss: 3.1955, Perplexity: 24.4232
68
+ 2025-06-05 10:40:17,199 - __main__ - INFO - Epoch [2/9], Step [1000/3125], Loss: 3.4101, Perplexity: 30.2673
69
+ 2025-06-05 10:41:49,573 - __main__ - INFO - Epoch [2/9], Step [1100/3125], Loss: 3.3706, Perplexity: 29.0971
70
+ 2025-06-05 10:43:21,777 - __main__ - INFO - Epoch [2/9], Step [1200/3125], Loss: 3.1680, Perplexity: 23.7608
71
+ 2025-06-05 10:44:54,540 - __main__ - INFO - Epoch [2/9], Step [1300/3125], Loss: 3.0362, Perplexity: 20.8251
72
+ 2025-06-05 10:46:27,295 - __main__ - INFO - Epoch [2/9], Step [1400/3125], Loss: 3.0925, Perplexity: 22.0321
73
+ 2025-06-05 10:47:59,572 - __main__ - INFO - Epoch [2/9], Step [1500/3125], Loss: 3.1057, Perplexity: 22.3241
74
+ 2025-06-05 10:49:32,053 - __main__ - INFO - Epoch [2/9], Step [1600/3125], Loss: 3.3277, Perplexity: 27.8730
75
+ 2025-06-05 10:51:04,671 - __main__ - INFO - Epoch [2/9], Step [1700/3125], Loss: 3.1610, Perplexity: 23.5945
76
+ 2025-06-05 10:52:37,560 - __main__ - INFO - Epoch [2/9], Step [1800/3125], Loss: 3.2718, Perplexity: 26.3582
77
+ 2025-06-05 10:54:10,358 - __main__ - INFO - Epoch [2/9], Step [1900/3125], Loss: 3.1425, Perplexity: 23.1612
78
+ 2025-06-05 10:55:43,281 - __main__ - INFO - Epoch [2/9], Step [2000/3125], Loss: 3.0522, Perplexity: 21.1619
79
+ 2025-06-05 10:57:15,767 - __main__ - INFO - Epoch [2/9], Step [2100/3125], Loss: 2.9711, Perplexity: 19.5126
80
+ 2025-06-05 10:58:48,072 - __main__ - INFO - Epoch [2/9], Step [2200/3125], Loss: 3.0638, Perplexity: 21.4079
81
+ 2025-06-05 11:00:20,322 - __main__ - INFO - Epoch [2/9], Step [2300/3125], Loss: 3.1867, Perplexity: 24.2093
82
+ 2025-06-05 11:01:53,270 - __main__ - INFO - Epoch [2/9], Step [2400/3125], Loss: 3.0060, Perplexity: 20.2070
83
+ 2025-06-05 11:03:25,315 - __main__ - INFO - Epoch [2/9], Step [2500/3125], Loss: 3.1718, Perplexity: 23.8494
84
+ 2025-06-05 11:04:57,298 - __main__ - INFO - Epoch [2/9], Step [2600/3125], Loss: 3.0628, Perplexity: 21.3874
85
+ 2025-06-05 11:06:29,658 - __main__ - INFO - Epoch [2/9], Step [2700/3125], Loss: 3.3898, Perplexity: 29.6609
86
+ 2025-06-05 11:08:01,931 - __main__ - INFO - Epoch [2/9], Step [2800/3125], Loss: 2.7914, Perplexity: 16.3038
87
+ 2025-06-05 11:09:34,397 - __main__ - INFO - Epoch [2/9], Step [2900/3125], Loss: 2.8758, Perplexity: 17.7403
88
+ 2025-06-05 11:11:06,755 - __main__ - INFO - Epoch [2/9], Step [3000/3125], Loss: 3.0766, Perplexity: 21.6847
89
+ 2025-06-05 11:12:39,179 - __main__ - INFO - Epoch [2/9], Step [3100/3125], Loss: 3.0787, Perplexity: 21.7303
90
+ 2025-06-05 11:13:02,556 - __main__ - INFO - Epoch 2 Training finished. Avg Loss: 3.1318, Time: 2896.36s
91
+ 2025-06-05 11:25:51,359 - __main__ - INFO - Validation Avg Loss: 2.8683, Perplexity: 17.6076
92
+ 2025-06-05 11:26:01,678 - __main__ - INFO - Validation BLEU-4: 0.0923
93
+ 2025-06-05 11:26:02,402 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.0923.pth
94
+ 2025-06-05 11:26:02,404 - __main__ - INFO - Performing memory optimization after epoch...
95
+ 2025-06-05 11:26:02,690 - __main__ - INFO - CUDA cache emptied.
96
+ 2025-06-05 11:26:03,026 - __main__ - INFO - Python garbage collector run.
97
+ 2025-06-05 11:27:38,287 - __main__ - INFO - Epoch [3/9], Step [100/3125], Loss: 3.1375, Perplexity: 23.0468
98
+ 2025-06-05 11:29:10,992 - __main__ - INFO - Epoch [3/9], Step [200/3125], Loss: 2.9503, Perplexity: 19.1119
99
+ 2025-06-05 11:30:43,374 - __main__ - INFO - Epoch [3/9], Step [300/3125], Loss: 2.9096, Perplexity: 18.3486
100
+ 2025-06-05 11:32:15,961 - __main__ - INFO - Epoch [3/9], Step [400/3125], Loss: 2.9213, Perplexity: 18.5648
101
+ 2025-06-05 11:33:48,692 - __main__ - INFO - Epoch [3/9], Step [500/3125], Loss: 2.7807, Perplexity: 16.1304
102
+ 2025-06-05 11:35:21,035 - __main__ - INFO - Epoch [3/9], Step [600/3125], Loss: 2.9757, Perplexity: 19.6029
103
+ 2025-06-05 11:36:53,722 - __main__ - INFO - Epoch [3/9], Step [700/3125], Loss: 2.7691, Perplexity: 15.9448
104
+ 2025-06-05 11:38:26,136 - __main__ - INFO - Epoch [3/9], Step [800/3125], Loss: 2.6290, Perplexity: 13.8597
105
+ 2025-06-05 11:39:58,718 - __main__ - INFO - Epoch [3/9], Step [900/3125], Loss: 2.7852, Perplexity: 16.2028
106
+ 2025-06-05 11:41:31,263 - __main__ - INFO - Epoch [3/9], Step [1000/3125], Loss: 2.9282, Perplexity: 18.6934
107
+ 2025-06-05 11:43:04,789 - __main__ - INFO - Epoch [3/9], Step [1100/3125], Loss: 3.0185, Perplexity: 20.4607
108
+ 2025-06-05 11:44:37,274 - __main__ - INFO - Epoch [3/9], Step [1200/3125], Loss: 3.1393, Perplexity: 23.0879
109
+ 2025-06-05 11:46:10,068 - __main__ - INFO - Epoch [3/9], Step [1300/3125], Loss: 2.9986, Perplexity: 20.0580
110
+ 2025-06-05 11:47:42,373 - __main__ - INFO - Epoch [3/9], Step [1400/3125], Loss: 2.8490, Perplexity: 17.2704
111
+ 2025-06-05 11:49:14,841 - __main__ - INFO - Epoch [3/9], Step [1500/3125], Loss: 3.1596, Perplexity: 23.5610
112
+ 2025-06-05 11:50:47,296 - __main__ - INFO - Epoch [3/9], Step [1600/3125], Loss: 2.9312, Perplexity: 18.7505
113
+ 2025-06-05 11:52:19,632 - __main__ - INFO - Epoch [3/9], Step [1700/3125], Loss: 2.8247, Perplexity: 16.8555
114
+ 2025-06-05 11:53:52,241 - __main__ - INFO - Epoch [3/9], Step [1800/3125], Loss: 2.9927, Perplexity: 19.9386
115
+ 2025-06-05 11:55:24,747 - __main__ - INFO - Epoch [3/9], Step [1900/3125], Loss: 2.7951, Perplexity: 16.3638
116
+ 2025-06-05 11:56:57,095 - __main__ - INFO - Epoch [3/9], Step [2000/3125], Loss: 2.8393, Perplexity: 17.1032
117
+ 2025-06-05 11:58:29,803 - __main__ - INFO - Epoch [3/9], Step [2100/3125], Loss: 3.0383, Perplexity: 20.8706
118
+ 2025-06-05 12:00:01,877 - __main__ - INFO - Epoch [3/9], Step [2200/3125], Loss: 3.1580, Perplexity: 23.5244
119
+ 2025-06-05 12:01:34,524 - __main__ - INFO - Epoch [3/9], Step [2300/3125], Loss: 2.9753, Perplexity: 19.5955
120
+ 2025-06-05 12:03:06,876 - __main__ - INFO - Epoch [3/9], Step [2400/3125], Loss: 2.7754, Perplexity: 16.0452
121
+ 2025-06-05 12:04:39,456 - __main__ - INFO - Epoch [3/9], Step [2500/3125], Loss: 2.5844, Perplexity: 13.2559
122
+ 2025-06-05 12:06:12,314 - __main__ - INFO - Epoch [3/9], Step [2600/3125], Loss: 2.9575, Perplexity: 19.2490
123
+ 2025-06-05 12:07:44,508 - __main__ - INFO - Epoch [3/9], Step [2700/3125], Loss: 2.7714, Perplexity: 15.9803
124
+ 2025-06-05 12:09:16,908 - __main__ - INFO - Epoch [3/9], Step [2800/3125], Loss: 2.6014, Perplexity: 13.4829
125
+ 2025-06-05 12:10:49,531 - __main__ - INFO - Epoch [3/9], Step [2900/3125], Loss: 2.9240, Perplexity: 18.6159
126
+ 2025-06-05 12:12:22,014 - __main__ - INFO - Epoch [3/9], Step [3000/3125], Loss: 2.8361, Perplexity: 17.0495
127
+ 2025-06-05 12:13:54,500 - __main__ - INFO - Epoch [3/9], Step [3100/3125], Loss: 2.9557, Perplexity: 19.2145
128
+ 2025-06-05 12:14:17,729 - __main__ - INFO - Epoch 3 Training finished. Avg Loss: 2.9138, Time: 2894.70s
129
+ 2025-06-05 12:27:10,365 - __main__ - INFO - Validation Avg Loss: 2.7487, Perplexity: 15.6226
130
+ 2025-06-05 12:27:20,942 - __main__ - INFO - Validation BLEU-4: 0.0986
131
+ 2025-06-05 12:27:21,682 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.0986.pth
132
+ 2025-06-05 12:27:21,683 - __main__ - INFO - Performing memory optimization after epoch...
133
+ 2025-06-05 12:27:21,948 - __main__ - INFO - CUDA cache emptied.
134
+ 2025-06-05 12:27:22,293 - __main__ - INFO - Python garbage collector run.
135
+ 2025-06-05 12:28:57,255 - __main__ - INFO - Epoch [4/9], Step [100/3125], Loss: 2.8362, Perplexity: 17.0513
136
+ 2025-06-05 12:30:30,013 - __main__ - INFO - Epoch [4/9], Step [200/3125], Loss: 2.5668, Perplexity: 13.0241
137
+ 2025-06-05 12:32:02,681 - __main__ - INFO - Epoch [4/9], Step [300/3125], Loss: 2.7211, Perplexity: 15.1978
138
+ 2025-06-05 12:33:35,422 - __main__ - INFO - Epoch [4/9], Step [400/3125], Loss: 2.9763, Perplexity: 19.6153
139
+ 2025-06-05 12:35:07,932 - __main__ - INFO - Epoch [4/9], Step [500/3125], Loss: 2.7447, Perplexity: 15.5594
140
+ 2025-06-05 12:36:40,894 - __main__ - INFO - Epoch [4/9], Step [600/3125], Loss: 2.6688, Perplexity: 14.4227
141
+ 2025-06-05 12:38:13,657 - __main__ - INFO - Epoch [4/9], Step [700/3125], Loss: 2.7560, Perplexity: 15.7370
142
+ 2025-06-05 12:39:46,304 - __main__ - INFO - Epoch [4/9], Step [800/3125], Loss: 2.7199, Perplexity: 15.1782
143
+ 2025-06-05 12:41:18,843 - __main__ - INFO - Epoch [4/9], Step [900/3125], Loss: 2.9431, Perplexity: 18.9745
144
+ 2025-06-05 12:42:51,310 - __main__ - INFO - Epoch [4/9], Step [1000/3125], Loss: 2.7849, Perplexity: 16.1979
145
+ 2025-06-05 12:44:24,070 - __main__ - INFO - Epoch [4/9], Step [1100/3125], Loss: 2.6553, Perplexity: 14.2291
146
+ 2025-06-05 12:45:56,314 - __main__ - INFO - Epoch [4/9], Step [1200/3125], Loss: 2.7191, Perplexity: 15.1662
147
+ 2025-06-05 12:47:28,651 - __main__ - INFO - Epoch [4/9], Step [1300/3125], Loss: 2.7089, Perplexity: 15.0125
148
+ 2025-06-05 12:49:01,294 - __main__ - INFO - Epoch [4/9], Step [1400/3125], Loss: 2.9824, Perplexity: 19.7352
149
+ 2025-06-05 12:50:34,013 - __main__ - INFO - Epoch [4/9], Step [1500/3125], Loss: 2.7723, Perplexity: 15.9946
150
+ 2025-06-05 12:52:06,802 - __main__ - INFO - Epoch [4/9], Step [1600/3125], Loss: 2.8024, Perplexity: 16.4847
151
+ 2025-06-05 12:53:39,654 - __main__ - INFO - Epoch [4/9], Step [1700/3125], Loss: 2.7457, Perplexity: 15.5758
152
+ 2025-06-05 12:55:12,141 - __main__ - INFO - Epoch [4/9], Step [1800/3125], Loss: 2.9873, Perplexity: 19.8314
153
+ 2025-06-05 12:56:44,381 - __main__ - INFO - Epoch [4/9], Step [1900/3125], Loss: 2.6393, Perplexity: 14.0032
154
+ 2025-06-05 12:58:16,535 - __main__ - INFO - Epoch [4/9], Step [2000/3125], Loss: 2.6532, Perplexity: 14.1999
155
+ 2025-06-05 12:59:49,166 - __main__ - INFO - Epoch [4/9], Step [2100/3125], Loss: 2.7995, Perplexity: 16.4372
156
+ 2025-06-05 13:01:21,714 - __main__ - INFO - Epoch [4/9], Step [2200/3125], Loss: 2.7488, Perplexity: 15.6245
157
+ 2025-06-05 13:02:53,947 - __main__ - INFO - Epoch [4/9], Step [2300/3125], Loss: 2.8463, Perplexity: 17.2242
158
+ 2025-06-05 13:04:26,007 - __main__ - INFO - Epoch [4/9], Step [2400/3125], Loss: 2.6901, Perplexity: 14.7335
159
+ 2025-06-05 13:05:58,607 - __main__ - INFO - Epoch [4/9], Step [2500/3125], Loss: 2.8890, Perplexity: 17.9762
160
+ 2025-06-05 13:07:31,240 - __main__ - INFO - Epoch [4/9], Step [2600/3125], Loss: 2.6551, Perplexity: 14.2270
161
+ 2025-06-05 13:09:03,803 - __main__ - INFO - Epoch [4/9], Step [2700/3125], Loss: 2.8474, Perplexity: 17.2437
162
+ 2025-06-05 13:10:36,148 - __main__ - INFO - Epoch [4/9], Step [2800/3125], Loss: 2.7218, Perplexity: 15.2075
163
+ 2025-06-05 13:12:08,443 - __main__ - INFO - Epoch [4/9], Step [2900/3125], Loss: 2.6753, Perplexity: 14.5169
164
+ 2025-06-05 13:13:41,235 - __main__ - INFO - Epoch [4/9], Step [3000/3125], Loss: 2.8698, Perplexity: 17.6342
165
+ 2025-06-05 13:15:13,729 - __main__ - INFO - Epoch [4/9], Step [3100/3125], Loss: 2.5715, Perplexity: 13.0854
166
+ 2025-06-05 13:15:37,016 - __main__ - INFO - Epoch 4 Training finished. Avg Loss: 2.7810, Time: 2894.72s
167
+ 2025-06-05 13:28:14,233 - __main__ - INFO - Validation Avg Loss: 2.6835, Perplexity: 14.6358
168
+ 2025-06-05 13:28:24,559 - __main__ - INFO - Validation BLEU-4: 0.0996
169
+ 2025-06-05 13:28:25,299 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.0996.pth
170
+ 2025-06-05 13:28:25,300 - __main__ - INFO - Performing memory optimization after epoch...
171
+ 2025-06-05 13:28:25,596 - __main__ - INFO - CUDA cache emptied.
172
+ 2025-06-05 13:28:25,927 - __main__ - INFO - Python garbage collector run.
173
+ 2025-06-05 13:30:01,372 - __main__ - INFO - Epoch [5/9], Step [100/3125], Loss: 2.7858, Perplexity: 16.2124
174
+ 2025-06-05 13:31:34,188 - __main__ - INFO - Epoch [5/9], Step [200/3125], Loss: 2.5799, Perplexity: 13.1955
175
+ 2025-06-05 13:33:06,552 - __main__ - INFO - Epoch [5/9], Step [300/3125], Loss: 2.7141, Perplexity: 15.0910
176
+ 2025-06-05 13:34:39,208 - __main__ - INFO - Epoch [5/9], Step [400/3125], Loss: 2.6322, Perplexity: 13.9042
177
+ 2025-06-05 13:36:11,891 - __main__ - INFO - Epoch [5/9], Step [500/3125], Loss: 2.9920, Perplexity: 19.9248
178
+ 2025-06-05 13:37:44,341 - __main__ - INFO - Epoch [5/9], Step [600/3125], Loss: 2.5876, Perplexity: 13.2972
179
+ 2025-06-05 13:39:16,416 - __main__ - INFO - Epoch [5/9], Step [700/3125], Loss: 2.6221, Perplexity: 13.7644
180
+ 2025-06-05 13:40:48,960 - __main__ - INFO - Epoch [5/9], Step [800/3125], Loss: 2.5546, Perplexity: 12.8666
181
+ 2025-06-05 13:42:21,593 - __main__ - INFO - Epoch [5/9], Step [900/3125], Loss: 2.7899, Perplexity: 16.2789
182
+ 2025-06-05 13:43:54,018 - __main__ - INFO - Epoch [5/9], Step [1000/3125], Loss: 2.7180, Perplexity: 15.1505
183
+ 2025-06-05 13:45:26,629 - __main__ - INFO - Epoch [5/9], Step [1100/3125], Loss: 2.6338, Perplexity: 13.9268
184
+ 2025-06-05 13:46:59,396 - __main__ - INFO - Epoch [5/9], Step [1200/3125], Loss: 2.9994, Perplexity: 20.0732
185
+ 2025-06-05 13:48:31,472 - __main__ - INFO - Epoch [5/9], Step [1300/3125], Loss: 2.5138, Perplexity: 12.3518
186
+ 2025-06-05 13:50:03,810 - __main__ - INFO - Epoch [5/9], Step [1400/3125], Loss: 2.7708, Perplexity: 15.9718
187
+ 2025-06-05 13:51:36,234 - __main__ - INFO - Epoch [5/9], Step [1500/3125], Loss: 2.6756, Perplexity: 14.5208
188
+ 2025-06-05 13:53:08,614 - __main__ - INFO - Epoch [5/9], Step [1600/3125], Loss: 2.6377, Perplexity: 13.9804
189
+ 2025-06-05 13:54:41,451 - __main__ - INFO - Epoch [5/9], Step [1700/3125], Loss: 2.6101, Perplexity: 13.6009
190
+ 2025-06-05 13:56:14,028 - __main__ - INFO - Epoch [5/9], Step [1800/3125], Loss: 2.6736, Perplexity: 14.4918
191
+ 2025-06-05 13:57:46,796 - __main__ - INFO - Epoch [5/9], Step [1900/3125], Loss: 2.6328, Perplexity: 13.9123
192
+ 2025-06-05 13:59:19,826 - __main__ - INFO - Epoch [5/9], Step [2000/3125], Loss: 2.5901, Perplexity: 13.3318
193
+ 2025-06-05 14:00:52,403 - __main__ - INFO - Epoch [5/9], Step [2100/3125], Loss: 2.9779, Perplexity: 19.6469
194
+ 2025-06-05 14:02:25,242 - __main__ - INFO - Epoch [5/9], Step [2200/3125], Loss: 2.5809, Perplexity: 13.2096
195
+ 2025-06-05 14:03:58,214 - __main__ - INFO - Epoch [5/9], Step [2300/3125], Loss: 2.7360, Perplexity: 15.4247
196
+ 2025-06-05 14:05:30,984 - __main__ - INFO - Epoch [5/9], Step [2400/3125], Loss: 2.5268, Perplexity: 12.5135
197
+ 2025-06-05 14:07:04,027 - __main__ - INFO - Epoch [5/9], Step [2500/3125], Loss: 2.8699, Perplexity: 17.6357
198
+ 2025-06-05 14:08:36,966 - __main__ - INFO - Epoch [5/9], Step [2600/3125], Loss: 2.7812, Perplexity: 16.1382
199
+ 2025-06-05 14:10:09,378 - __main__ - INFO - Epoch [5/9], Step [2700/3125], Loss: 2.6540, Perplexity: 14.2111
200
+ 2025-06-05 14:11:41,671 - __main__ - INFO - Epoch [5/9], Step [2800/3125], Loss: 2.6715, Perplexity: 14.4612
201
+ 2025-06-05 14:13:14,497 - __main__ - INFO - Epoch [5/9], Step [2900/3125], Loss: 2.8780, Perplexity: 17.7783
202
+ 2025-06-05 14:14:47,210 - __main__ - INFO - Epoch [5/9], Step [3000/3125], Loss: 2.6441, Perplexity: 14.0706
203
+ 2025-06-05 14:16:19,723 - __main__ - INFO - Epoch [5/9], Step [3100/3125], Loss: 2.6745, Perplexity: 14.5049
204
+ 2025-06-05 14:16:43,018 - __main__ - INFO - Epoch 5 Training finished. Avg Loss: 2.6866, Time: 2897.09s
205
+ 2025-06-05 14:29:20,136 - __main__ - INFO - Validation Avg Loss: 2.6448, Perplexity: 14.0800
206
+ 2025-06-05 14:29:30,680 - __main__ - INFO - Validation BLEU-4: 0.1011
207
+ 2025-06-05 14:29:31,398 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.1011.pth
208
+ 2025-06-05 14:29:32,142 - __main__ - INFO - Saved periodic model checkpoint to ./output/model_epoch_5.pth
209
+ 2025-06-05 14:29:32,143 - __main__ - INFO - Performing memory optimization after epoch...
210
+ 2025-06-05 14:29:32,408 - __main__ - INFO - CUDA cache emptied.
211
+ 2025-06-05 14:29:32,753 - __main__ - INFO - Python garbage collector run.
212
+ 2025-06-05 14:31:08,760 - __main__ - INFO - Epoch [6/9], Step [100/3125], Loss: 2.9338, Perplexity: 18.7981
213
+ 2025-06-05 14:32:41,036 - __main__ - INFO - Epoch [6/9], Step [200/3125], Loss: 2.7460, Perplexity: 15.5799
214
+ 2025-06-05 14:34:13,610 - __main__ - INFO - Epoch [6/9], Step [300/3125], Loss: 2.3894, Perplexity: 10.9066
215
+ 2025-06-05 14:35:46,857 - __main__ - INFO - Epoch [6/9], Step [400/3125], Loss: 2.6891, Perplexity: 14.7191
216
+ 2025-06-05 14:37:19,167 - __main__ - INFO - Epoch [6/9], Step [500/3125], Loss: 2.7032, Perplexity: 14.9273
217
+ 2025-06-05 14:38:52,048 - __main__ - INFO - Epoch [6/9], Step [600/3125], Loss: 2.6848, Perplexity: 14.6553
218
+ 2025-06-05 14:40:24,881 - __main__ - INFO - Epoch [6/9], Step [700/3125], Loss: 2.4991, Perplexity: 12.1715
219
+ 2025-06-05 14:41:57,087 - __main__ - INFO - Epoch [6/9], Step [800/3125], Loss: 2.5794, Perplexity: 13.1892
220
+ 2025-06-05 14:43:29,854 - __main__ - INFO - Epoch [6/9], Step [900/3125], Loss: 2.5044, Perplexity: 12.2366
221
+ 2025-06-05 14:45:02,445 - __main__ - INFO - Epoch [6/9], Step [1000/3125], Loss: 2.6207, Perplexity: 13.7448
222
+ 2025-06-05 14:46:35,344 - __main__ - INFO - Epoch [6/9], Step [1100/3125], Loss: 2.5424, Perplexity: 12.7103
223
+ 2025-06-05 14:48:08,048 - __main__ - INFO - Epoch [6/9], Step [1200/3125], Loss: 2.5540, Perplexity: 12.8587
224
+ 2025-06-05 14:49:40,487 - __main__ - INFO - Epoch [6/9], Step [1300/3125], Loss: 2.5427, Perplexity: 12.7145
225
+ 2025-06-05 14:51:13,035 - __main__ - INFO - Epoch [6/9], Step [1400/3125], Loss: 2.6368, Perplexity: 13.9681
226
+ 2025-06-05 14:52:45,059 - __main__ - INFO - Epoch [6/9], Step [1500/3125], Loss: 2.5717, Perplexity: 13.0879
227
+ 2025-06-05 14:54:17,291 - __main__ - INFO - Epoch [6/9], Step [1600/3125], Loss: 2.4953, Perplexity: 12.1255
228
+ 2025-06-05 14:55:50,488 - __main__ - INFO - Epoch [6/9], Step [1700/3125], Loss: 2.7300, Perplexity: 15.3336
229
+ 2025-06-05 14:57:23,442 - __main__ - INFO - Epoch [6/9], Step [1800/3125], Loss: 2.6784, Perplexity: 14.5613
230
+ 2025-06-05 14:58:55,865 - __main__ - INFO - Epoch [6/9], Step [1900/3125], Loss: 2.6738, Perplexity: 14.4946
231
+ 2025-06-05 15:00:28,266 - __main__ - INFO - Epoch [6/9], Step [2000/3125], Loss: 2.6148, Perplexity: 13.6649
232
+ 2025-06-05 15:02:00,818 - __main__ - INFO - Epoch [6/9], Step [2100/3125], Loss: 2.8018, Perplexity: 16.4748
233
+ 2025-06-05 15:03:33,616 - __main__ - INFO - Epoch [6/9], Step [2200/3125], Loss: 2.6942, Perplexity: 14.7940
234
+ 2025-06-05 15:05:05,991 - __main__ - INFO - Epoch [6/9], Step [2300/3125], Loss: 2.4844, Perplexity: 11.9935
235
+ 2025-06-05 15:06:38,662 - __main__ - INFO - Epoch [6/9], Step [2400/3125], Loss: 2.4125, Perplexity: 11.1615
236
+ 2025-06-05 15:08:11,014 - __main__ - INFO - Epoch [6/9], Step [2500/3125], Loss: 2.5616, Perplexity: 12.9564
237
+ 2025-06-05 15:09:43,766 - __main__ - INFO - Epoch [6/9], Step [2600/3125], Loss: 2.5049, Perplexity: 12.2425
238
+ 2025-06-05 15:11:16,830 - __main__ - INFO - Epoch [6/9], Step [2700/3125], Loss: 2.4866, Perplexity: 12.0206
239
+ 2025-06-05 15:12:49,025 - __main__ - INFO - Epoch [6/9], Step [2800/3125], Loss: 2.3411, Perplexity: 10.3931
240
+ 2025-06-05 15:14:21,512 - __main__ - INFO - Epoch [6/9], Step [2900/3125], Loss: 2.6441, Perplexity: 14.0713
241
+ 2025-06-05 15:15:54,097 - __main__ - INFO - Epoch [6/9], Step [3000/3125], Loss: 2.5855, Perplexity: 13.2695
242
+ 2025-06-05 15:17:26,931 - __main__ - INFO - Epoch [6/9], Step [3100/3125], Loss: 2.7371, Perplexity: 15.4419
243
+ 2025-06-05 15:17:50,166 - __main__ - INFO - Epoch 6 Training finished. Avg Loss: 2.6127, Time: 2897.41s
244
+ 2025-06-05 15:30:17,460 - __main__ - INFO - Validation Avg Loss: 2.6184, Perplexity: 13.7138
245
+ 2025-06-05 15:30:28,171 - __main__ - INFO - Validation BLEU-4: 0.1009
246
+ 2025-06-05 15:30:28,172 - __main__ - INFO - Performing memory optimization after epoch...
247
+ 2025-06-05 15:30:28,470 - __main__ - INFO - CUDA cache emptied.
248
+ 2025-06-05 15:30:28,816 - __main__ - INFO - Python garbage collector run.
249
+ 2025-06-05 15:32:05,141 - __main__ - INFO - Epoch [7/9], Step [100/3125], Loss: 2.6545, Perplexity: 14.2180
250
+ 2025-06-05 15:33:37,788 - __main__ - INFO - Epoch [7/9], Step [200/3125], Loss: 2.3907, Perplexity: 10.9206
251
+ 2025-06-05 15:35:09,924 - __main__ - INFO - Epoch [7/9], Step [300/3125], Loss: 2.3964, Perplexity: 10.9837
252
+ 2025-06-05 15:36:42,633 - __main__ - INFO - Epoch [7/9], Step [400/3125], Loss: 2.2917, Perplexity: 9.8918
253
+ 2025-06-05 15:38:15,138 - __main__ - INFO - Epoch [7/9], Step [500/3125], Loss: 2.5460, Perplexity: 12.7565
254
+ 2025-06-05 15:39:47,288 - __main__ - INFO - Epoch [7/9], Step [600/3125], Loss: 2.7165, Perplexity: 15.1276
255
+ 2025-06-05 15:41:19,984 - __main__ - INFO - Epoch [7/9], Step [700/3125], Loss: 2.3160, Perplexity: 10.1350
256
+ 2025-06-05 15:42:52,625 - __main__ - INFO - Epoch [7/9], Step [800/3125], Loss: 2.4257, Perplexity: 11.3097
257
+ 2025-06-05 15:44:25,300 - __main__ - INFO - Epoch [7/9], Step [900/3125], Loss: 2.8313, Perplexity: 16.9669
258
+ 2025-06-05 15:45:57,862 - __main__ - INFO - Epoch [7/9], Step [1000/3125], Loss: 2.4841, Perplexity: 11.9898
259
+ 2025-06-05 15:47:30,346 - __main__ - INFO - Epoch [7/9], Step [1100/3125], Loss: 2.4737, Perplexity: 11.8667
260
+ 2025-06-05 15:49:02,664 - __main__ - INFO - Epoch [7/9], Step [1200/3125], Loss: 2.3904, Perplexity: 10.9178
261
+ 2025-06-05 15:50:34,955 - __main__ - INFO - Epoch [7/9], Step [1300/3125], Loss: 2.4815, Perplexity: 11.9595
262
+ 2025-06-05 15:52:07,231 - __main__ - INFO - Epoch [7/9], Step [1400/3125], Loss: 2.7324, Perplexity: 15.3697
263
+ 2025-06-05 15:53:39,430 - __main__ - INFO - Epoch [7/9], Step [1500/3125], Loss: 2.7363, Perplexity: 15.4298
264
+ 2025-06-05 15:55:11,826 - __main__ - INFO - Epoch [7/9], Step [1600/3125], Loss: 2.7751, Perplexity: 16.0397
265
+ 2025-06-05 15:56:44,380 - __main__ - INFO - Epoch [7/9], Step [1700/3125], Loss: 2.5598, Perplexity: 12.9329
266
+ 2025-06-05 15:58:16,561 - __main__ - INFO - Epoch [7/9], Step [1800/3125], Loss: 2.7369, Perplexity: 15.4388
267
+ 2025-06-05 15:59:49,080 - __main__ - INFO - Epoch [7/9], Step [1900/3125], Loss: 2.8117, Perplexity: 16.6374
268
+ 2025-06-05 16:01:21,995 - __main__ - INFO - Epoch [7/9], Step [2000/3125], Loss: 2.6360, Perplexity: 13.9572
269
+ 2025-06-05 16:02:54,730 - __main__ - INFO - Epoch [7/9], Step [2100/3125], Loss: 2.4383, Perplexity: 11.4533
270
+ 2025-06-05 16:04:28,093 - __main__ - INFO - Epoch [7/9], Step [2200/3125], Loss: 2.7142, Perplexity: 15.0932
271
+ 2025-06-05 16:06:01,476 - __main__ - INFO - Epoch [7/9], Step [2300/3125], Loss: 2.4590, Perplexity: 11.6927
272
+ 2025-06-05 16:07:34,763 - __main__ - INFO - Epoch [7/9], Step [2400/3125], Loss: 2.6457, Perplexity: 14.0935
273
+ 2025-06-05 16:09:07,778 - __main__ - INFO - Epoch [7/9], Step [2500/3125], Loss: 2.6708, Perplexity: 14.4518
274
+ 2025-06-05 16:10:40,473 - __main__ - INFO - Epoch [7/9], Step [2600/3125], Loss: 2.5663, Perplexity: 13.0179
275
+ 2025-06-05 16:12:12,943 - __main__ - INFO - Epoch [7/9], Step [2700/3125], Loss: 2.8020, Perplexity: 16.4769
276
+ 2025-06-05 16:13:45,226 - __main__ - INFO - Epoch [7/9], Step [2800/3125], Loss: 2.6069, Perplexity: 13.5576
277
+ 2025-06-05 16:15:17,969 - __main__ - INFO - Epoch [7/9], Step [2900/3125], Loss: 2.5213, Perplexity: 12.4453
278
+ 2025-06-05 16:16:51,084 - __main__ - INFO - Epoch [7/9], Step [3000/3125], Loss: 2.5426, Perplexity: 12.7128
279
+ 2025-06-05 16:18:23,560 - __main__ - INFO - Epoch [7/9], Step [3100/3125], Loss: 2.6069, Perplexity: 13.5576
280
+ 2025-06-05 16:18:46,982 - __main__ - INFO - Epoch 7 Training finished. Avg Loss: 2.5503, Time: 2898.16s
281
+ 2025-06-05 16:31:32,517 - __main__ - INFO - Validation Avg Loss: 2.6021, Perplexity: 13.4918
282
+ 2025-06-05 16:31:42,995 - __main__ - INFO - Validation BLEU-4: 0.1028
283
+ 2025-06-05 16:31:43,704 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.1028.pth
284
+ 2025-06-05 16:31:43,705 - __main__ - INFO - Performing memory optimization after epoch...
285
+ 2025-06-05 16:31:43,969 - __main__ - INFO - CUDA cache emptied.
286
+ 2025-06-05 16:31:44,301 - __main__ - INFO - Python garbage collector run.
287
+ 2025-06-05 16:33:19,208 - __main__ - INFO - Epoch [8/9], Step [100/3125], Loss: 2.5949, Perplexity: 13.3954
288
+ 2025-06-05 16:34:52,087 - __main__ - INFO - Epoch [8/9], Step [200/3125], Loss: 2.3867, Perplexity: 10.8772
289
+ 2025-06-05 16:36:24,295 - __main__ - INFO - Epoch [8/9], Step [300/3125], Loss: 2.4636, Perplexity: 11.7471
290
+ 2025-06-05 16:37:56,189 - __main__ - INFO - Epoch [8/9], Step [400/3125], Loss: 2.5906, Perplexity: 13.3381
291
+ 2025-06-05 16:39:29,764 - __main__ - INFO - Epoch [8/9], Step [500/3125], Loss: 2.5785, Perplexity: 13.1772
292
+ 2025-06-05 16:41:02,645 - __main__ - INFO - Epoch [8/9], Step [600/3125], Loss: 2.5902, Perplexity: 13.3330
293
+ 2025-06-05 16:42:35,744 - __main__ - INFO - Epoch [8/9], Step [700/3125], Loss: 2.6504, Perplexity: 14.1599
294
+ 2025-06-05 16:44:08,175 - __main__ - INFO - Epoch [8/9], Step [800/3125], Loss: 2.5812, Perplexity: 13.2125
295
+ 2025-06-05 16:45:41,068 - __main__ - INFO - Epoch [8/9], Step [900/3125], Loss: 2.3477, Perplexity: 10.4617
296
+ 2025-06-05 16:47:13,040 - __main__ - INFO - Epoch [8/9], Step [1000/3125], Loss: 2.5416, Perplexity: 12.7004
297
+ 2025-06-05 16:48:45,442 - __main__ - INFO - Epoch [8/9], Step [1100/3125], Loss: 2.5581, Perplexity: 12.9117
298
+ 2025-06-05 16:50:18,029 - __main__ - INFO - Epoch [8/9], Step [1200/3125], Loss: 2.6781, Perplexity: 14.5574
299
+ 2025-06-05 16:51:51,068 - __main__ - INFO - Epoch [8/9], Step [1300/3125], Loss: 2.4678, Perplexity: 11.7967
300
+ 2025-06-05 16:53:23,767 - __main__ - INFO - Epoch [8/9], Step [1400/3125], Loss: 2.7964, Perplexity: 16.3863
301
+ 2025-06-05 16:54:56,092 - __main__ - INFO - Epoch [8/9], Step [1500/3125], Loss: 2.5617, Perplexity: 12.9583
302
+ 2025-06-05 16:56:28,564 - __main__ - INFO - Epoch [8/9], Step [1600/3125], Loss: 2.4647, Perplexity: 11.7597
303
+ 2025-06-05 16:58:01,422 - __main__ - INFO - Epoch [8/9], Step [1700/3125], Loss: 2.5186, Perplexity: 12.4108
304
+ 2025-06-05 16:59:34,614 - __main__ - INFO - Epoch [8/9], Step [1800/3125], Loss: 2.7458, Perplexity: 15.5764
305
+ 2025-06-05 17:01:06,892 - __main__ - INFO - Epoch [8/9], Step [1900/3125], Loss: 2.4218, Perplexity: 11.2661
306
+ 2025-06-05 17:02:39,718 - __main__ - INFO - Epoch [8/9], Step [2000/3125], Loss: 2.4996, Perplexity: 12.1774
307
+ 2025-06-05 17:04:12,656 - __main__ - INFO - Epoch [8/9], Step [2100/3125], Loss: 2.4715, Perplexity: 11.8400
308
+ 2025-06-05 17:05:45,350 - __main__ - INFO - Epoch [8/9], Step [2200/3125], Loss: 2.7304, Perplexity: 15.3395
309
+ 2025-06-05 17:07:18,313 - __main__ - INFO - Epoch [8/9], Step [2300/3125], Loss: 2.6088, Perplexity: 13.5826
310
+ 2025-06-05 17:08:50,891 - __main__ - INFO - Epoch [8/9], Step [2400/3125], Loss: 2.2952, Perplexity: 9.9266
311
+ 2025-06-05 17:10:23,321 - __main__ - INFO - Epoch [8/9], Step [2500/3125], Loss: 2.6801, Perplexity: 14.5865
312
+ 2025-06-05 17:11:56,233 - __main__ - INFO - Epoch [8/9], Step [2600/3125], Loss: 2.6453, Perplexity: 14.0877
313
+ 2025-06-05 17:13:28,949 - __main__ - INFO - Epoch [8/9], Step [2700/3125], Loss: 2.4980, Perplexity: 12.1576
314
+ 2025-06-05 17:15:01,500 - __main__ - INFO - Epoch [8/9], Step [2800/3125], Loss: 2.4855, Perplexity: 12.0065
315
+ 2025-06-05 17:16:33,983 - __main__ - INFO - Epoch [8/9], Step [2900/3125], Loss: 2.7647, Perplexity: 15.8748
316
+ 2025-06-05 17:18:06,726 - __main__ - INFO - Epoch [8/9], Step [3000/3125], Loss: 2.3954, Perplexity: 10.9722
317
+ 2025-06-05 17:19:40,090 - __main__ - INFO - Epoch [8/9], Step [3100/3125], Loss: 2.5672, Perplexity: 13.0291
318
+ 2025-06-05 17:20:03,575 - __main__ - INFO - Epoch 8 Training finished. Avg Loss: 2.4984, Time: 2899.27s
319
+ 2025-06-05 17:32:41,641 - __main__ - INFO - Validation Avg Loss: 2.5887, Perplexity: 13.3131
320
+ 2025-06-05 17:32:52,092 - __main__ - INFO - Validation BLEU-4: 0.1029
321
+ 2025-06-05 17:32:52,835 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.1029.pth
322
+ 2025-06-05 17:32:52,836 - __main__ - INFO - Performing memory optimization after epoch...
323
+ 2025-06-05 17:32:53,135 - __main__ - INFO - CUDA cache emptied.
324
+ 2025-06-05 17:32:53,477 - __main__ - INFO - Python garbage collector run.
325
+ 2025-06-05 17:34:28,617 - __main__ - INFO - Epoch [9/9], Step [100/3125], Loss: 2.4885, Perplexity: 12.0428
326
+ 2025-06-05 17:36:01,256 - __main__ - INFO - Epoch [9/9], Step [200/3125], Loss: 2.4464, Perplexity: 11.5461
327
+ 2025-06-05 17:37:33,875 - __main__ - INFO - Epoch [9/9], Step [300/3125], Loss: 2.5567, Perplexity: 12.8926
328
+ 2025-06-05 17:39:06,254 - __main__ - INFO - Epoch [9/9], Step [400/3125], Loss: 2.5434, Perplexity: 12.7234
329
+ 2025-06-05 17:40:38,946 - __main__ - INFO - Epoch [9/9], Step [500/3125], Loss: 2.4282, Perplexity: 11.3380
330
+ 2025-06-05 17:42:11,097 - __main__ - INFO - Epoch [9/9], Step [600/3125], Loss: 2.4907, Perplexity: 12.0691
331
+ 2025-06-05 17:43:43,481 - __main__ - INFO - Epoch [9/9], Step [700/3125], Loss: 2.5079, Perplexity: 12.2789
332
+ 2025-06-05 17:45:15,684 - __main__ - INFO - Epoch [9/9], Step [800/3125], Loss: 2.4735, Perplexity: 11.8636
333
+ 2025-06-05 17:46:48,773 - __main__ - INFO - Epoch [9/9], Step [900/3125], Loss: 2.5014, Perplexity: 12.2001
334
+ 2025-06-05 17:48:21,463 - __main__ - INFO - Epoch [9/9], Step [1000/3125], Loss: 2.7705, Perplexity: 15.9662
335
+ 2025-06-05 17:49:53,960 - __main__ - INFO - Epoch [9/9], Step [1100/3125], Loss: 2.6952, Perplexity: 14.8082
text_files/training_log_17_27.txt ADDED
@@ -0,0 +1,806 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-10 07:24:28,182 - __main__ - INFO - Previous notebook output found at: /kaggle/input/part-2-10-19/output. Copying to ./output...
2
+ 2025-06-10 07:25:23,506 - __main__ - INFO - Previous output copied successfully to current working directory for resumption.
3
+ 2025-06-10 07:25:23,507 - __main__ - INFO -
4
+ --- Starting Model Training ---
5
+ 2025-06-10 07:25:23,508 - __main__ - INFO - Starting training process...
6
+ 2025-06-10 07:25:23,508 - __main__ - INFO - Using device: cuda
7
+ 2025-06-10 07:25:23,511 - __main__ - WARNING - Vocabulary source not found at /kaggle/input/vocabulary_s/pytorch/default/1/vocabulary.pkl. Will build new vocabulary.
8
+ 2025-06-10 07:25:23,512 - __main__ - INFO - Building new vocabulary from training dataset...
9
+ 2025-06-10 07:25:24,512 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_train2017.json
10
+ Processing annotations: 100%|██████████| 591753/591753 [04:09<00:00, 2370.69it/s]
11
+ 2025-06-10 07:29:34,159 - __main__ - INFO - Dataset size after filtering: 591753 samples.
12
+ 2025-06-10 07:29:34,204 - __main__ - INFO - Building vocabulary...
13
+ Counting word frequencies: 100%|██████████| 591753/591753 [00:01<00:00, 340892.04it/s]
14
+ 2025-06-10 07:29:35,947 - __main__ - INFO - Vocabulary size: 14030
15
+ 2025-06-10 07:29:36,090 - __main__ - INFO - New vocabulary built.
16
+ 2025-06-10 07:29:36,102 - __main__ - INFO - Saved newly built vocabulary to ./output/vocabulary.pkl
17
+ 2025-06-10 07:29:37,007 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_train2017.json
18
+ Processing annotations: 100%|██████████| 591753/591753 [00:58<00:00, 10197.00it/s]
19
+ 2025-06-10 07:30:35,215 - __main__ - INFO - Using subset of 200000 samples for the dataset.
20
+ 2025-06-10 07:30:35,216 - __main__ - INFO - Dataset size after filtering: 200000 samples.
21
+ 2025-06-10 07:30:35,288 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_val2017.json
22
+ Processing annotations: 100%|██████████| 25014/25014 [00:12<00:00, 2021.11it/s]
23
+ 2025-06-10 07:30:47,667 - __main__ - INFO - Dataset size after filtering: 25014 samples.
24
+ 2025-06-10 07:30:47,668 - __main__ - INFO - Training dataset size: 200000
25
+ 2025-06-10 07:30:47,669 - __main__ - INFO - Validation dataset size: 25014
26
+ Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
27
+ 100%|██████████| 97.8M/97.8M [00:00<00:00, 212MB/s]
28
+ 2025-06-10 07:30:48,719 - __main__ - INFO - ResNet encoder base layers are fine-tuning enabled.
29
+ 2025-06-10 07:30:49,052 - __main__ - INFO - Attempting to resume training from: ./output/best_model_bleu0.1047.pth
30
+ 2025-06-10 07:30:49,453 - __main__ - INFO - Resumed training from epoch 16. Best validation score so far: 0.1047
31
+ Epoch 17 Training: 3%|▎ | 99/3125 [01:32<48:35, 1.04it/s]
32
+ 2025-06-10 07:32:22,936 - __main__ - INFO - Epoch [17/27], Step [100/3125], Loss: 2.3526, Perplexity: 10.5128
33
+ Epoch 17 Training: 6%|▋ | 199/3125 [03:11<48:08, 1.01it/s]
34
+ 2025-06-10 07:34:01,500 - __main__ - INFO - Epoch [17/27], Step [200/3125], Loss: 2.4604, Perplexity: 11.7090
35
+ Epoch 17 Training: 10%|▉ | 299/3125 [04:49<46:11, 1.02it/s]
36
+ 2025-06-10 07:35:40,055 - __main__ - INFO - Epoch [17/27], Step [300/3125], Loss: 2.5760, Perplexity: 13.1449
37
+ Epoch 17 Training: 13%|█▎ | 399/3125 [06:27<44:41, 1.02it/s]
38
+ 2025-06-10 07:37:18,437 - __main__ - INFO - Epoch [17/27], Step [400/3125], Loss: 2.5386, Perplexity: 12.6615
39
+ Epoch 17 Training: 16%|█▌ | 499/3125 [08:06<43:17, 1.01it/s]
40
+ 2025-06-10 07:38:57,172 - __main__ - INFO - Epoch [17/27], Step [500/3125], Loss: 2.4724, Perplexity: 11.8508
41
+ Epoch 17 Training: 19%|█▉ | 599/3125 [09:45<41:23, 1.02it/s]
42
+ 2025-06-10 07:40:35,800 - __main__ - INFO - Epoch [17/27], Step [600/3125], Loss: 3.0153, Perplexity: 20.3955
43
+ Epoch 17 Training: 22%|██▏ | 699/3125 [11:24<39:21, 1.03it/s]
44
+ 2025-06-10 07:42:14,841 - __main__ - INFO - Epoch [17/27], Step [700/3125], Loss: 2.6407, Perplexity: 14.0232
45
+ Epoch 17 Training: 26%|██▌ | 799/3125 [13:03<39:44, 1.03s/it]
46
+ 2025-06-10 07:43:53,867 - __main__ - INFO - Epoch [17/27], Step [800/3125], Loss: 2.5756, Perplexity: 13.1391
47
+ Epoch 17 Training: 29%|██▉ | 899/3125 [14:42<36:54, 1.01it/s]
48
+ 2025-06-10 07:45:32,847 - __main__ - INFO - Epoch [17/27], Step [900/3125], Loss: 2.2050, Perplexity: 9.0706
49
+ Epoch 17 Training: 32%|███▏ | 999/3125 [16:21<35:00, 1.01it/s]
50
+ 2025-06-10 07:47:11,912 - __main__ - INFO - Epoch [17/27], Step [1000/3125], Loss: 2.3957, Perplexity: 10.9755
51
+ Epoch 17 Training: 35%|███▌ | 1099/3125 [18:00<33:09, 1.02it/s]
52
+ 2025-06-10 07:48:50,896 - __main__ - INFO - Epoch [17/27], Step [1100/3125], Loss: 2.2961, Perplexity: 9.9352
53
+ Epoch 17 Training: 38%|███▊ | 1199/3125 [19:39<31:44, 1.01it/s]
54
+ 2025-06-10 07:50:30,069 - __main__ - INFO - Epoch [17/27], Step [1200/3125], Loss: 2.2552, Perplexity: 9.5371
55
+ Epoch 17 Training: 42%|████▏ | 1299/3125 [21:18<29:46, 1.02it/s]
56
+ 2025-06-10 07:52:09,332 - __main__ - INFO - Epoch [17/27], Step [1300/3125], Loss: 2.5000, Perplexity: 12.1825
57
+ Epoch 17 Training: 45%|████▍ | 1399/3125 [22:57<28:03, 1.02it/s]
58
+ 2025-06-10 07:53:47,838 - __main__ - INFO - Epoch [17/27], Step [1400/3125], Loss: 2.7185, Perplexity: 15.1579
59
+ Epoch 17 Training: 48%|████▊ | 1499/3125 [24:36<26:48, 1.01it/s]
60
+ 2025-06-10 07:55:26,722 - __main__ - INFO - Epoch [17/27], Step [1500/3125], Loss: 2.5459, Perplexity: 12.7545
61
+ Epoch 17 Training: 51%|█████ | 1599/3125 [26:15<24:49, 1.02it/s]
62
+ 2025-06-10 07:57:05,699 - __main__ - INFO - Epoch [17/27], Step [1600/3125], Loss: 2.3916, Perplexity: 10.9307
63
+ Epoch 17 Training: 54%|█████▍ | 1699/3125 [27:54<23:47, 1.00s/it]
64
+ 2025-06-10 07:58:44,675 - __main__ - INFO - Epoch [17/27], Step [1700/3125], Loss: 2.5494, Perplexity: 12.7994
65
+ Epoch 17 Training: 58%|█████▊ | 1799/3125 [29:33<22:05, 1.00it/s]
66
+ 2025-06-10 08:00:23,441 - __main__ - INFO - Epoch [17/27], Step [1800/3125], Loss: 2.4654, Perplexity: 11.7682
67
+ Epoch 17 Training: 61%|██████ | 1899/3125 [31:11<20:10, 1.01it/s]
68
+ 2025-06-10 08:02:02,062 - __main__ - INFO - Epoch [17/27], Step [1900/3125], Loss: 2.6649, Perplexity: 14.3659
69
+ Epoch 17 Training: 64%|██████▍ | 1999/3125 [32:50<18:28, 1.02it/s]
70
+ 2025-06-10 08:03:40,967 - __main__ - INFO - Epoch [17/27], Step [2000/3125], Loss: 2.2949, Perplexity: 9.9233
71
+ Epoch 17 Training: 67%|██████▋ | 2099/3125 [34:29<16:53, 1.01it/s]
72
+ 2025-06-10 08:05:20,196 - __main__ - INFO - Epoch [17/27], Step [2100/3125], Loss: 2.6354, Perplexity: 13.9489
73
+ Epoch 17 Training: 70%|███████ | 2199/3125 [36:09<15:11, 1.02it/s]
74
+ 2025-06-10 08:06:59,515 - __main__ - INFO - Epoch [17/27], Step [2200/3125], Loss: 2.5667, Perplexity: 13.0228
75
+ Epoch 17 Training: 74%|███████▎ | 2299/3125 [37:47<13:35, 1.01it/s]
76
+ 2025-06-10 08:08:38,388 - __main__ - INFO - Epoch [17/27], Step [2300/3125], Loss: 2.4153, Perplexity: 11.1936
77
+ Epoch 17 Training: 77%|███████▋ | 2399/3125 [39:27<12:04, 1.00it/s]
78
+ 2025-06-10 08:10:17,655 - __main__ - INFO - Epoch [17/27], Step [2400/3125], Loss: 2.5093, Perplexity: 12.2963
79
+ Epoch 17 Training: 80%|███████▉ | 2499/3125 [41:06<10:11, 1.02it/s]
80
+ 2025-06-10 08:11:57,014 - __main__ - INFO - Epoch [17/27], Step [2500/3125], Loss: 2.3697, Perplexity: 10.6938
81
+ Epoch 17 Training: 83%|████████▎ | 2599/3125 [42:45<08:38, 1.01it/s]
82
+ 2025-06-10 08:13:36,192 - __main__ - INFO - Epoch [17/27], Step [2600/3125], Loss: 2.5674, Perplexity: 13.0320
83
+ Epoch 17 Training: 86%|████████▋ | 2699/3125 [44:24<07:02, 1.01it/s]
84
+ 2025-06-10 08:15:14,991 - __main__ - INFO - Epoch [17/27], Step [2700/3125], Loss: 2.4103, Perplexity: 11.1374
85
+ Epoch 17 Training: 90%|████████▉ | 2799/3125 [46:03<05:24, 1.00it/s]
86
+ 2025-06-10 08:16:54,025 - __main__ - INFO - Epoch [17/27], Step [2800/3125], Loss: 2.4239, Perplexity: 11.2903
87
+ Epoch 17 Training: 93%|█████████▎| 2899/3125 [47:43<03:42, 1.02it/s]
88
+ 2025-06-10 08:18:33,461 - __main__ - INFO - Epoch [17/27], Step [2900/3125], Loss: 2.6061, Perplexity: 13.5467
89
+ Epoch 17 Training: 96%|█████████▌| 2999/3125 [49:21<02:05, 1.00it/s]
90
+ 2025-06-10 08:20:11,942 - __main__ - INFO - Epoch [17/27], Step [3000/3125], Loss: 2.3188, Perplexity: 10.1634
91
+ Epoch 17 Training: 99%|█████████▉| 3099/3125 [51:00<00:25, 1.01it/s]
92
+ 2025-06-10 08:21:51,151 - __main__ - INFO - Epoch [17/27], Step [3100/3125], Loss: 2.3833, Perplexity: 10.8402
93
+ Epoch 17 Training: 100%|██████████| 3125/3125 [51:26<00:00, 1.01it/s]
94
+ 2025-06-10 08:22:15,805 - __main__ - INFO - Epoch 17 Training finished. Avg Loss: 2.5127, Time: 3086.35s
95
+ Validation: 100%|██████████| 391/391 [11:34<00:00, 1.78s/it]
96
+ 2025-06-10 08:33:50,046 - __main__ - INFO - Validation Avg Loss: 2.4818, Perplexity: 11.9623
97
+ 2025-06-10 08:34:01,432 - __main__ - INFO - Validation BLEU-4: 0.1035
98
+ 2025-06-10 08:34:01,433 - __main__ - INFO - Performing memory optimization after epoch...
99
+ 2025-06-10 08:34:01,713 - __main__ - INFO - CUDA cache emptied.
100
+ 2025-06-10 08:34:01,945 - __main__ - INFO - Python garbage collector run.
101
+ Epoch 18 Training: 3%|▎ | 99/3125 [01:40<48:46, 1.03it/s]
102
+ 2025-06-10 08:35:43,887 - __main__ - INFO - Epoch [18/27], Step [100/3125], Loss: 2.5242, Perplexity: 12.4803
103
+ Epoch 18 Training: 6%|▋ | 199/3125 [03:20<47:57, 1.02it/s]
104
+ 2025-06-10 08:37:22,956 - __main__ - INFO - Epoch [18/27], Step [200/3125], Loss: 2.5674, Perplexity: 13.0321
105
+ Epoch 18 Training: 10%|▉ | 299/3125 [04:59<46:50, 1.01it/s]
106
+ 2025-06-10 08:39:02,407 - __main__ - INFO - Epoch [18/27], Step [300/3125], Loss: 2.4194, Perplexity: 11.2394
107
+ Epoch 18 Training: 13%|█▎ | 399/3125 [06:38<44:52, 1.01it/s]
108
+ 2025-06-10 08:40:41,610 - __main__ - INFO - Epoch [18/27], Step [400/3125], Loss: 2.8139, Perplexity: 16.6747
109
+ Epoch 18 Training: 16%|█▌ | 499/3125 [08:17<42:37, 1.03it/s]
110
+ 2025-06-10 08:42:20,012 - __main__ - INFO - Epoch [18/27], Step [500/3125], Loss: 2.3331, Perplexity: 10.3095
111
+ Epoch 18 Training: 19%|█▉ | 599/3125 [09:55<41:13, 1.02it/s]
112
+ 2025-06-10 08:43:58,740 - __main__ - INFO - Epoch [18/27], Step [600/3125], Loss: 2.3093, Perplexity: 10.0678
113
+ Epoch 18 Training: 22%|██▏ | 699/3125 [11:35<39:36, 1.02it/s]
114
+ 2025-06-10 08:45:38,180 - __main__ - INFO - Epoch [18/27], Step [700/3125], Loss: 2.3935, Perplexity: 10.9519
115
+ Epoch 18 Training: 26%|██▌ | 799/3125 [13:13<37:36, 1.03it/s]
116
+ 2025-06-10 08:47:16,432 - __main__ - INFO - Epoch [18/27], Step [800/3125], Loss: 2.4676, Perplexity: 11.7936
117
+ Epoch 18 Training: 29%|██▉ | 899/3125 [14:52<36:09, 1.03it/s]
118
+ 2025-06-10 08:48:55,636 - __main__ - INFO - Epoch [18/27], Step [900/3125], Loss: 2.4923, Perplexity: 12.0890
119
+ Epoch 18 Training: 32%|███▏ | 999/3125 [16:31<34:34, 1.02it/s]
120
+ 2025-06-10 08:50:34,668 - __main__ - INFO - Epoch [18/27], Step [1000/3125], Loss: 2.3460, Perplexity: 10.4438
121
+ Epoch 18 Training: 35%|███▌ | 1099/3125 [18:10<33:19, 1.01it/s]
122
+ 2025-06-10 08:52:13,854 - __main__ - INFO - Epoch [18/27], Step [1100/3125], Loss: 2.3353, Perplexity: 10.3324
123
+ Epoch 18 Training: 38%|███▊ | 1199/3125 [19:50<31:49, 1.01it/s]
124
+ 2025-06-10 08:53:53,254 - __main__ - INFO - Epoch [18/27], Step [1200/3125], Loss: 2.6501, Perplexity: 14.1561
125
+ Epoch 18 Training: 42%|████▏ | 1299/3125 [21:30<30:17, 1.00it/s]
126
+ 2025-06-10 08:55:33,187 - __main__ - INFO - Epoch [18/27], Step [1300/3125], Loss: 2.5033, Perplexity: 12.2222
127
+ Epoch 18 Training: 45%|████▍ | 1399/3125 [23:09<28:19, 1.02it/s]
128
+ 2025-06-10 08:57:12,293 - __main__ - INFO - Epoch [18/27], Step [1400/3125], Loss: 2.5552, Perplexity: 12.8738
129
+ Epoch 18 Training: 48%|████▊ | 1499/3125 [24:48<26:55, 1.01it/s]
130
+ 2025-06-10 08:58:51,613 - __main__ - INFO - Epoch [18/27], Step [1500/3125], Loss: 2.1927, Perplexity: 8.9596
131
+ Epoch 18 Training: 51%|█████ | 1599/3125 [26:27<25:18, 1.00it/s]
132
+ 2025-06-10 09:00:30,661 - __main__ - INFO - Epoch [18/27], Step [1600/3125], Loss: 2.4970, Perplexity: 12.1465
133
+ Epoch 18 Training: 54%|█████▍ | 1699/3125 [28:06<23:17, 1.02it/s]
134
+ 2025-06-10 09:02:09,514 - __main__ - INFO - Epoch [18/27], Step [1700/3125], Loss: 2.4935, Perplexity: 12.1037
135
+ Epoch 18 Training: 58%|█████▊ | 1799/3125 [29:45<21:57, 1.01it/s]
136
+ 2025-06-10 09:03:48,633 - __main__ - INFO - Epoch [18/27], Step [1800/3125], Loss: 2.4783, Perplexity: 11.9213
137
+ Epoch 18 Training: 61%|██████ | 1899/3125 [31:24<20:28, 1.00s/it]
138
+ 2025-06-10 09:05:27,649 - __main__ - INFO - Epoch [18/27], Step [1900/3125], Loss: 2.3436, Perplexity: 10.4189
139
+ Epoch 18 Training: 64%|██████▍ | 1999/3125 [33:03<18:17, 1.03it/s]
140
+ 2025-06-10 09:07:06,719 - __main__ - INFO - Epoch [18/27], Step [2000/3125], Loss: 2.5853, Perplexity: 13.2677
141
+ Epoch 18 Training: 67%|██████▋ | 2099/3125 [34:42<16:47, 1.02it/s]
142
+ 2025-06-10 09:08:45,283 - __main__ - INFO - Epoch [18/27], Step [2100/3125], Loss: 2.5161, Perplexity: 12.3804
143
+ Epoch 18 Training: 70%|███████ | 2199/3125 [36:21<15:34, 1.01s/it]
144
+ 2025-06-10 09:10:24,189 - __main__ - INFO - Epoch [18/27], Step [2200/3125], Loss: 2.6254, Perplexity: 13.8102
145
+ Epoch 18 Training: 74%|███████▎ | 2299/3125 [38:00<13:23, 1.03it/s]
146
+ 2025-06-10 09:12:03,789 - __main__ - INFO - Epoch [18/27], Step [2300/3125], Loss: 2.4902, Perplexity: 12.0631
147
+ Epoch 18 Training: 77%|███████▋ | 2399/3125 [39:39<11:57, 1.01it/s]
148
+ 2025-06-10 09:13:42,698 - __main__ - INFO - Epoch [18/27], Step [2400/3125], Loss: 2.4329, Perplexity: 11.3913
149
+ Epoch 18 Training: 80%|███████▉ | 2499/3125 [41:18<10:27, 1.00s/it]
150
+ 2025-06-10 09:15:21,802 - __main__ - INFO - Epoch [18/27], Step [2500/3125], Loss: 2.4294, Perplexity: 11.3520
151
+ Epoch 18 Training: 83%|████████▎ | 2599/3125 [42:58<08:45, 1.00it/s]
152
+ 2025-06-10 09:17:01,212 - __main__ - INFO - Epoch [18/27], Step [2600/3125], Loss: 2.5354, Perplexity: 12.6210
153
+ Epoch 18 Training: 86%|████████▋ | 2699/3125 [44:37<06:51, 1.04it/s]
154
+ 2025-06-10 09:18:40,318 - __main__ - INFO - Epoch [18/27], Step [2700/3125], Loss: 2.4877, Perplexity: 12.0333
155
+ Epoch 18 Training: 90%|████████▉ | 2799/3125 [46:16<05:21, 1.01it/s]
156
+ 2025-06-10 09:20:19,433 - __main__ - INFO - Epoch [18/27], Step [2800/3125], Loss: 2.4673, Perplexity: 11.7911
157
+ Epoch 18 Training: 93%|█████████▎| 2899/3125 [47:56<03:50, 1.02s/it]
158
+ 2025-06-10 09:21:59,119 - __main__ - INFO - Epoch [18/27], Step [2900/3125], Loss: 2.3145, Perplexity: 10.1199
159
+ Epoch 18 Training: 96%|█████████▌| 2999/3125 [49:35<02:05, 1.01it/s]
160
+ 2025-06-10 09:23:38,775 - __main__ - INFO - Epoch [18/27], Step [3000/3125], Loss: 2.5194, Perplexity: 12.4214
161
+ Epoch 18 Training: 99%|█████████▉| 3099/3125 [51:14<00:25, 1.02it/s]
162
+ 2025-06-10 09:25:17,977 - __main__ - INFO - Epoch [18/27], Step [3100/3125], Loss: 2.6512, Perplexity: 14.1717
163
+ Epoch 18 Training: 100%|██████████| 3125/3125 [51:40<00:00, 1.01it/s]
164
+ 2025-06-10 09:25:42,823 - __main__ - INFO - Epoch 18 Training finished. Avg Loss: 2.4430, Time: 3100.88s
165
+ Validation: 100%|██████████| 391/391 [11:45<00:00, 1.80s/it]
166
+ 2025-06-10 09:37:27,944 - __main__ - INFO - Validation Avg Loss: 2.4807, Perplexity: 11.9500
167
+ 2025-06-10 09:37:37,250 - __main__ - INFO - Validation BLEU-4: 0.1054
168
+ 2025-06-10 09:37:37,896 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.1054.pth
169
+ 2025-06-10 09:37:37,897 - __main__ - INFO - Performing memory optimization after epoch...
170
+ 2025-06-10 09:37:38,181 - __main__ - INFO - CUDA cache emptied.
171
+ 2025-06-10 09:37:38,517 - __main__ - INFO - Python garbage collector run.
172
+ Epoch 19 Training: 3%|▎ | 99/3125 [01:40<49:53, 1.01it/s]
173
+ 2025-06-10 09:39:20,412 - __main__ - INFO - Epoch [19/27], Step [100/3125], Loss: 2.3789, Perplexity: 10.7934
174
+ Epoch 19 Training: 6%|▋ | 199/3125 [03:20<47:55, 1.02it/s]
175
+ 2025-06-10 09:40:59,969 - __main__ - INFO - Epoch [19/27], Step [200/3125], Loss: 2.4119, Perplexity: 11.1547
176
+ Epoch 19 Training: 10%|▉ | 299/3125 [04:59<46:40, 1.01it/s]
177
+ 2025-06-10 09:42:39,188 - __main__ - INFO - Epoch [19/27], Step [300/3125], Loss: 2.4709, Perplexity: 11.8328
178
+ Epoch 19 Training: 13%|█▎ | 399/3125 [06:38<45:26, 1.00s/it]
179
+ 2025-06-10 09:44:18,027 - __main__ - INFO - Epoch [19/27], Step [400/3125], Loss: 2.3014, Perplexity: 9.9881
180
+ Epoch 19 Training: 16%|█▌ | 499/3125 [08:17<43:21, 1.01it/s]
181
+ 2025-06-10 09:45:56,904 - __main__ - INFO - Epoch [19/27], Step [500/3125], Loss: 2.5112, Perplexity: 12.3191
182
+ Epoch 19 Training: 19%|█▉ | 599/3125 [09:56<42:18, 1.00s/it]
183
+ 2025-06-10 09:47:35,893 - __main__ - INFO - Epoch [19/27], Step [600/3125], Loss: 2.4034, Perplexity: 11.0610
184
+ Epoch 19 Training: 22%|██▏ | 699/3125 [11:35<39:49, 1.02it/s]
185
+ 2025-06-10 09:49:15,424 - __main__ - INFO - Epoch [19/27], Step [700/3125], Loss: 2.4421, Perplexity: 11.4974
186
+ Epoch 19 Training: 26%|██▌ | 799/3125 [13:15<37:59, 1.02it/s]
187
+ 2025-06-10 09:50:54,632 - __main__ - INFO - Epoch [19/27], Step [800/3125], Loss: 2.4358, Perplexity: 11.4253
188
+ Epoch 19 Training: 29%|██▉ | 899/3125 [14:53<36:45, 1.01it/s]
189
+ 2025-06-10 09:52:33,475 - __main__ - INFO - Epoch [19/27], Step [900/3125], Loss: 2.4511, Perplexity: 11.6007
190
+ Epoch 19 Training: 32%|███▏ | 999/3125 [16:33<35:05, 1.01it/s]
191
+ 2025-06-10 09:54:13,109 - __main__ - INFO - Epoch [19/27], Step [1000/3125], Loss: 2.3752, Perplexity: 10.7536
192
+ Epoch 19 Training: 35%|███▌ | 1099/3125 [18:12<33:12, 1.02it/s]
193
+ 2025-06-10 09:55:52,508 - __main__ - INFO - Epoch [19/27], Step [1100/3125], Loss: 2.2818, Perplexity: 9.7943
194
+ Epoch 19 Training: 38%|███▊ | 1199/3125 [19:52<32:15, 1.01s/it]
195
+ 2025-06-10 09:57:31,557 - __main__ - INFO - Epoch [19/27], Step [1200/3125], Loss: 2.5260, Perplexity: 12.5039
196
+ Epoch 19 Training: 42%|████▏ | 1299/3125 [21:31<29:53, 1.02it/s]
197
+ 2025-06-10 09:59:10,940 - __main__ - INFO - Epoch [19/27], Step [1300/3125], Loss: 2.2350, Perplexity: 9.3461
198
+ Epoch 19 Training: 45%|████▍ | 1399/3125 [23:10<28:52, 1.00s/it]
199
+ 2025-06-10 10:00:49,938 - __main__ - INFO - Epoch [19/27], Step [1400/3125], Loss: 2.5668, Perplexity: 13.0242
200
+ Epoch 19 Training: 48%|████▊ | 1499/3125 [24:49<27:07, 1.00s/it]
201
+ 2025-06-10 10:02:29,195 - __main__ - INFO - Epoch [19/27], Step [1500/3125], Loss: 2.4380, Perplexity: 11.4499
202
+ Epoch 19 Training: 51%|█████ | 1599/3125 [26:28<24:53, 1.02it/s]
203
+ 2025-06-10 10:04:08,447 - __main__ - INFO - Epoch [19/27], Step [1600/3125], Loss: 2.1895, Perplexity: 8.9306
204
+ Epoch 19 Training: 54%|█████▍ | 1699/3125 [28:08<23:49, 1.00s/it]
205
+ 2025-06-10 10:05:47,701 - __main__ - INFO - Epoch [19/27], Step [1700/3125], Loss: 2.3286, Perplexity: 10.2640
206
+ Epoch 19 Training: 58%|█████▊ | 1799/3125 [29:47<21:43, 1.02it/s]
207
+ 2025-06-10 10:07:26,770 - __main__ - INFO - Epoch [19/27], Step [1800/3125], Loss: 2.3863, Perplexity: 10.8737
208
+ Epoch 19 Training: 61%|██████ | 1899/3125 [31:26<20:06, 1.02it/s]
209
+ 2025-06-10 10:09:06,544 - __main__ - INFO - Epoch [19/27], Step [1900/3125], Loss: 2.4867, Perplexity: 12.0217
210
+ Epoch 19 Training: 64%|██████▍ | 1999/3125 [33:06<18:29, 1.02it/s]
211
+ 2025-06-10 10:10:45,538 - __main__ - INFO - Epoch [19/27], Step [2000/3125], Loss: 2.3649, Perplexity: 10.6430
212
+ Epoch 19 Training: 67%|██████▋ | 2099/3125 [34:44<16:54, 1.01it/s]
213
+ 2025-06-10 10:12:24,330 - __main__ - INFO - Epoch [19/27], Step [2100/3125], Loss: 2.3546, Perplexity: 10.5342
214
+ Epoch 19 Training: 70%|███████ | 2199/3125 [36:24<15:24, 1.00it/s]
215
+ 2025-06-10 10:14:03,624 - __main__ - INFO - Epoch [19/27], Step [2200/3125], Loss: 2.3941, Perplexity: 10.9579
216
+ Epoch 19 Training: 74%|███████▎ | 2299/3125 [38:02<13:28, 1.02it/s]
217
+ 2025-06-10 10:15:42,466 - __main__ - INFO - Epoch [19/27], Step [2300/3125], Loss: 2.1938, Perplexity: 8.9695
218
+ Epoch 19 Training: 77%|███████▋ | 2399/3125 [39:41<12:09, 1.00s/it]
219
+ 2025-06-10 10:17:21,479 - __main__ - INFO - Epoch [19/27], Step [2400/3125], Loss: 2.4041, Perplexity: 11.0679
220
+ Epoch 19 Training: 80%|███████▉ | 2499/3125 [41:20<10:23, 1.00it/s]
221
+ 2025-06-10 10:19:00,375 - __main__ - INFO - Epoch [19/27], Step [2500/3125], Loss: 2.5131, Perplexity: 12.3433
222
+ Epoch 19 Training: 83%|████████▎ | 2599/3125 [43:00<08:52, 1.01s/it]
223
+ 2025-06-10 10:20:39,698 - __main__ - INFO - Epoch [19/27], Step [2600/3125], Loss: 2.2205, Perplexity: 9.2123
224
+ Epoch 19 Training: 86%|████████▋ | 2699/3125 [44:39<07:02, 1.01it/s]
225
+ 2025-06-10 10:22:19,043 - __main__ - INFO - Epoch [19/27], Step [2700/3125], Loss: 2.2930, Perplexity: 9.9044
226
+ Epoch 19 Training: 90%|████████▉ | 2799/3125 [46:18<05:21, 1.01it/s]
227
+ 2025-06-10 10:23:58,189 - __main__ - INFO - Epoch [19/27], Step [2800/3125], Loss: 2.5673, Perplexity: 13.0307
228
+ Epoch 19 Training: 93%|█████████▎| 2899/3125 [47:58<03:44, 1.01it/s]
229
+ 2025-06-10 10:25:37,764 - __main__ - INFO - Epoch [19/27], Step [2900/3125], Loss: 2.4287, Perplexity: 11.3445
230
+ Epoch 19 Training: 96%|█████████▌| 2999/3125 [49:37<02:05, 1.00it/s]
231
+ 2025-06-10 10:27:17,138 - __main__ - INFO - Epoch [19/27], Step [3000/3125], Loss: 2.3494, Perplexity: 10.4788
232
+ Epoch 19 Training: 99%|█████████▉| 3099/3125 [51:17<00:25, 1.02it/s]
233
+ 2025-06-10 10:28:56,764 - __main__ - INFO - Epoch [19/27], Step [3100/3125], Loss: 2.6807, Perplexity: 14.5951
234
+ Epoch 19 Training: 100%|██████████| 3125/3125 [51:43<00:00, 1.01it/s]
235
+ 2025-06-10 10:29:21,832 - __main__ - INFO - Epoch 19 Training finished. Avg Loss: 2.3931, Time: 3103.31s
236
+ Validation: 100%|██████████| 391/391 [11:40<00:00, 1.79s/it]
237
+ 2025-06-10 10:41:02,308 - __main__ - INFO - Validation Avg Loss: 2.4842, Perplexity: 11.9914
238
+ 2025-06-10 10:41:11,596 - __main__ - INFO - Validation BLEU-4: 0.1053
239
+ 2025-06-10 10:41:11,597 - __main__ - INFO - Performing memory optimization after epoch...
240
+ 2025-06-10 10:41:11,861 - __main__ - INFO - CUDA cache emptied.
241
+ 2025-06-10 10:41:12,194 - __main__ - INFO - Python garbage collector run.
242
+ Epoch 20 Training: 3%|▎ | 99/3125 [01:41<49:26, 1.02it/s]
243
+ 2025-06-10 10:42:54,894 - __main__ - INFO - Epoch [20/27], Step [100/3125], Loss: 2.2876, Perplexity: 9.8510
244
+ Epoch 20 Training: 6%|▋ | 199/3125 [03:21<48:40, 1.00it/s]
245
+ 2025-06-10 10:44:34,311 - __main__ - INFO - Epoch [20/27], Step [200/3125], Loss: 2.3219, Perplexity: 10.1953
246
+ Epoch 20 Training: 10%|▉ | 299/3125 [05:00<45:40, 1.03it/s]
247
+ 2025-06-10 10:46:13,375 - __main__ - INFO - Epoch [20/27], Step [300/3125], Loss: 2.3180, Perplexity: 10.1558
248
+ Epoch 20 Training: 13%|█▎ | 399/3125 [06:38<44:53, 1.01it/s]
249
+ 2025-06-10 10:47:52,154 - __main__ - INFO - Epoch [20/27], Step [400/3125], Loss: 2.2775, Perplexity: 9.7521
250
+ Epoch 20 Training: 16%|█▌ | 499/3125 [08:18<43:42, 1.00it/s]
251
+ 2025-06-10 10:49:31,634 - __main__ - INFO - Epoch [20/27], Step [500/3125], Loss: 2.3627, Perplexity: 10.6199
252
+ Epoch 20 Training: 19%|█▉ | 599/3125 [09:57<41:42, 1.01it/s]
253
+ 2025-06-10 10:51:10,386 - __main__ - INFO - Epoch [20/27], Step [600/3125], Loss: 2.5840, Perplexity: 13.2502
254
+ Epoch 20 Training: 22%|██▏ | 699/3125 [11:36<39:30, 1.02it/s]
255
+ 2025-06-10 10:52:49,194 - __main__ - INFO - Epoch [20/27], Step [700/3125], Loss: 2.3991, Perplexity: 11.0130
256
+ Epoch 20 Training: 26%|██▌ | 799/3125 [13:15<38:23, 1.01it/s]
257
+ 2025-06-10 10:54:28,248 - __main__ - INFO - Epoch [20/27], Step [800/3125], Loss: 2.3200, Perplexity: 10.1755
258
+ Epoch 20 Training: 29%|██▉ | 899/3125 [14:54<36:31, 1.02it/s]
259
+ 2025-06-10 10:56:07,780 - __main__ - INFO - Epoch [20/27], Step [900/3125], Loss: 2.1218, Perplexity: 8.3458
260
+ Epoch 20 Training: 32%|███▏ | 999/3125 [16:33<34:45, 1.02it/s]
261
+ 2025-06-10 10:57:46,489 - __main__ - INFO - Epoch [20/27], Step [1000/3125], Loss: 2.3538, Perplexity: 10.5252
262
+ Epoch 20 Training: 35%|███▌ | 1099/3125 [18:12<33:27, 1.01it/s]
263
+ 2025-06-10 10:59:25,361 - __main__ - INFO - Epoch [20/27], Step [1100/3125], Loss: 2.2220, Perplexity: 9.2257
264
+ Epoch 20 Training: 38%|███▊ | 1199/3125 [19:51<31:32, 1.02it/s]
265
+ 2025-06-10 11:01:04,348 - __main__ - INFO - Epoch [20/27], Step [1200/3125], Loss: 2.2933, Perplexity: 9.9077
266
+ Epoch 20 Training: 42%|████▏ | 1299/3125 [21:30<29:52, 1.02it/s]
267
+ 2025-06-10 11:02:43,634 - __main__ - INFO - Epoch [20/27], Step [1300/3125], Loss: 2.4057, Perplexity: 11.0867
268
+ Epoch 20 Training: 45%|████▍ | 1399/3125 [23:09<28:58, 1.01s/it]
269
+ 2025-06-10 11:04:22,597 - __main__ - INFO - Epoch [20/27], Step [1400/3125], Loss: 2.4458, Perplexity: 11.5401
270
+ Epoch 20 Training: 48%|████▊ | 1499/3125 [24:48<26:41, 1.02it/s]
271
+ 2025-06-10 11:06:02,052 - __main__ - INFO - Epoch [20/27], Step [1500/3125], Loss: 2.2830, Perplexity: 9.8058
272
+ Epoch 20 Training: 51%|█████ | 1599/3125 [26:28<25:03, 1.02it/s]
273
+ 2025-06-10 11:07:41,484 - __main__ - INFO - Epoch [20/27], Step [1600/3125], Loss: 2.3211, Perplexity: 10.1873
274
+ Epoch 20 Training: 54%|█████▍ | 1699/3125 [28:07<23:31, 1.01it/s]
275
+ 2025-06-10 11:09:20,482 - __main__ - INFO - Epoch [20/27], Step [1700/3125], Loss: 2.4251, Perplexity: 11.3038
276
+ Epoch 20 Training: 58%|█████▊ | 1799/3125 [29:46<22:06, 1.00s/it]
277
+ 2025-06-10 11:10:59,993 - __main__ - INFO - Epoch [20/27], Step [1800/3125], Loss: 2.3395, Perplexity: 10.3759
278
+ Epoch 20 Training: 61%|██████ | 1899/3125 [31:25<20:16, 1.01it/s]
279
+ 2025-06-10 11:12:39,167 - __main__ - INFO - Epoch [20/27], Step [1900/3125], Loss: 2.5910, Perplexity: 13.3425
280
+ Epoch 20 Training: 64%|██████▍ | 1999/3125 [33:05<18:59, 1.01s/it]
281
+ 2025-06-10 11:14:18,557 - __main__ - INFO - Epoch [20/27], Step [2000/3125], Loss: 2.2993, Perplexity: 9.9669
282
+ Epoch 20 Training: 67%|██████▋ | 2099/3125 [34:44<17:14, 1.01s/it]
283
+ 2025-06-10 11:15:57,832 - __main__ - INFO - Epoch [20/27], Step [2100/3125], Loss: 2.1527, Perplexity: 8.6081
284
+ Epoch 20 Training: 70%|███████ | 2199/3125 [36:24<15:03, 1.02it/s]
285
+ 2025-06-10 11:17:37,282 - __main__ - INFO - Epoch [20/27], Step [2200/3125], Loss: 2.3867, Perplexity: 10.8779
286
+ Epoch 20 Training: 74%|███████▎ | 2299/3125 [38:03<13:54, 1.01s/it]
287
+ 2025-06-10 11:19:16,726 - __main__ - INFO - Epoch [20/27], Step [2300/3125], Loss: 2.3479, Perplexity: 10.4636
288
+ Epoch 20 Training: 77%|███████▋ | 2399/3125 [39:42<11:56, 1.01it/s]
289
+ 2025-06-10 11:20:56,083 - __main__ - INFO - Epoch [20/27], Step [2400/3125], Loss: 2.2340, Perplexity: 9.3375
290
+ Epoch 20 Training: 80%|███████▉ | 2499/3125 [41:22<10:21, 1.01it/s]
291
+ 2025-06-10 11:22:35,563 - __main__ - INFO - Epoch [20/27], Step [2500/3125], Loss: 2.3098, Perplexity: 10.0721
292
+ Epoch 20 Training: 83%|████████▎ | 2599/3125 [43:01<08:31, 1.03it/s]
293
+ 2025-06-10 11:24:14,728 - __main__ - INFO - Epoch [20/27], Step [2600/3125], Loss: 2.3198, Perplexity: 10.1732
294
+ Epoch 20 Training: 86%|████████▋ | 2699/3125 [44:41<07:05, 1.00it/s]
295
+ 2025-06-10 11:25:54,571 - __main__ - INFO - Epoch [20/27], Step [2700/3125], Loss: 2.3959, Perplexity: 10.9776
296
+ Epoch 20 Training: 90%|████████▉ | 2799/3125 [46:19<05:23, 1.01it/s]
297
+ 2025-06-10 11:27:32,978 - __main__ - INFO - Epoch [20/27], Step [2800/3125], Loss: 2.7605, Perplexity: 15.8080
298
+ Epoch 20 Training: 93%|█████████▎| 2899/3125 [47:58<03:45, 1.00it/s]
299
+ 2025-06-10 11:29:12,086 - __main__ - INFO - Epoch [20/27], Step [2900/3125], Loss: 2.4974, Perplexity: 12.1514
300
+ Epoch 20 Training: 96%|█████████▌| 2999/3125 [49:37<02:03, 1.02it/s]
301
+ 2025-06-10 11:30:50,993 - __main__ - INFO - Epoch [20/27], Step [3000/3125], Loss: 2.4413, Perplexity: 11.4875
302
+ Epoch 20 Training: 99%|█████████▉| 3099/3125 [51:17<00:26, 1.00s/it]
303
+ 2025-06-10 11:32:30,473 - __main__ - INFO - Epoch [20/27], Step [3100/3125], Loss: 2.3338, Perplexity: 10.3173
304
+ Epoch 20 Training: 100%|██████████| 3125/3125 [51:43<00:00, 1.01it/s]
305
+ 2025-06-10 11:32:55,543 - __main__ - INFO - Epoch 20 Training finished. Avg Loss: 2.3498, Time: 3103.35s
306
+ Validation: 100%|██████████| 391/391 [11:41<00:00, 1.79s/it]
307
+ 2025-06-10 11:44:37,028 - __main__ - INFO - Validation Avg Loss: 2.4874, Perplexity: 12.0298
308
+ 2025-06-10 11:44:46,221 - __main__ - INFO - Validation BLEU-4: 0.1058
309
+ 2025-06-10 11:44:46,914 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.1058.pth
310
+ 2025-06-10 11:44:47,586 - __main__ - INFO - Saved periodic model checkpoint to ./output/model_epoch_20.pth
311
+ 2025-06-10 11:44:47,587 - __main__ - INFO - Performing memory optimization after epoch...
312
+ 2025-06-10 11:44:47,853 - __main__ - INFO - CUDA cache emptied.
313
+ 2025-06-10 11:44:48,186 - __main__ - INFO - Python garbage collector run.
314
+ Epoch 21 Training: 3%|▎ | 99/3125 [01:40<48:40, 1.04it/s]
315
+ 2025-06-10 11:46:29,972 - __main__ - INFO - Epoch [21/27], Step [100/3125], Loss: 2.1393, Perplexity: 8.4933
316
+ Epoch 21 Training: 6%|▋ | 199/3125 [03:20<47:37, 1.02it/s]
317
+ 2025-06-10 11:48:09,315 - __main__ - INFO - Epoch [21/27], Step [200/3125], Loss: 2.3351, Perplexity: 10.3309
318
+ Epoch 21 Training: 10%|▉ | 299/3125 [04:59<47:30, 1.01s/it]
319
+ 2025-06-10 11:49:48,545 - __main__ - INFO - Epoch [21/27], Step [300/3125], Loss: 2.0899, Perplexity: 8.0842
320
+ Epoch 21 Training: 13%|█▎ | 399/3125 [06:38<44:37, 1.02it/s]
321
+ 2025-06-10 11:51:27,798 - __main__ - INFO - Epoch [21/27], Step [400/3125], Loss: 2.1491, Perplexity: 8.5775
322
+ Epoch 21 Training: 16%|█▌ | 499/3125 [08:17<43:33, 1.00it/s]
323
+ 2025-06-10 11:53:06,975 - __main__ - INFO - Epoch [21/27], Step [500/3125], Loss: 2.2872, Perplexity: 9.8477
324
+ Epoch 21 Training: 19%|█▉ | 599/3125 [09:56<41:09, 1.02it/s]
325
+ 2025-06-10 11:54:45,864 - __main__ - INFO - Epoch [21/27], Step [600/3125], Loss: 2.4272, Perplexity: 11.3273
326
+ Epoch 21 Training: 22%|██▏ | 699/3125 [11:35<39:33, 1.02it/s]
327
+ 2025-06-10 11:56:24,485 - __main__ - INFO - Epoch [21/27], Step [700/3125], Loss: 2.1416, Perplexity: 8.5130
328
+ Epoch 21 Training: 26%|██▌ | 799/3125 [13:14<37:58, 1.02it/s]
329
+ 2025-06-10 11:58:03,794 - __main__ - INFO - Epoch [21/27], Step [800/3125], Loss: 2.3899, Perplexity: 10.9122
330
+ Epoch 21 Training: 29%|██▉ | 899/3125 [14:54<36:35, 1.01it/s]
331
+ 2025-06-10 11:59:43,688 - __main__ - INFO - Epoch [21/27], Step [900/3125], Loss: 2.3071, Perplexity: 10.0450
332
+ Epoch 21 Training: 32%|███▏ | 999/3125 [16:33<34:56, 1.01it/s]
333
+ 2025-06-10 12:01:22,531 - __main__ - INFO - Epoch [21/27], Step [1000/3125], Loss: 2.1995, Perplexity: 9.0203
334
+ Epoch 21 Training: 35%|███▌ | 1099/3125 [18:12<33:03, 1.02it/s]
335
+ 2025-06-10 12:03:01,611 - __main__ - INFO - Epoch [21/27], Step [1100/3125], Loss: 2.3339, Perplexity: 10.3183
336
+ Epoch 21 Training: 38%|███▊ | 1199/3125 [19:51<31:55, 1.01it/s]
337
+ 2025-06-10 12:04:40,369 - __main__ - INFO - Epoch [21/27], Step [1200/3125], Loss: 2.4029, Perplexity: 11.0547
338
+ Epoch 21 Training: 42%|████▏ | 1299/3125 [21:30<29:42, 1.02it/s]
339
+ 2025-06-10 12:06:19,291 - __main__ - INFO - Epoch [21/27], Step [1300/3125], Loss: 2.2694, Perplexity: 9.6740
340
+ Epoch 21 Training: 45%|████▍ | 1399/3125 [23:09<29:09, 1.01s/it]
341
+ 2025-06-10 12:07:58,672 - __main__ - INFO - Epoch [21/27], Step [1400/3125], Loss: 2.1527, Perplexity: 8.6082
342
+ Epoch 21 Training: 48%|████▊ | 1499/3125 [24:48<27:00, 1.00it/s]
343
+ 2025-06-10 12:09:37,343 - __main__ - INFO - Epoch [21/27], Step [1500/3125], Loss: 2.2131, Perplexity: 9.1442
344
+ Epoch 21 Training: 51%|█████ | 1599/3125 [26:26<25:42, 1.01s/it]
345
+ 2025-06-10 12:11:16,155 - __main__ - INFO - Epoch [21/27], Step [1600/3125], Loss: 2.1473, Perplexity: 8.5615
346
+ Epoch 21 Training: 54%|█████▍ | 1699/3125 [28:05<23:07, 1.03it/s]
347
+ 2025-06-10 12:12:55,135 - __main__ - INFO - Epoch [21/27], Step [1700/3125], Loss: 2.2490, Perplexity: 9.4778
348
+ Epoch 21 Training: 58%|█████▊ | 1799/3125 [29:45<21:42, 1.02it/s]
349
+ 2025-06-10 12:14:34,267 - __main__ - INFO - Epoch [21/27], Step [1800/3125], Loss: 2.3484, Perplexity: 10.4684
350
+ Epoch 21 Training: 61%|██████ | 1899/3125 [31:24<20:08, 1.01it/s]
351
+ 2025-06-10 12:16:13,587 - __main__ - INFO - Epoch [21/27], Step [1900/3125], Loss: 2.3690, Perplexity: 10.6865
352
+ Epoch 21 Training: 64%|██████▍ | 1999/3125 [33:03<18:07, 1.04it/s]
353
+ 2025-06-10 12:17:52,694 - __main__ - INFO - Epoch [21/27], Step [2000/3125], Loss: 2.1483, Perplexity: 8.5702
354
+ Epoch 21 Training: 67%|██████▋ | 2099/3125 [34:42<17:02, 1.00it/s]
355
+ 2025-06-10 12:19:31,958 - __main__ - INFO - Epoch [21/27], Step [2100/3125], Loss: 2.4676, Perplexity: 11.7938
356
+ Epoch 21 Training: 70%|███████ | 2199/3125 [36:21<15:05, 1.02it/s]
357
+ 2025-06-10 12:21:10,625 - __main__ - INFO - Epoch [21/27], Step [2200/3125], Loss: 2.5696, Perplexity: 13.0607
358
+ Epoch 21 Training: 74%|███████▎ | 2299/3125 [38:00<13:55, 1.01s/it]
359
+ 2025-06-10 12:22:49,585 - __main__ - INFO - Epoch [21/27], Step [2300/3125], Loss: 2.3604, Perplexity: 10.5955
360
+ Epoch 21 Training: 77%|███████▋ | 2399/3125 [39:39<11:56, 1.01it/s]
361
+ 2025-06-10 12:24:28,578 - __main__ - INFO - Epoch [21/27], Step [2400/3125], Loss: 2.4663, Perplexity: 11.7783
362
+ Epoch 21 Training: 80%|███████▉ | 2499/3125 [41:18<10:26, 1.00s/it]
363
+ 2025-06-10 12:26:07,929 - __main__ - INFO - Epoch [21/27], Step [2500/3125], Loss: 2.4053, Perplexity: 11.0819
364
+ Epoch 21 Training: 83%|████████▎ | 2599/3125 [42:57<08:41, 1.01it/s]
365
+ 2025-06-10 12:27:46,767 - __main__ - INFO - Epoch [21/27], Step [2600/3125], Loss: 2.3557, Perplexity: 10.5451
366
+ Epoch 21 Training: 86%|████████▋ | 2699/3125 [44:36<06:58, 1.02it/s]
367
+ 2025-06-10 12:29:25,781 - __main__ - INFO - Epoch [21/27], Step [2700/3125], Loss: 2.2723, Perplexity: 9.7018
368
+ Epoch 21 Training: 90%|████████▉ | 2799/3125 [46:15<05:16, 1.03it/s]
369
+ 2025-06-10 12:31:04,767 - __main__ - INFO - Epoch [21/27], Step [2800/3125], Loss: 2.2502, Perplexity: 9.4895
370
+ Epoch 21 Training: 93%|█████████▎| 2899/3125 [47:54<03:45, 1.00it/s]
371
+ 2025-06-10 12:32:43,808 - __main__ - INFO - Epoch [21/27], Step [2900/3125], Loss: 2.4847, Perplexity: 11.9973
372
+ Epoch 21 Training: 96%|█████████▌| 2999/3125 [49:33<02:02, 1.03it/s]
373
+ 2025-06-10 12:34:22,740 - __main__ - INFO - Epoch [21/27], Step [3000/3125], Loss: 2.2226, Perplexity: 9.2317
374
+ Epoch 21 Training: 99%|█████████▉| 3099/3125 [51:13<00:25, 1.03it/s]
375
+ 2025-06-10 12:36:02,355 - __main__ - INFO - Epoch [21/27], Step [3100/3125], Loss: 2.1203, Perplexity: 8.3340
376
+ Epoch 21 Training: 100%|██████████| 3125/3125 [51:39<00:00, 1.01it/s]
377
+ 2025-06-10 12:36:27,232 - __main__ - INFO - Epoch 21 Training finished. Avg Loss: 2.3104, Time: 3099.04s
378
+ Validation: 100%|██████████| 391/391 [11:42<00:00, 1.80s/it]
379
+ 2025-06-10 12:48:09,648 - __main__ - INFO - Validation Avg Loss: 2.4976, Perplexity: 12.1533
380
+ 2025-06-10 12:48:18,948 - __main__ - INFO - Validation BLEU-4: 0.1039
381
+ 2025-06-10 12:48:18,949 - __main__ - INFO - Performing memory optimization after epoch...
382
+ 2025-06-10 12:48:19,238 - __main__ - INFO - CUDA cache emptied.
383
+ 2025-06-10 12:48:19,573 - __main__ - INFO - Python garbage collector run.
384
+ Epoch 22 Training: 3%|▎ | 99/3125 [01:41<49:49, 1.01it/s]
385
+ 2025-06-10 12:50:01,678 - __main__ - INFO - Epoch [22/27], Step [100/3125], Loss: 2.0841, Perplexity: 8.0371
386
+ Epoch 22 Training: 6%|▋ | 199/3125 [03:20<48:17, 1.01it/s]
387
+ 2025-06-10 12:51:40,759 - __main__ - INFO - Epoch [22/27], Step [200/3125], Loss: 2.2103, Perplexity: 9.1183
388
+ Epoch 22 Training: 10%|▉ | 299/3125 [04:59<47:06, 1.00s/it]
389
+ 2025-06-10 12:53:19,913 - __main__ - INFO - Epoch [22/27], Step [300/3125], Loss: 2.2659, Perplexity: 9.6397
390
+ Epoch 22 Training: 13%|█▎ | 399/3125 [06:38<44:47, 1.01it/s]
391
+ 2025-06-10 12:54:58,861 - __main__ - INFO - Epoch [22/27], Step [400/3125], Loss: 2.4005, Perplexity: 11.0286
392
+ Epoch 22 Training: 16%|█▌ | 499/3125 [08:17<42:50, 1.02it/s]
393
+ 2025-06-10 12:56:37,699 - __main__ - INFO - Epoch [22/27], Step [500/3125], Loss: 2.1922, Perplexity: 8.9553
394
+ Epoch 22 Training: 19%|█▉ | 599/3125 [09:56<41:26, 1.02it/s]
395
+ 2025-06-10 12:58:16,583 - __main__ - INFO - Epoch [22/27], Step [600/3125], Loss: 2.3171, Perplexity: 10.1463
396
+ Epoch 22 Training: 22%|██▏ | 699/3125 [11:35<40:48, 1.01s/it]
397
+ 2025-06-10 12:59:56,327 - __main__ - INFO - Epoch [22/27], Step [700/3125], Loss: 2.1564, Perplexity: 8.6396
398
+ Epoch 22 Training: 26%|██▌ | 799/3125 [13:14<38:34, 1.01it/s]
399
+ 2025-06-10 13:01:35,149 - __main__ - INFO - Epoch [22/27], Step [800/3125], Loss: 2.3937, Perplexity: 10.9535
400
+ Epoch 22 Training: 29%|██▉ | 899/3125 [14:53<37:06, 1.00s/it]
401
+ 2025-06-10 13:03:14,015 - __main__ - INFO - Epoch [22/27], Step [900/3125], Loss: 2.3248, Perplexity: 10.2245
402
+ Epoch 22 Training: 32%|███▏ | 999/3125 [16:32<34:50, 1.02it/s]
403
+ 2025-06-10 13:04:52,893 - __main__ - INFO - Epoch [22/27], Step [1000/3125], Loss: 2.0701, Perplexity: 7.9253
404
+ Epoch 22 Training: 35%|███▌ | 1099/3125 [18:11<34:07, 1.01s/it]
405
+ 2025-06-10 13:06:32,051 - __main__ - INFO - Epoch [22/27], Step [1100/3125], Loss: 2.1329, Perplexity: 8.4391
406
+ Epoch 22 Training: 38%|███▊ | 1199/3125 [19:50<32:07, 1.00s/it]
407
+ 2025-06-10 13:08:11,551 - __main__ - INFO - Epoch [22/27], Step [1200/3125], Loss: 2.1940, Perplexity: 8.9706
408
+ Epoch 22 Training: 42%|████▏ | 1299/3125 [21:29<30:18, 1.00it/s]
409
+ 2025-06-10 13:09:50,488 - __main__ - INFO - Epoch [22/27], Step [1300/3125], Loss: 2.4257, Perplexity: 11.3102
410
+ Epoch 22 Training: 45%|████▍ | 1399/3125 [23:08<28:25, 1.01it/s]
411
+ 2025-06-10 13:11:29,499 - __main__ - INFO - Epoch [22/27], Step [1400/3125], Loss: 2.2594, Perplexity: 9.5774
412
+ Epoch 22 Training: 48%|████▊ | 1499/3125 [24:48<26:36, 1.02it/s]
413
+ 2025-06-10 13:13:09,191 - __main__ - INFO - Epoch [22/27], Step [1500/3125], Loss: 2.2953, Perplexity: 9.9279
414
+ Epoch 22 Training: 51%|█████ | 1599/3125 [26:27<25:30, 1.00s/it]
415
+ 2025-06-10 13:14:48,399 - __main__ - INFO - Epoch [22/27], Step [1600/3125], Loss: 2.3245, Perplexity: 10.2215
416
+ Epoch 22 Training: 54%|█████▍ | 1699/3125 [28:07<23:44, 1.00it/s]
417
+ 2025-06-10 13:16:27,617 - __main__ - INFO - Epoch [22/27], Step [1700/3125], Loss: 2.3146, Perplexity: 10.1208
418
+ Epoch 22 Training: 58%|█████▊ | 1799/3125 [29:46<22:01, 1.00it/s]
419
+ 2025-06-10 13:18:07,329 - __main__ - INFO - Epoch [22/27], Step [1800/3125], Loss: 2.3918, Perplexity: 10.9335
420
+ Epoch 22 Training: 61%|██████ | 1899/3125 [31:25<20:35, 1.01s/it]
421
+ 2025-06-10 13:19:46,339 - __main__ - INFO - Epoch [22/27], Step [1900/3125], Loss: 2.4785, Perplexity: 11.9235
422
+ Epoch 22 Training: 64%|██████▍ | 1999/3125 [33:05<18:42, 1.00it/s]
423
+ 2025-06-10 13:21:25,781 - __main__ - INFO - Epoch [22/27], Step [2000/3125], Loss: 2.2433, Perplexity: 9.4248
424
+ Epoch 22 Training: 67%|██████▋ | 2099/3125 [34:44<17:05, 1.00it/s]
425
+ 2025-06-10 13:23:05,202 - __main__ - INFO - Epoch [22/27], Step [2100/3125], Loss: 2.3151, Perplexity: 10.1256
426
+ Epoch 22 Training: 70%|███████ | 2199/3125 [36:23<15:14, 1.01it/s]
427
+ 2025-06-10 13:24:44,554 - __main__ - INFO - Epoch [22/27], Step [2200/3125], Loss: 2.1862, Perplexity: 8.9009
428
+ Epoch 22 Training: 74%|███████▎ | 2299/3125 [38:03<13:35, 1.01it/s]
429
+ 2025-06-10 13:26:23,951 - __main__ - INFO - Epoch [22/27], Step [2300/3125], Loss: 2.4228, Perplexity: 11.2773
430
+ Epoch 22 Training: 77%|███████▋ | 2399/3125 [39:42<12:02, 1.00it/s]
431
+ 2025-06-10 13:28:03,210 - __main__ - INFO - Epoch [22/27], Step [2400/3125], Loss: 2.2062, Perplexity: 9.0816
432
+ Epoch 22 Training: 80%|███████▉ | 2499/3125 [41:21<10:24, 1.00it/s]
433
+ 2025-06-10 13:29:42,178 - __main__ - INFO - Epoch [22/27], Step [2500/3125], Loss: 2.1243, Perplexity: 8.3674
434
+ Epoch 22 Training: 83%|████████▎ | 2599/3125 [43:00<08:43, 1.00it/s]
435
+ 2025-06-10 13:31:21,309 - __main__ - INFO - Epoch [22/27], Step [2600/3125], Loss: 2.3792, Perplexity: 10.7965
436
+ Epoch 22 Training: 86%|████████▋ | 2699/3125 [44:39<06:57, 1.02it/s]
437
+ 2025-06-10 13:32:59,845 - __main__ - INFO - Epoch [22/27], Step [2700/3125], Loss: 2.3259, Perplexity: 10.2359
438
+ Epoch 22 Training: 90%|████████▉ | 2799/3125 [46:18<05:31, 1.02s/it]
439
+ 2025-06-10 13:34:39,330 - __main__ - INFO - Epoch [22/27], Step [2800/3125], Loss: 2.4015, Perplexity: 11.0399
440
+ Epoch 22 Training: 93%|█████████▎| 2899/3125 [47:58<03:46, 1.00s/it]
441
+ 2025-06-10 13:36:18,749 - __main__ - INFO - Epoch [22/27], Step [2900/3125], Loss: 2.5032, Perplexity: 12.2218
442
+ Epoch 22 Training: 96%|█████████▌| 2999/3125 [49:37<02:06, 1.00s/it]
443
+ 2025-06-10 13:37:57,726 - __main__ - INFO - Epoch [22/27], Step [3000/3125], Loss: 2.1186, Perplexity: 8.3195
444
+ Epoch 22 Training: 99%|█████████▉| 3099/3125 [51:16<00:25, 1.02it/s]
445
+ 2025-06-10 13:39:36,968 - __main__ - INFO - Epoch [22/27], Step [3100/3125], Loss: 2.3263, Perplexity: 10.2401
446
+ Epoch 22 Training: 100%|██████████| 3125/3125 [51:42<00:00, 1.01it/s]
447
+ 2025-06-10 13:40:02,249 - __main__ - INFO - Epoch 22 Training finished. Avg Loss: 2.2757, Time: 3102.67s
448
+ Validation: 100%|██████████| 391/391 [11:48<00:00, 1.81s/it]
449
+ 2025-06-10 13:51:50,314 - __main__ - INFO - Validation Avg Loss: 2.5026, Perplexity: 12.2138
450
+ 2025-06-10 13:51:59,691 - __main__ - INFO - Validation BLEU-4: 0.1045
451
+ 2025-06-10 13:51:59,691 - __main__ - INFO - Performing memory optimization after epoch...
452
+ 2025-06-10 13:51:59,966 - __main__ - INFO - CUDA cache emptied.
453
+ 2025-06-10 13:52:00,297 - __main__ - INFO - Python garbage collector run.
454
+ Epoch 23 Training: 3%|▎ | 99/3125 [01:41<49:12, 1.02it/s]
455
+ 2025-06-10 13:53:42,343 - __main__ - INFO - Epoch [23/27], Step [100/3125], Loss: 2.2735, Perplexity: 9.7134
456
+ Epoch 23 Training: 6%|▋ | 199/3125 [03:20<47:52, 1.02it/s]
457
+ 2025-06-10 13:55:21,824 - __main__ - INFO - Epoch [23/27], Step [200/3125], Loss: 2.1850, Perplexity: 8.8906
458
+ Epoch 23 Training: 10%|▉ | 299/3125 [04:59<46:41, 1.01it/s]
459
+ 2025-06-10 13:57:00,821 - __main__ - INFO - Epoch [23/27], Step [300/3125], Loss: 2.2903, Perplexity: 9.8783
460
+ Epoch 23 Training: 13%|█▎ | 399/3125 [06:38<44:17, 1.03it/s]
461
+ 2025-06-10 13:58:39,930 - __main__ - INFO - Epoch [23/27], Step [400/3125], Loss: 2.4636, Perplexity: 11.7469
462
+ Epoch 23 Training: 16%|█▌ | 499/3125 [08:17<43:03, 1.02it/s]
463
+ 2025-06-10 14:00:19,257 - __main__ - INFO - Epoch [23/27], Step [500/3125], Loss: 2.2620, Perplexity: 9.6028
464
+ Epoch 23 Training: 19%|█▉ | 599/3125 [09:57<41:46, 1.01it/s]
465
+ 2025-06-10 14:01:58,718 - __main__ - INFO - Epoch [23/27], Step [600/3125], Loss: 2.2474, Perplexity: 9.4631
466
+ Epoch 23 Training: 22%|██▏ | 699/3125 [11:36<39:22, 1.03it/s]
467
+ 2025-06-10 14:03:37,859 - __main__ - INFO - Epoch [23/27], Step [700/3125], Loss: 2.1001, Perplexity: 8.1667
468
+ Epoch 23 Training: 26%|██▌ | 799/3125 [13:15<39:03, 1.01s/it]
469
+ 2025-06-10 14:05:16,383 - __main__ - INFO - Epoch [23/27], Step [800/3125], Loss: 2.1119, Perplexity: 8.2638
470
+ Epoch 23 Training: 29%|██▉ | 899/3125 [14:54<37:51, 1.02s/it]
471
+ 2025-06-10 14:06:55,867 - __main__ - INFO - Epoch [23/27], Step [900/3125], Loss: 2.2211, Perplexity: 9.2171
472
+ Epoch 23 Training: 32%|███▏ | 999/3125 [16:34<35:23, 1.00it/s]
473
+ 2025-06-10 14:08:35,387 - __main__ - INFO - Epoch [23/27], Step [1000/3125], Loss: 2.2420, Perplexity: 9.4120
474
+ Epoch 23 Training: 35%|███▌ | 1099/3125 [18:13<34:31, 1.02s/it]
475
+ 2025-06-10 14:10:14,380 - __main__ - INFO - Epoch [23/27], Step [1100/3125], Loss: 2.1189, Perplexity: 8.3216
476
+ Epoch 23 Training: 38%|███▊ | 1199/3125 [19:52<31:17, 1.03it/s]
477
+ 2025-06-10 14:11:53,614 - __main__ - INFO - Epoch [23/27], Step [1200/3125], Loss: 2.1247, Perplexity: 8.3700
478
+ Epoch 23 Training: 42%|████▏ | 1299/3125 [21:31<29:57, 1.02it/s]
479
+ 2025-06-10 14:13:32,955 - __main__ - INFO - Epoch [23/27], Step [1300/3125], Loss: 2.1127, Perplexity: 8.2704
480
+ Epoch 23 Training: 45%|████▍ | 1399/3125 [23:10<28:32, 1.01it/s]
481
+ 2025-06-10 14:15:12,031 - __main__ - INFO - Epoch [23/27], Step [1400/3125], Loss: 2.1020, Perplexity: 8.1822
482
+ Epoch 23 Training: 48%|████▊ | 1499/3125 [24:49<27:27, 1.01s/it]
483
+ 2025-06-10 14:16:51,107 - __main__ - INFO - Epoch [23/27], Step [1500/3125], Loss: 2.2269, Perplexity: 9.2714
484
+ Epoch 23 Training: 51%|█████ | 1599/3125 [26:29<25:11, 1.01it/s]
485
+ 2025-06-10 14:18:30,314 - __main__ - INFO - Epoch [23/27], Step [1600/3125], Loss: 2.1121, Perplexity: 8.2657
486
+ Epoch 23 Training: 54%|█████▍ | 1699/3125 [28:08<23:24, 1.02it/s]
487
+ 2025-06-10 14:20:09,761 - __main__ - INFO - Epoch [23/27], Step [1700/3125], Loss: 1.9466, Perplexity: 7.0051
488
+ Epoch 23 Training: 58%|█████▊ | 1799/3125 [29:47<21:55, 1.01it/s]
489
+ 2025-06-10 14:21:48,846 - __main__ - INFO - Epoch [23/27], Step [1800/3125], Loss: 2.2108, Perplexity: 9.1232
490
+ Epoch 23 Training: 61%|██████ | 1899/3125 [31:26<20:11, 1.01it/s]
491
+ 2025-06-10 14:23:28,059 - __main__ - INFO - Epoch [23/27], Step [1900/3125], Loss: 2.2285, Perplexity: 9.2856
492
+ Epoch 23 Training: 64%|██████▍ | 1999/3125 [33:05<18:24, 1.02it/s]
493
+ 2025-06-10 14:25:06,867 - __main__ - INFO - Epoch [23/27], Step [2000/3125], Loss: 2.2417, Perplexity: 9.4092
494
+ Epoch 23 Training: 67%|██████▋ | 2099/3125 [34:44<16:53, 1.01it/s]
495
+ 2025-06-10 14:26:45,881 - __main__ - INFO - Epoch [23/27], Step [2100/3125], Loss: 2.2783, Perplexity: 9.7597
496
+ Epoch 23 Training: 70%|███████ | 2199/3125 [36:24<15:29, 1.00s/it]
497
+ 2025-06-10 14:28:25,408 - __main__ - INFO - Epoch [23/27], Step [2200/3125], Loss: 2.1256, Perplexity: 8.3781
498
+ Epoch 23 Training: 74%|███████▎ | 2299/3125 [38:03<13:27, 1.02it/s]
499
+ 2025-06-10 14:30:04,667 - __main__ - INFO - Epoch [23/27], Step [2300/3125], Loss: 2.4059, Perplexity: 11.0888
500
+ Epoch 23 Training: 77%|███████▋ | 2399/3125 [39:42<11:53, 1.02it/s]
501
+ 2025-06-10 14:31:44,014 - __main__ - INFO - Epoch [23/27], Step [2400/3125], Loss: 2.2476, Perplexity: 9.4653
502
+ Epoch 23 Training: 80%|███████▉ | 2499/3125 [41:22<10:48, 1.04s/it]
503
+ 2025-06-10 14:33:23,465 - __main__ - INFO - Epoch [23/27], Step [2500/3125], Loss: 1.9663, Perplexity: 7.1443
504
+ Epoch 23 Training: 83%|████████▎ | 2599/3125 [43:01<08:45, 1.00it/s]
505
+ 2025-06-10 14:35:02,976 - __main__ - INFO - Epoch [23/27], Step [2600/3125], Loss: 2.2005, Perplexity: 9.0293
506
+ Epoch 23 Training: 86%|████████▋ | 2699/3125 [44:40<07:02, 1.01it/s]
507
+ 2025-06-10 14:36:42,183 - __main__ - INFO - Epoch [23/27], Step [2700/3125], Loss: 2.2762, Perplexity: 9.7399
508
+ Epoch 23 Training: 90%|████████▉ | 2799/3125 [46:19<05:18, 1.02it/s]
509
+ 2025-06-10 14:38:21,135 - __main__ - INFO - Epoch [23/27], Step [2800/3125], Loss: 2.3507, Perplexity: 10.4930
510
+ Epoch 23 Training: 93%|█████████▎| 2899/3125 [47:59<03:43, 1.01it/s]
511
+ 2025-06-10 14:40:00,701 - __main__ - INFO - Epoch [23/27], Step [2900/3125], Loss: 2.1719, Perplexity: 8.7753
512
+ Epoch 23 Training: 96%|█████████▌| 2999/3125 [49:38<02:03, 1.02it/s]
513
+ 2025-06-10 14:41:39,941 - __main__ - INFO - Epoch [23/27], Step [3000/3125], Loss: 2.1339, Perplexity: 8.4474
514
+ Epoch 23 Training: 99%|█████████▉| 3099/3125 [51:18<00:25, 1.02it/s]
515
+ 2025-06-10 14:43:19,557 - __main__ - INFO - Epoch [23/27], Step [3100/3125], Loss: 2.3432, Perplexity: 10.4140
516
+ Epoch 23 Training: 100%|██████████| 3125/3125 [51:44<00:00, 1.01it/s]
517
+ 2025-06-10 14:43:44,527 - __main__ - INFO - Epoch 23 Training finished. Avg Loss: 2.2447, Time: 3104.23s
518
+ Validation: 100%|██████████| 391/391 [11:44<00:00, 1.80s/it]
519
+ 2025-06-10 14:55:28,836 - __main__ - INFO - Validation Avg Loss: 2.5057, Perplexity: 12.2519
520
+ 2025-06-10 14:55:38,160 - __main__ - INFO - Validation BLEU-4: 0.1043
521
+ 2025-06-10 14:55:38,161 - __main__ - INFO - Performing memory optimization after epoch...
522
+ 2025-06-10 14:55:38,437 - __main__ - INFO - CUDA cache emptied.
523
+ 2025-06-10 14:55:38,774 - __main__ - INFO - Python garbage collector run.
524
+ Epoch 24 Training: 3%|▎ | 99/3125 [01:40<48:49, 1.03it/s]
525
+ 2025-06-10 14:57:20,220 - __main__ - INFO - Epoch [24/27], Step [100/3125], Loss: 2.3766, Perplexity: 10.7685
526
+ Epoch 24 Training: 6%|▋ | 199/3125 [03:19<48:06, 1.01it/s]
527
+ 2025-06-10 14:58:59,374 - __main__ - INFO - Epoch [24/27], Step [200/3125], Loss: 2.2492, Perplexity: 9.4799
528
+ Epoch 24 Training: 10%|▉ | 299/3125 [04:58<46:43, 1.01it/s]
529
+ 2025-06-10 15:00:38,692 - __main__ - INFO - Epoch [24/27], Step [300/3125], Loss: 2.0896, Perplexity: 8.0816
530
+ Epoch 24 Training: 13%|█▎ | 399/3125 [06:38<44:56, 1.01it/s]
531
+ 2025-06-10 15:02:17,779 - __main__ - INFO - Epoch [24/27], Step [400/3125], Loss: 2.1230, Perplexity: 8.3560
532
+ Epoch 24 Training: 16%|█▌ | 499/3125 [08:17<43:40, 1.00it/s]
533
+ 2025-06-10 15:03:56,858 - __main__ - INFO - Epoch [24/27], Step [500/3125], Loss: 2.2929, Perplexity: 9.9039
534
+ Epoch 24 Training: 19%|█▉ | 599/3125 [09:56<42:13, 1.00s/it]
535
+ 2025-06-10 15:05:35,920 - __main__ - INFO - Epoch [24/27], Step [600/3125], Loss: 2.2186, Perplexity: 9.1948
536
+ Epoch 24 Training: 22%|██▏ | 699/3125 [11:35<39:55, 1.01it/s]
537
+ 2025-06-10 15:07:15,231 - __main__ - INFO - Epoch [24/27], Step [700/3125], Loss: 2.3652, Perplexity: 10.6457
538
+ Epoch 24 Training: 26%|██▌ | 799/3125 [13:15<39:08, 1.01s/it]
539
+ 2025-06-10 15:08:54,829 - __main__ - INFO - Epoch [24/27], Step [800/3125], Loss: 2.4291, Perplexity: 11.3490
540
+ Epoch 24 Training: 29%|██▉ | 899/3125 [14:54<37:18, 1.01s/it]
541
+ 2025-06-10 15:10:34,047 - __main__ - INFO - Epoch [24/27], Step [900/3125], Loss: 2.0992, Perplexity: 8.1598
542
+ Epoch 24 Training: 32%|███▏ | 999/3125 [16:33<35:14, 1.01it/s]
543
+ 2025-06-10 15:12:13,251 - __main__ - INFO - Epoch [24/27], Step [1000/3125], Loss: 2.2295, Perplexity: 9.2953
544
+ Epoch 24 Training: 35%|███▌ | 1099/3125 [18:12<33:32, 1.01it/s]
545
+ 2025-06-10 15:13:52,348 - __main__ - INFO - Epoch [24/27], Step [1100/3125], Loss: 2.2423, Perplexity: 9.4153
546
+ Epoch 24 Training: 38%|███▊ | 1199/3125 [19:51<31:34, 1.02it/s]
547
+ 2025-06-10 15:15:31,270 - __main__ - INFO - Epoch [24/27], Step [1200/3125], Loss: 2.1982, Perplexity: 9.0089
548
+ Epoch 24 Training: 42%|████▏ | 1299/3125 [21:30<30:16, 1.00it/s]
549
+ 2025-06-10 15:17:10,504 - __main__ - INFO - Epoch [24/27], Step [1300/3125], Loss: 2.3898, Perplexity: 10.9117
550
+ Epoch 24 Training: 45%|████▍ | 1399/3125 [23:10<29:03, 1.01s/it]
551
+ 2025-06-10 15:18:49,778 - __main__ - INFO - Epoch [24/27], Step [1400/3125], Loss: 2.4188, Perplexity: 11.2319
552
+ Epoch 24 Training: 48%|████▊ | 1499/3125 [24:48<26:26, 1.02it/s]
553
+ 2025-06-10 15:20:28,636 - __main__ - INFO - Epoch [24/27], Step [1500/3125], Loss: 2.2394, Perplexity: 9.3875
554
+ Epoch 24 Training: 51%|█████ | 1599/3125 [26:27<24:59, 1.02it/s]
555
+ 2025-06-10 15:22:07,698 - __main__ - INFO - Epoch [24/27], Step [1600/3125], Loss: 2.3871, Perplexity: 10.8814
556
+ Epoch 24 Training: 54%|█████▍ | 1699/3125 [28:07<23:57, 1.01s/it]
557
+ 2025-06-10 15:23:46,949 - __main__ - INFO - Epoch [24/27], Step [1700/3125], Loss: 2.4062, Perplexity: 11.0917
558
+ Epoch 24 Training: 58%|█████▊ | 1799/3125 [29:45<21:37, 1.02it/s]
559
+ 2025-06-10 15:25:25,232 - __main__ - INFO - Epoch [24/27], Step [1800/3125], Loss: 2.2069, Perplexity: 9.0873
560
+ Epoch 24 Training: 61%|██████ | 1899/3125 [31:24<19:42, 1.04it/s]
561
+ 2025-06-10 15:27:04,432 - __main__ - INFO - Epoch [24/27], Step [1900/3125], Loss: 2.1929, Perplexity: 8.9615
562
+ Epoch 24 Training: 64%|██████▍ | 1999/3125 [33:04<19:03, 1.02s/it]
563
+ 2025-06-10 15:28:43,830 - __main__ - INFO - Epoch [24/27], Step [2000/3125], Loss: 2.2991, Perplexity: 9.9652
564
+ Epoch 24 Training: 67%|██████▋ | 2099/3125 [34:43<16:56, 1.01it/s]
565
+ 2025-06-10 15:30:22,979 - __main__ - INFO - Epoch [24/27], Step [2100/3125], Loss: 2.2047, Perplexity: 9.0674
566
+ Epoch 24 Training: 70%|███████ | 2199/3125 [36:22<15:04, 1.02it/s]
567
+ 2025-06-10 15:32:01,941 - __main__ - INFO - Epoch [24/27], Step [2200/3125], Loss: 2.1788, Perplexity: 8.8360
568
+ Epoch 24 Training: 74%|███████▎ | 2299/3125 [38:01<13:39, 1.01it/s]
569
+ 2025-06-10 15:33:41,424 - __main__ - INFO - Epoch [24/27], Step [2300/3125], Loss: 2.3368, Perplexity: 10.3484
570
+ Epoch 24 Training: 77%|███████▋ | 2399/3125 [39:41<12:17, 1.02s/it]
571
+ 2025-06-10 15:35:20,892 - __main__ - INFO - Epoch [24/27], Step [2400/3125], Loss: 2.3685, Perplexity: 10.6811
572
+ Epoch 24 Training: 80%|███████▉ | 2499/3125 [41:20<10:39, 1.02s/it]
573
+ 2025-06-10 15:37:00,224 - __main__ - INFO - Epoch [24/27], Step [2500/3125], Loss: 2.1582, Perplexity: 8.6555
574
+ Epoch 24 Training: 83%|████████▎ | 2599/3125 [42:59<08:50, 1.01s/it]
575
+ 2025-06-10 15:38:39,540 - __main__ - INFO - Epoch [24/27], Step [2600/3125], Loss: 2.2999, Perplexity: 9.9734
576
+ Epoch 24 Training: 86%|████████▋ | 2699/3125 [44:39<07:09, 1.01s/it]
577
+ 2025-06-10 15:40:19,212 - __main__ - INFO - Epoch [24/27], Step [2700/3125], Loss: 2.1185, Perplexity: 8.3184
578
+ Epoch 24 Training: 90%|████████▉ | 2799/3125 [46:19<05:28, 1.01s/it]
579
+ 2025-06-10 15:41:59,136 - __main__ - INFO - Epoch [24/27], Step [2800/3125], Loss: 2.5695, Perplexity: 13.0598
580
+ Epoch 24 Training: 93%|█████████▎| 2899/3125 [47:58<03:46, 1.00s/it]
581
+ 2025-06-10 15:43:37,941 - __main__ - INFO - Epoch [24/27], Step [2900/3125], Loss: 2.2560, Perplexity: 9.5450
582
+ Epoch 24 Training: 96%|█████████▌| 2999/3125 [49:37<02:04, 1.01it/s]
583
+ 2025-06-10 15:45:17,249 - __main__ - INFO - Epoch [24/27], Step [3000/3125], Loss: 2.3286, Perplexity: 10.2635
584
+ Epoch 24 Training: 99%|█████████▉| 3099/3125 [51:16<00:25, 1.02it/s]
585
+ 2025-06-10 15:46:56,315 - __main__ - INFO - Epoch [24/27], Step [3100/3125], Loss: 2.2094, Perplexity: 9.1106
586
+ Epoch 24 Training: 100%|██████████| 3125/3125 [51:42<00:00, 1.01it/s]
587
+ 2025-06-10 15:47:21,178 - __main__ - INFO - Epoch 24 Training finished. Avg Loss: 2.2160, Time: 3102.40s
588
+ Validation: 100%|██████████| 391/391 [11:40<00:00, 1.79s/it]
589
+ 2025-06-10 15:59:02,154 - __main__ - INFO - Validation Avg Loss: 2.5189, Perplexity: 12.4151
590
+ 2025-06-10 15:59:11,517 - __main__ - INFO - Validation BLEU-4: 0.1045
591
+ 2025-06-10 15:59:11,518 - __main__ - INFO - Performing memory optimization after epoch...
592
+ 2025-06-10 15:59:11,789 - __main__ - INFO - CUDA cache emptied.
593
+ 2025-06-10 15:59:12,121 - __main__ - INFO - Python garbage collector run.
594
+ Epoch 25 Training: 3%|▎ | 99/3125 [01:40<50:05, 1.01it/s]
595
+ 2025-06-10 16:00:53,982 - __main__ - INFO - Epoch [25/27], Step [100/3125], Loss: 2.0074, Perplexity: 7.4441
596
+ Epoch 25 Training: 6%|▋ | 199/3125 [03:20<48:42, 1.00it/s]
597
+ 2025-06-10 16:02:33,427 - __main__ - INFO - Epoch [25/27], Step [200/3125], Loss: 2.0555, Perplexity: 7.8107
598
+ Epoch 25 Training: 10%|▉ | 299/3125 [04:59<46:02, 1.02it/s]
599
+ 2025-06-10 16:04:12,803 - __main__ - INFO - Epoch [25/27], Step [300/3125], Loss: 2.3032, Perplexity: 10.0063
600
+ Epoch 25 Training: 13%|█▎ | 399/3125 [06:38<44:27, 1.02it/s]
601
+ 2025-06-10 16:05:51,698 - __main__ - INFO - Epoch [25/27], Step [400/3125], Loss: 2.1707, Perplexity: 8.7648
602
+ Epoch 25 Training: 16%|█▌ | 499/3125 [08:17<43:04, 1.02it/s]
603
+ 2025-06-10 16:07:30,853 - __main__ - INFO - Epoch [25/27], Step [500/3125], Loss: 2.1961, Perplexity: 8.9898
604
+ Epoch 25 Training: 19%|█▉ | 599/3125 [09:56<41:10, 1.02it/s]
605
+ 2025-06-10 16:09:10,070 - __main__ - INFO - Epoch [25/27], Step [600/3125], Loss: 2.0564, Perplexity: 7.8181
606
+ Epoch 25 Training: 22%|██▏ | 699/3125 [11:36<39:34, 1.02it/s]
607
+ 2025-06-10 16:10:49,506 - __main__ - INFO - Epoch [25/27], Step [700/3125], Loss: 2.0773, Perplexity: 7.9832
608
+ Epoch 25 Training: 26%|██▌ | 799/3125 [13:15<38:18, 1.01it/s]
609
+ 2025-06-10 16:12:28,561 - __main__ - INFO - Epoch [25/27], Step [800/3125], Loss: 2.3222, Perplexity: 10.1984
610
+ Epoch 25 Training: 29%|██▉ | 899/3125 [14:54<36:41, 1.01it/s]
611
+ 2025-06-10 16:14:07,966 - __main__ - INFO - Epoch [25/27], Step [900/3125], Loss: 2.1284, Perplexity: 8.4017
612
+ Epoch 25 Training: 32%|███▏ | 999/3125 [16:34<35:14, 1.01it/s]
613
+ 2025-06-10 16:15:47,098 - __main__ - INFO - Epoch [25/27], Step [1000/3125], Loss: 2.1676, Perplexity: 8.7370
614
+ Epoch 25 Training: 35%|███▌ | 1099/3125 [18:13<33:31, 1.01it/s]
615
+ 2025-06-10 16:17:26,645 - __main__ - INFO - Epoch [25/27], Step [1100/3125], Loss: 2.2209, Perplexity: 9.2159
616
+ Epoch 25 Training: 38%|███▊ | 1199/3125 [19:52<32:17, 1.01s/it]
617
+ 2025-06-10 16:19:05,907 - __main__ - INFO - Epoch [25/27], Step [1200/3125], Loss: 2.1686, Perplexity: 8.7457
618
+ Epoch 25 Training: 42%|████▏ | 1299/3125 [21:31<30:14, 1.01it/s]
619
+ 2025-06-10 16:20:45,059 - __main__ - INFO - Epoch [25/27], Step [1300/3125], Loss: 2.3130, Perplexity: 10.1044
620
+ Epoch 25 Training: 45%|████▍ | 1399/3125 [23:11<28:17, 1.02it/s]
621
+ 2025-06-10 16:22:24,187 - __main__ - INFO - Epoch [25/27], Step [1400/3125], Loss: 2.2171, Perplexity: 9.1806
622
+ Epoch 25 Training: 48%|████▊ | 1499/3125 [24:50<26:56, 1.01it/s]
623
+ 2025-06-10 16:24:03,698 - __main__ - INFO - Epoch [25/27], Step [1500/3125], Loss: 2.2474, Perplexity: 9.4627
624
+ Epoch 25 Training: 51%|█████ | 1599/3125 [26:29<25:54, 1.02s/it]
625
+ 2025-06-10 16:25:42,883 - __main__ - INFO - Epoch [25/27], Step [1600/3125], Loss: 2.2844, Perplexity: 9.8197
626
+ Epoch 25 Training: 54%|█████▍ | 1699/3125 [28:09<23:26, 1.01it/s]
627
+ 2025-06-10 16:27:22,497 - __main__ - INFO - Epoch [25/27], Step [1700/3125], Loss: 2.2792, Perplexity: 9.7693
628
+ Epoch 25 Training: 58%|█████▊ | 1799/3125 [29:48<21:54, 1.01it/s]
629
+ 2025-06-10 16:29:01,803 - __main__ - INFO - Epoch [25/27], Step [1800/3125], Loss: 2.2396, Perplexity: 9.3897
630
+ Epoch 25 Training: 61%|██████ | 1899/3125 [31:27<20:22, 1.00it/s]
631
+ 2025-06-10 16:30:40,803 - __main__ - INFO - Epoch [25/27], Step [1900/3125], Loss: 2.0225, Perplexity: 7.5576
632
+ Epoch 25 Training: 64%|██████▍ | 1999/3125 [33:06<18:29, 1.01it/s]
633
+ 2025-06-10 16:32:19,976 - __main__ - INFO - Epoch [25/27], Step [2000/3125], Loss: 2.2360, Perplexity: 9.3554
634
+ Epoch 25 Training: 67%|██████▋ | 2099/3125 [34:45<16:40, 1.03it/s]
635
+ 2025-06-10 16:33:58,927 - __main__ - INFO - Epoch [25/27], Step [2100/3125], Loss: 2.0710, Perplexity: 7.9326
636
+ Epoch 25 Training: 70%|███████ | 2199/3125 [36:25<15:18, 1.01it/s]
637
+ 2025-06-10 16:35:38,376 - __main__ - INFO - Epoch [25/27], Step [2200/3125], Loss: 2.3347, Perplexity: 10.3267
638
+ Epoch 25 Training: 74%|███████▎ | 2299/3125 [38:04<13:50, 1.01s/it]
639
+ 2025-06-10 16:37:17,584 - __main__ - INFO - Epoch [25/27], Step [2300/3125], Loss: 2.0350, Perplexity: 7.6525
640
+ Epoch 25 Training: 77%|███████▋ | 2399/3125 [39:44<12:17, 1.02s/it]
641
+ 2025-06-10 16:38:57,181 - __main__ - INFO - Epoch [25/27], Step [2400/3125], Loss: 2.2684, Perplexity: 9.6638
642
+ Epoch 25 Training: 80%|███████▉ | 2499/3125 [41:23<10:17, 1.01it/s]
643
+ 2025-06-10 16:40:36,758 - __main__ - INFO - Epoch [25/27], Step [2500/3125], Loss: 2.2548, Perplexity: 9.5332
644
+ Epoch 25 Training: 83%|████████▎ | 2599/3125 [43:03<08:45, 1.00it/s]
645
+ 2025-06-10 16:42:16,315 - __main__ - INFO - Epoch [25/27], Step [2600/3125], Loss: 2.0938, Perplexity: 8.1161
646
+ Epoch 25 Training: 86%|████████▋ | 2699/3125 [44:42<07:05, 1.00it/s]
647
+ 2025-06-10 16:43:55,636 - __main__ - INFO - Epoch [25/27], Step [2700/3125], Loss: 2.1231, Perplexity: 8.3569
648
+ Epoch 25 Training: 90%|████████▉ | 2799/3125 [46:21<05:27, 1.01s/it]
649
+ 2025-06-10 16:45:34,688 - __main__ - INFO - Epoch [25/27], Step [2800/3125], Loss: 2.1211, Perplexity: 8.3405
650
+ Epoch 25 Training: 93%|█████████▎| 2899/3125 [48:00<03:45, 1.00it/s]
651
+ 2025-06-10 16:47:13,853 - __main__ - INFO - Epoch [25/27], Step [2900/3125], Loss: 2.2003, Perplexity: 9.0279
652
+ Epoch 25 Training: 96%|█████████▌| 2999/3125 [49:40<02:04, 1.01it/s]
653
+ 2025-06-10 16:48:53,644 - __main__ - INFO - Epoch [25/27], Step [3000/3125], Loss: 2.2365, Perplexity: 9.3608
654
+ Epoch 25 Training: 99%|█████████▉| 3099/3125 [51:19<00:25, 1.03it/s]
655
+ 2025-06-10 16:50:32,426 - __main__ - INFO - Epoch [25/27], Step [3100/3125], Loss: 2.1602, Perplexity: 8.6729
656
+ Epoch 25 Training: 100%|██████████| 3125/3125 [51:45<00:00, 1.01it/s]
657
+ 2025-06-10 16:50:57,431 - __main__ - INFO - Epoch 25 Training finished. Avg Loss: 2.1887, Time: 3105.31s
658
+ Validation: 100%|██████████| 391/391 [11:43<00:00, 1.80s/it]
659
+ 2025-06-10 17:02:40,897 - __main__ - INFO - Validation Avg Loss: 2.5252, Perplexity: 12.4939
660
+ 2025-06-10 17:02:50,176 - __main__ - INFO - Validation BLEU-4: 0.1035
661
+ 2025-06-10 17:02:50,820 - __main__ - INFO - Saved periodic model checkpoint to ./output/model_epoch_25.pth
662
+ 2025-06-10 17:02:50,821 - __main__ - INFO - Performing memory optimization after epoch...
663
+ 2025-06-10 17:02:51,113 - __main__ - INFO - CUDA cache emptied.
664
+ 2025-06-10 17:02:51,455 - __main__ - INFO - Python garbage collector run.
665
+ Epoch 26 Training: 3%|▎ | 99/3125 [01:40<49:39, 1.02it/s]
666
+ 2025-06-10 17:04:33,276 - __main__ - INFO - Epoch [26/27], Step [100/3125], Loss: 2.1935, Perplexity: 8.9668
667
+ Epoch 26 Training: 6%|▋ | 199/3125 [03:19<48:02, 1.02it/s]
668
+ 2025-06-10 17:06:11,935 - __main__ - INFO - Epoch [26/27], Step [200/3125], Loss: 1.9728, Perplexity: 7.1905
669
+ Epoch 26 Training: 10%|▉ | 299/3125 [04:59<46:10, 1.02it/s]
670
+ 2025-06-10 17:07:51,541 - __main__ - INFO - Epoch [26/27], Step [300/3125], Loss: 2.2561, Perplexity: 9.5454
671
+ Epoch 26 Training: 13%|█▎ | 399/3125 [06:37<44:23, 1.02it/s]
672
+ 2025-06-10 17:09:30,184 - __main__ - INFO - Epoch [26/27], Step [400/3125], Loss: 2.0493, Perplexity: 7.7628
673
+ Epoch 26 Training: 16%|█▌ | 499/3125 [08:17<43:07, 1.02it/s]
674
+ 2025-06-10 17:11:09,536 - __main__ - INFO - Epoch [26/27], Step [500/3125], Loss: 2.2319, Perplexity: 9.3180
675
+ Epoch 26 Training: 19%|█▉ | 599/3125 [09:55<41:37, 1.01it/s]
676
+ 2025-06-10 17:12:48,359 - __main__ - INFO - Epoch [26/27], Step [600/3125], Loss: 2.1954, Perplexity: 8.9836
677
+ Epoch 26 Training: 22%|██▏ | 699/3125 [11:35<40:20, 1.00it/s]
678
+ 2025-06-10 17:14:27,587 - __main__ - INFO - Epoch [26/27], Step [700/3125], Loss: 2.1557, Perplexity: 8.6338
679
+ Epoch 26 Training: 26%|██▌ | 799/3125 [13:14<38:08, 1.02it/s]
680
+ 2025-06-10 17:16:06,635 - __main__ - INFO - Epoch [26/27], Step [800/3125], Loss: 2.4767, Perplexity: 11.9019
681
+ Epoch 26 Training: 29%|██▉ | 899/3125 [14:53<37:20, 1.01s/it]
682
+ 2025-06-10 17:17:46,247 - __main__ - INFO - Epoch [26/27], Step [900/3125], Loss: 2.2038, Perplexity: 9.0594
683
+ Epoch 26 Training: 32%|███▏ | 999/3125 [16:32<34:44, 1.02it/s]
684
+ 2025-06-10 17:19:24,943 - __main__ - INFO - Epoch [26/27], Step [1000/3125], Loss: 2.1657, Perplexity: 8.7203
685
+ Epoch 26 Training: 35%|███▌ | 1099/3125 [18:11<33:20, 1.01it/s]
686
+ 2025-06-10 17:21:03,864 - __main__ - INFO - Epoch [26/27], Step [1100/3125], Loss: 2.0512, Perplexity: 7.7773
687
+ Epoch 26 Training: 38%|███▊ | 1199/3125 [19:50<32:41, 1.02s/it]
688
+ 2025-06-10 17:22:42,855 - __main__ - INFO - Epoch [26/27], Step [1200/3125], Loss: 2.2466, Perplexity: 9.4556
689
+ Epoch 26 Training: 42%|████▏ | 1299/3125 [21:29<30:16, 1.01it/s]
690
+ 2025-06-10 17:24:21,894 - __main__ - INFO - Epoch [26/27], Step [1300/3125], Loss: 2.1989, Perplexity: 9.0151
691
+ Epoch 26 Training: 45%|████▍ | 1399/3125 [23:09<28:32, 1.01it/s]
692
+ 2025-06-10 17:26:01,435 - __main__ - INFO - Epoch [26/27], Step [1400/3125], Loss: 2.2580, Perplexity: 9.5638
693
+ Epoch 26 Training: 48%|████▊ | 1499/3125 [24:48<26:59, 1.00it/s]
694
+ 2025-06-10 17:27:40,602 - __main__ - INFO - Epoch [26/27], Step [1500/3125], Loss: 2.3685, Perplexity: 10.6812
695
+ Epoch 26 Training: 51%|█████ | 1599/3125 [26:27<24:55, 1.02it/s]
696
+ 2025-06-10 17:29:19,701 - __main__ - INFO - Epoch [26/27], Step [1600/3125], Loss: 2.1787, Perplexity: 8.8347
697
+ Epoch 26 Training: 54%|█████▍ | 1699/3125 [28:06<23:58, 1.01s/it]
698
+ 2025-06-10 17:30:59,005 - __main__ - INFO - Epoch [26/27], Step [1700/3125], Loss: 2.2104, Perplexity: 9.1193
699
+ Epoch 26 Training: 58%|█████▊ | 1799/3125 [29:45<21:55, 1.01it/s]
700
+ 2025-06-10 17:32:38,342 - __main__ - INFO - Epoch [26/27], Step [1800/3125], Loss: 2.0694, Perplexity: 7.9204
701
+ Epoch 26 Training: 61%|██████ | 1899/3125 [31:25<20:23, 1.00it/s]
702
+ 2025-06-10 17:34:17,536 - __main__ - INFO - Epoch [26/27], Step [1900/3125], Loss: 2.1163, Perplexity: 8.3004
703
+ Epoch 26 Training: 64%|██████▍ | 1999/3125 [33:03<18:49, 1.00s/it]
704
+ 2025-06-10 17:35:56,419 - __main__ - INFO - Epoch [26/27], Step [2000/3125], Loss: 2.2775, Perplexity: 9.7523
705
+ Epoch 26 Training: 67%|██████▋ | 2099/3125 [34:42<16:56, 1.01it/s]
706
+ 2025-06-10 17:37:35,002 - __main__ - INFO - Epoch [26/27], Step [2100/3125], Loss: 2.4668, Perplexity: 11.7850
707
+ Epoch 26 Training: 70%|███████ | 2199/3125 [36:21<15:04, 1.02it/s]
708
+ 2025-06-10 17:39:13,937 - __main__ - INFO - Epoch [26/27], Step [2200/3125], Loss: 2.0802, Perplexity: 8.0062
709
+ Epoch 26 Training: 74%|███████▎ | 2299/3125 [38:00<14:06, 1.02s/it]
710
+ 2025-06-10 17:40:53,145 - __main__ - INFO - Epoch [26/27], Step [2300/3125], Loss: 2.2296, Perplexity: 9.2964
711
+ Epoch 26 Training: 77%|███████▋ | 2399/3125 [39:39<12:02, 1.00it/s]
712
+ 2025-06-10 17:42:32,450 - __main__ - INFO - Epoch [26/27], Step [2400/3125], Loss: 2.0793, Perplexity: 7.9986
713
+ Epoch 26 Training: 80%|███████▉ | 2499/3125 [41:19<10:05, 1.03it/s]
714
+ 2025-06-10 17:44:11,811 - __main__ - INFO - Epoch [26/27], Step [2500/3125], Loss: 2.1663, Perplexity: 8.7259
715
+ Epoch 26 Training: 83%|████████▎ | 2599/3125 [42:58<08:47, 1.00s/it]
716
+ 2025-06-10 17:45:51,064 - __main__ - INFO - Epoch [26/27], Step [2600/3125], Loss: 1.9672, Perplexity: 7.1505
717
+ Epoch 26 Training: 86%|████████▋ | 2699/3125 [44:37<06:58, 1.02it/s]
718
+ 2025-06-10 17:47:30,464 - __main__ - INFO - Epoch [26/27], Step [2700/3125], Loss: 2.3145, Perplexity: 10.1195
719
+ Epoch 26 Training: 90%|████████▉ | 2799/3125 [46:17<05:26, 1.00s/it]
720
+ 2025-06-10 17:49:09,938 - __main__ - INFO - Epoch [26/27], Step [2800/3125], Loss: 2.1893, Perplexity: 8.9292
721
+ Epoch 26 Training: 93%|█████████▎| 2899/3125 [47:56<03:43, 1.01it/s]
722
+ 2025-06-10 17:50:48,764 - __main__ - INFO - Epoch [26/27], Step [2900/3125], Loss: 2.0229, Perplexity: 7.5600
723
+ Epoch 26 Training: 96%|█████████▌| 2999/3125 [49:35<02:03, 1.02it/s]
724
+ 2025-06-10 17:52:27,535 - __main__ - INFO - Epoch [26/27], Step [3000/3125], Loss: 2.4255, Perplexity: 11.3075
725
+ Epoch 26 Training: 99%|█████████▉| 3099/3125 [51:14<00:25, 1.01it/s]
726
+ 2025-06-10 17:54:07,103 - __main__ - INFO - Epoch [26/27], Step [3100/3125], Loss: 2.2570, Perplexity: 9.5549
727
+ Epoch 26 Training: 100%|██████████| 3125/3125 [51:40<00:00, 1.01it/s]
728
+ 2025-06-10 17:54:31,850 - __main__ - INFO - Epoch 26 Training finished. Avg Loss: 2.1649, Time: 3100.39s
729
+ Validation: 100%|██████████| 391/391 [11:39<00:00, 1.79s/it]
730
+ 2025-06-10 18:06:11,797 - __main__ - INFO - Validation Avg Loss: 2.5364, Perplexity: 12.6343
731
+ 2025-06-10 18:06:21,178 - __main__ - INFO - Validation BLEU-4: 0.1028
732
+ 2025-06-10 18:06:21,179 - __main__ - INFO - Performing memory optimization after epoch...
733
+ 2025-06-10 18:06:21,442 - __main__ - INFO - CUDA cache emptied.
734
+ 2025-06-10 18:06:21,779 - __main__ - INFO - Python garbage collector run.
735
+ Epoch 27 Training: 3%|▎ | 99/3125 [01:40<50:33, 1.00s/it]
736
+ 2025-06-10 18:08:03,665 - __main__ - INFO - Epoch [27/27], Step [100/3125], Loss: 2.0213, Perplexity: 7.5483
737
+ Epoch 27 Training: 6%|▋ | 199/3125 [03:20<48:20, 1.01it/s]
738
+ 2025-06-10 18:09:42,825 - __main__ - INFO - Epoch [27/27], Step [200/3125], Loss: 2.0965, Perplexity: 8.1376
739
+ Epoch 27 Training: 10%|▉ | 299/3125 [04:59<46:24, 1.01it/s]
740
+ 2025-06-10 18:11:21,959 - __main__ - INFO - Epoch [27/27], Step [300/3125], Loss: 2.1239, Perplexity: 8.3635
741
+ Epoch 27 Training: 13%|█▎ | 399/3125 [06:37<44:53, 1.01it/s]
742
+ 2025-06-10 18:13:00,610 - __main__ - INFO - Epoch [27/27], Step [400/3125], Loss: 2.0620, Perplexity: 7.8614
743
+ Epoch 27 Training: 16%|█▌ | 499/3125 [08:17<43:45, 1.00it/s]
744
+ 2025-06-10 18:14:40,303 - __main__ - INFO - Epoch [27/27], Step [500/3125], Loss: 2.2273, Perplexity: 9.2751
745
+ Epoch 27 Training: 19%|█▉ | 599/3125 [09:56<42:36, 1.01s/it]
746
+ 2025-06-10 18:16:19,474 - __main__ - INFO - Epoch [27/27], Step [600/3125], Loss: 1.9186, Perplexity: 6.8115
747
+ Epoch 27 Training: 22%|██▏ | 699/3125 [11:35<39:33, 1.02it/s]
748
+ 2025-06-10 18:17:58,184 - __main__ - INFO - Epoch [27/27], Step [700/3125], Loss: 2.1434, Perplexity: 8.5282
749
+ Epoch 27 Training: 26%|██▌ | 799/3125 [13:14<38:11, 1.02it/s]
750
+ 2025-06-10 18:19:37,100 - __main__ - INFO - Epoch [27/27], Step [800/3125], Loss: 2.0196, Perplexity: 7.5353
751
+ Epoch 27 Training: 29%|██▉ | 899/3125 [14:52<36:30, 1.02it/s]
752
+ 2025-06-10 18:21:15,710 - __main__ - INFO - Epoch [27/27], Step [900/3125], Loss: 1.9703, Perplexity: 7.1727
753
+ Epoch 27 Training: 32%|███▏ | 999/3125 [16:32<34:55, 1.01it/s]
754
+ 2025-06-10 18:22:55,193 - __main__ - INFO - Epoch [27/27], Step [1000/3125], Loss: 1.9125, Perplexity: 6.7700
755
+ Epoch 27 Training: 35%|███▌ | 1099/3125 [18:11<32:38, 1.03it/s]
756
+ 2025-06-10 18:24:34,007 - __main__ - INFO - Epoch [27/27], Step [1100/3125], Loss: 2.1512, Perplexity: 8.5948
757
+ Epoch 27 Training: 38%|███▊ | 1199/3125 [19:50<31:47, 1.01it/s]
758
+ 2025-06-10 18:26:13,603 - __main__ - INFO - Epoch [27/27], Step [1200/3125], Loss: 2.1597, Perplexity: 8.6683
759
+ Epoch 27 Training: 42%|████▏ | 1299/3125 [21:30<30:53, 1.02s/it]
760
+ 2025-06-10 18:27:53,207 - __main__ - INFO - Epoch [27/27], Step [1300/3125], Loss: 2.1628, Perplexity: 8.6955
761
+ Epoch 27 Training: 45%|████▍ | 1399/3125 [23:08<28:29, 1.01it/s]
762
+ 2025-06-10 18:29:31,577 - __main__ - INFO - Epoch [27/27], Step [1400/3125], Loss: 2.1293, Perplexity: 8.4093
763
+ Epoch 27 Training: 48%|████▊ | 1499/3125 [24:47<27:13, 1.00s/it]
764
+ 2025-06-10 18:31:10,700 - __main__ - INFO - Epoch [27/27], Step [1500/3125], Loss: 1.9977, Perplexity: 7.3723
765
+ Epoch 27 Training: 51%|█████ | 1599/3125 [26:26<25:40, 1.01s/it]
766
+ 2025-06-10 18:32:49,710 - __main__ - INFO - Epoch [27/27], Step [1600/3125], Loss: 2.1225, Perplexity: 8.3524
767
+ Epoch 27 Training: 54%|█████▍ | 1699/3125 [28:06<23:27, 1.01it/s]
768
+ 2025-06-10 18:34:28,810 - __main__ - INFO - Epoch [27/27], Step [1700/3125], Loss: 2.0431, Perplexity: 7.7143
769
+ Epoch 27 Training: 58%|█████▊ | 1799/3125 [29:45<21:41, 1.02it/s]
770
+ 2025-06-10 18:36:08,315 - __main__ - INFO - Epoch [27/27], Step [1800/3125], Loss: 2.1295, Perplexity: 8.4103
771
+ Epoch 27 Training: 61%|██████ | 1899/3125 [31:24<20:10, 1.01it/s]
772
+ 2025-06-10 18:37:47,406 - __main__ - INFO - Epoch [27/27], Step [1900/3125], Loss: 2.1814, Perplexity: 8.8590
773
+ Epoch 27 Training: 64%|██████▍ | 1999/3125 [33:03<18:23, 1.02it/s]
774
+ 2025-06-10 18:39:26,578 - __main__ - INFO - Epoch [27/27], Step [2000/3125], Loss: 2.1035, Perplexity: 8.1949
775
+ Epoch 27 Training: 67%|██████▋ | 2099/3125 [34:43<17:19, 1.01s/it]
776
+ 2025-06-10 18:41:05,955 - __main__ - INFO - Epoch [27/27], Step [2100/3125], Loss: 2.2474, Perplexity: 9.4627
777
+ Epoch 27 Training: 70%|███████ | 2199/3125 [36:22<15:35, 1.01s/it]
778
+ 2025-06-10 18:42:45,066 - __main__ - INFO - Epoch [27/27], Step [2200/3125], Loss: 2.1740, Perplexity: 8.7938
779
+ Epoch 27 Training: 74%|███████▎ | 2299/3125 [38:02<13:55, 1.01s/it]
780
+ 2025-06-10 18:44:24,893 - __main__ - INFO - Epoch [27/27], Step [2300/3125], Loss: 2.0490, Perplexity: 7.7599
781
+ Epoch 27 Training: 77%|███████▋ | 2399/3125 [39:41<12:00, 1.01it/s]
782
+ 2025-06-10 18:46:04,439 - __main__ - INFO - Epoch [27/27], Step [2400/3125], Loss: 2.0989, Perplexity: 8.1575
783
+ Epoch 27 Training: 80%|███████▉ | 2499/3125 [41:21<10:33, 1.01s/it]
784
+ 2025-06-10 18:47:44,059 - __main__ - INFO - Epoch [27/27], Step [2500/3125], Loss: 2.1561, Perplexity: 8.6375
785
+ Epoch 27 Training: 83%|████████▎ | 2599/3125 [43:00<08:38, 1.01it/s]
786
+ 2025-06-10 18:49:22,937 - __main__ - INFO - Epoch [27/27], Step [2600/3125], Loss: 2.0021, Perplexity: 7.4043
787
+ Epoch 27 Training: 86%|████████▋ | 2699/3125 [44:38<07:00, 1.01it/s]
788
+ 2025-06-10 18:51:01,628 - __main__ - INFO - Epoch [27/27], Step [2700/3125], Loss: 2.1921, Perplexity: 8.9542
789
+ Epoch 27 Training: 90%|████████▉ | 2799/3125 [46:18<05:24, 1.01it/s]
790
+ 2025-06-10 18:52:41,490 - __main__ - INFO - Epoch [27/27], Step [2800/3125], Loss: 2.0942, Perplexity: 8.1186
791
+ Epoch 27 Training: 93%|█████████▎| 2899/3125 [47:58<03:52, 1.03s/it]
792
+ 2025-06-10 18:54:20,942 - __main__ - INFO - Epoch [27/27], Step [2900/3125], Loss: 2.0986, Perplexity: 8.1548
793
+ Epoch 27 Training: 96%|█████████▌| 2999/3125 [49:37<02:03, 1.02it/s]
794
+ 2025-06-10 18:56:00,291 - __main__ - INFO - Epoch [27/27], Step [3000/3125], Loss: 2.0972, Perplexity: 8.1434
795
+ Epoch 27 Training: 99%|█████████▉| 3099/3125 [51:16<00:25, 1.02it/s]
796
+ 2025-06-10 18:57:39,235 - __main__ - INFO - Epoch [27/27], Step [3100/3125], Loss: 2.3328, Perplexity: 10.3071
797
+ Epoch 27 Training: 100%|██████████| 3125/3125 [51:42<00:00, 1.01it/s]
798
+ 2025-06-10 18:58:04,262 - __main__ - INFO - Epoch 27 Training finished. Avg Loss: 2.1058, Time: 3102.48s
799
+ Validation: 100%|██████████| 391/391 [11:40<00:00, 1.79s/it]
800
+ 2025-06-10 19:09:45,225 - __main__ - INFO - Validation Avg Loss: 2.5443, Perplexity: 12.7347
801
+ 2025-06-10 19:09:54,459 - __main__ - INFO - Validation BLEU-4: 0.1039
802
+ 2025-06-10 19:09:54,460 - __main__ - INFO - Performing memory optimization after epoch...
803
+ 2025-06-10 19:09:54,737 - __main__ - INFO - CUDA cache emptied.
804
+ 2025-06-10 19:09:55,085 - __main__ - INFO - Python garbage collector run.
805
+ 2025-06-10 19:09:55,086 - __main__ - INFO - Training complete.
806
+ 2025-06-10 19:09:55,087 - __main__ - INFO - Model Training Complete!
text_files/training_log_1_18.txt ADDED
@@ -0,0 +1,668 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-09 20:01:36,610 - __main__ - INFO - Previous notebook output found at: /kaggle/input/my-image-captioning-model-epochs-1-9/output. Copying to ./output...
2
+ 2025-06-09 20:02:22,087 - __main__ - INFO - Previous output copied successfully to current working directory for resumption.
3
+ 2025-06-09 20:02:22,088 - __main__ - INFO -
4
+ --- Starting Model Training ---
5
+ 2025-06-09 20:02:22,089 - __main__ - INFO - Starting training process...
6
+ 2025-06-09 20:02:22,090 - __main__ - INFO - Using device: cuda
7
+ 2025-06-09 20:02:22,104 - __main__ - WARNING - Vocabulary source not found at /kaggle/input/vocabulary_s/pytorch/default/1/vocabulary.pkl. Will build new vocabulary.
8
+ 2025-06-09 20:02:22,104 - __main__ - INFO - Building new vocabulary from training dataset...
9
+ 2025-06-09 20:02:23,082 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_train2017.json
10
+ Processing annotations: 100%|██████████| 591753/591753 [05:53<00:00, 1674.75it/s]
11
+ 2025-06-09 20:08:16,457 - __main__ - INFO - Dataset size after filtering: 591753 samples.
12
+ 2025-06-09 20:08:16,503 - __main__ - INFO - Building vocabulary...
13
+ Counting word frequencies: 100%|██████████| 591753/591753 [00:01<00:00, 342838.45it/s]
14
+ 2025-06-09 20:08:18,236 - __main__ - INFO - Vocabulary size: 14030
15
+ 2025-06-09 20:08:18,377 - __main__ - INFO - New vocabulary built.
16
+ 2025-06-09 20:08:18,388 - __main__ - INFO - Saved newly built vocabulary to ./output/vocabulary.pkl
17
+ 2025-06-09 20:08:19,255 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_train2017.json
18
+ Processing annotations: 100%|██████████| 591753/591753 [01:01<00:00, 9579.54it/s]
19
+ 2025-06-09 20:09:21,207 - __main__ - INFO - Using subset of 200000 samples for the dataset.
20
+ 2025-06-09 20:09:21,207 - __main__ - INFO - Dataset size after filtering: 200000 samples.
21
+ 2025-06-09 20:09:21,308 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_val2017.json
22
+ Processing annotations: 100%|██████████| 25014/25014 [00:12<00:00, 2065.00it/s]
23
+ 2025-06-09 20:09:33,425 - __main__ - INFO - Dataset size after filtering: 25014 samples.
24
+ 2025-06-09 20:09:33,426 - __main__ - INFO - Training dataset size: 200000
25
+ 2025-06-09 20:09:33,427 - __main__ - INFO - Validation dataset size: 25014
26
+ Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
27
+ 100%|██████████| 97.8M/97.8M [00:00<00:00, 182MB/s]
28
+ 2025-06-09 20:09:34,636 - __main__ - INFO - ResNet encoder base layers are fine-tuning enabled.
29
+ 2025-06-09 20:09:34,982 - __main__ - INFO - Attempting to resume training from: ./output/best_model_bleu0.1019.pth
30
+ 2025-06-09 20:09:35,384 - __main__ - INFO - Resumed training from epoch 9. Best validation score so far: 0.1019
31
+ Epoch 10 Training: 3%|▎ | 99/3125 [01:34<48:01, 1.05it/s]
32
+ 2025-06-09 20:11:10,913 - __main__ - INFO - Epoch [10/18], Step [100/3125], Loss: 2.6771, Perplexity: 14.5422
33
+ Epoch 10 Training: 6%|▋ | 199/3125 [03:13<48:34, 1.00it/s]
34
+ 2025-06-09 20:12:50,072 - __main__ - INFO - Epoch [10/18], Step [200/3125], Loss: 2.5936, Perplexity: 13.3779
35
+ Epoch 10 Training: 10%|▉ | 299/3125 [04:52<47:27, 1.01s/it]
36
+ 2025-06-09 20:14:28,848 - __main__ - INFO - Epoch [10/18], Step [300/3125], Loss: 2.7310, Perplexity: 15.3478
37
+ Epoch 10 Training: 13%|█▎ | 399/3125 [06:32<45:05, 1.01it/s]
38
+ 2025-06-09 20:16:08,515 - __main__ - INFO - Epoch [10/18], Step [400/3125], Loss: 2.9600, Perplexity: 19.2978
39
+ Epoch 10 Training: 16%|█▌ | 499/3125 [08:11<43:37, 1.00it/s]
40
+ 2025-06-09 20:17:47,682 - __main__ - INFO - Epoch [10/18], Step [500/3125], Loss: 2.6822, Perplexity: 14.6177
41
+ Epoch 10 Training: 19%|█▉ | 599/3125 [09:50<41:42, 1.01it/s]
42
+ 2025-06-09 20:19:26,696 - __main__ - INFO - Epoch [10/18], Step [600/3125], Loss: 2.7823, Perplexity: 16.1564
43
+ Epoch 10 Training: 22%|██▏ | 699/3125 [11:29<39:36, 1.02it/s]
44
+ 2025-06-09 20:21:05,763 - __main__ - INFO - Epoch [10/18], Step [700/3125], Loss: 2.5442, Perplexity: 12.7334
45
+ Epoch 10 Training: 26%|██▌ | 799/3125 [13:08<38:48, 1.00s/it]
46
+ 2025-06-09 20:22:44,621 - __main__ - INFO - Epoch [10/18], Step [800/3125], Loss: 2.6036, Perplexity: 13.5120
47
+ Epoch 10 Training: 29%|██▉ | 899/3125 [14:47<36:19, 1.02it/s]
48
+ 2025-06-09 20:24:23,734 - __main__ - INFO - Epoch [10/18], Step [900/3125], Loss: 2.4600, Perplexity: 11.7049
49
+ Epoch 10 Training: 32%|███▏ | 999/3125 [16:26<35:25, 1.00it/s]
50
+ 2025-06-09 20:26:02,980 - __main__ - INFO - Epoch [10/18], Step [1000/3125], Loss: 2.8091, Perplexity: 16.5947
51
+ Epoch 10 Training: 35%|███▌ | 1099/3125 [18:05<33:06, 1.02it/s]
52
+ 2025-06-09 20:27:41,993 - __main__ - INFO - Epoch [10/18], Step [1100/3125], Loss: 2.6680, Perplexity: 14.4108
53
+ Epoch 10 Training: 38%|███▊ | 1199/3125 [19:44<31:27, 1.02it/s]
54
+ 2025-06-09 20:29:20,927 - __main__ - INFO - Epoch [10/18], Step [1200/3125], Loss: 2.7080, Perplexity: 14.9991
55
+ Epoch 10 Training: 42%|████▏ | 1299/3125 [21:23<30:11, 1.01it/s]
56
+ 2025-06-09 20:30:59,882 - __main__ - INFO - Epoch [10/18], Step [1300/3125], Loss: 2.6393, Perplexity: 14.0032
57
+ Epoch 10 Training: 45%|████▍ | 1399/3125 [23:02<28:14, 1.02it/s]
58
+ 2025-06-09 20:32:39,221 - __main__ - INFO - Epoch [10/18], Step [1400/3125], Loss: 2.7220, Perplexity: 15.2114
59
+ Epoch 10 Training: 48%|████▊ | 1499/3125 [24:41<26:35, 1.02it/s]
60
+ 2025-06-09 20:34:17,806 - __main__ - INFO - Epoch [10/18], Step [1500/3125], Loss: 2.9185, Perplexity: 18.5139
61
+ Epoch 10 Training: 51%|█████ | 1599/3125 [26:20<24:57, 1.02it/s]
62
+ 2025-06-09 20:35:57,128 - __main__ - INFO - Epoch [10/18], Step [1600/3125], Loss: 2.7292, Perplexity: 15.3205
63
+ Epoch 10 Training: 54%|█████▍ | 1699/3125 [27:59<23:47, 1.00s/it]
64
+ 2025-06-09 20:37:36,373 - __main__ - INFO - Epoch [10/18], Step [1700/3125], Loss: 2.7339, Perplexity: 15.3932
65
+ Epoch 10 Training: 58%|█████▊ | 1799/3125 [29:39<22:09, 1.00s/it]
66
+ 2025-06-09 20:39:15,864 - __main__ - INFO - Epoch [10/18], Step [1800/3125], Loss: 2.4978, Perplexity: 12.1557
67
+ Epoch 10 Training: 61%|██████ | 1899/3125 [31:18<20:28, 1.00s/it]
68
+ 2025-06-09 20:40:55,324 - __main__ - INFO - Epoch [10/18], Step [1900/3125], Loss: 2.5602, Perplexity: 12.9386
69
+ Epoch 10 Training: 64%|██████▍ | 1999/3125 [32:58<18:32, 1.01it/s]
70
+ 2025-06-09 20:42:35,029 - __main__ - INFO - Epoch [10/18], Step [2000/3125], Loss: 2.6032, Perplexity: 13.5067
71
+ Epoch 10 Training: 67%|██████▋ | 2099/3125 [34:38<17:02, 1.00it/s]
72
+ 2025-06-09 20:44:14,464 - __main__ - INFO - Epoch [10/18], Step [2100/3125], Loss: 2.6049, Perplexity: 13.5293
73
+ Epoch 10 Training: 70%|███████ | 2199/3125 [36:17<15:36, 1.01s/it]
74
+ 2025-06-09 20:45:53,664 - __main__ - INFO - Epoch [10/18], Step [2200/3125], Loss: 2.8925, Perplexity: 18.0390
75
+ Epoch 10 Training: 74%|███████▎ | 2299/3125 [37:56<13:32, 1.02it/s]
76
+ 2025-06-09 20:47:33,241 - __main__ - INFO - Epoch [10/18], Step [2300/3125], Loss: 2.6391, Perplexity: 14.0000
77
+ Epoch 10 Training: 77%|███████▋ | 2399/3125 [39:36<11:56, 1.01it/s]
78
+ 2025-06-09 20:49:12,664 - __main__ - INFO - Epoch [10/18], Step [2400/3125], Loss: 2.6457, Perplexity: 14.0939
79
+ Epoch 10 Training: 80%|███████▉ | 2499/3125 [41:15<10:22, 1.01it/s]
80
+ 2025-06-09 20:50:51,988 - __main__ - INFO - Epoch [10/18], Step [2500/3125], Loss: 2.6442, Perplexity: 14.0721
81
+ Epoch 10 Training: 83%|████████▎ | 2599/3125 [42:55<08:47, 1.00s/it]
82
+ 2025-06-09 20:52:31,912 - __main__ - INFO - Epoch [10/18], Step [2600/3125], Loss: 2.5336, Perplexity: 12.5989
83
+ Epoch 10 Training: 86%|████████▋ | 2699/3125 [44:34<07:09, 1.01s/it]
84
+ 2025-06-09 20:54:11,352 - __main__ - INFO - Epoch [10/18], Step [2700/3125], Loss: 2.6224, Perplexity: 13.7686
85
+ Epoch 10 Training: 90%|████████▉ | 2799/3125 [46:14<05:18, 1.02it/s]
86
+ 2025-06-09 20:55:50,824 - __main__ - INFO - Epoch [10/18], Step [2800/3125], Loss: 2.5839, Perplexity: 13.2486
87
+ Epoch 10 Training: 93%|█████████▎| 2899/3125 [47:53<03:42, 1.02it/s]
88
+ 2025-06-09 20:57:30,088 - __main__ - INFO - Epoch [10/18], Step [2900/3125], Loss: 2.5405, Perplexity: 12.6863
89
+ Epoch 10 Training: 96%|█████████▌| 2999/3125 [49:32<02:04, 1.01it/s]
90
+ 2025-06-09 20:59:09,355 - __main__ - INFO - Epoch [10/18], Step [3000/3125], Loss: 2.7648, Perplexity: 15.8761
91
+ Epoch 10 Training: 99%|█████████▉| 3099/3125 [51:12<00:25, 1.03it/s]
92
+ 2025-06-09 21:00:48,884 - __main__ - INFO - Epoch [10/18], Step [3100/3125], Loss: 2.7951, Perplexity: 16.3647
93
+ Epoch 10 Training: 100%|██████████| 3125/3125 [51:38<00:00, 1.01it/s]
94
+ 2025-06-09 21:01:14,014 - __main__ - INFO - Epoch 10 Training finished. Avg Loss: 2.6342, Time: 3098.63s
95
+ Validation: 100%|██████████| 391/391 [12:13<00:00, 1.88s/it]
96
+ 2025-06-09 21:13:27,613 - __main__ - INFO - Validation Avg Loss: 2.5347, Perplexity: 12.6126
97
+ 2025-06-09 21:13:40,932 - __main__ - INFO - Validation BLEU-4: 0.1032
98
+ 2025-06-09 21:13:41,690 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.1032.pth
99
+ 2025-06-09 21:13:42,369 - __main__ - INFO - Saved periodic model checkpoint to ./output/model_epoch_10.pth
100
+ 2025-06-09 21:13:42,369 - __main__ - INFO - Performing memory optimization after epoch...
101
+ 2025-06-09 21:13:42,654 - __main__ - INFO - CUDA cache emptied.
102
+ 2025-06-09 21:13:42,912 - __main__ - INFO - Python garbage collector run.
103
+ Epoch 11 Training: 3%|▎ | 99/3125 [01:42<50:15, 1.00it/s]
104
+ 2025-06-09 21:15:26,371 - __main__ - INFO - Epoch [11/18], Step [100/3125], Loss: 2.3691, Perplexity: 10.6873
105
+ Epoch 11 Training: 6%|▋ | 199/3125 [03:22<48:47, 1.00s/it]
106
+ 2025-06-09 21:17:06,315 - __main__ - INFO - Epoch [11/18], Step [200/3125], Loss: 2.4723, Perplexity: 11.8502
107
+ Epoch 11 Training: 10%|▉ | 299/3125 [05:01<47:02, 1.00it/s]
108
+ 2025-06-09 21:18:45,667 - __main__ - INFO - Epoch [11/18], Step [300/3125], Loss: 2.5049, Perplexity: 12.2420
109
+ Epoch 11 Training: 13%|█▎ | 399/3125 [06:41<45:29, 1.00s/it]
110
+ 2025-06-09 21:20:25,533 - __main__ - INFO - Epoch [11/18], Step [400/3125], Loss: 2.5821, Perplexity: 13.2247
111
+ Epoch 11 Training: 16%|█▌ | 499/3125 [08:21<43:37, 1.00it/s]
112
+ 2025-06-09 21:22:05,674 - __main__ - INFO - Epoch [11/18], Step [500/3125], Loss: 2.9378, Perplexity: 18.8750
113
+ Epoch 11 Training: 19%|█▉ | 599/3125 [10:01<41:51, 1.01it/s]
114
+ 2025-06-09 21:23:45,828 - __main__ - INFO - Epoch [11/18], Step [600/3125], Loss: 2.4629, Perplexity: 11.7391
115
+ Epoch 11 Training: 22%|██▏ | 699/3125 [11:41<40:02, 1.01it/s]
116
+ 2025-06-09 21:25:25,654 - __main__ - INFO - Epoch [11/18], Step [700/3125], Loss: 2.6155, Perplexity: 13.6746
117
+ Epoch 11 Training: 26%|██▌ | 799/3125 [13:21<38:43, 1.00it/s]
118
+ 2025-06-09 21:27:05,330 - __main__ - INFO - Epoch [11/18], Step [800/3125], Loss: 2.4537, Perplexity: 11.6314
119
+ Epoch 11 Training: 29%|██▉ | 899/3125 [15:01<38:04, 1.03s/it]
120
+ 2025-06-09 21:28:45,178 - __main__ - INFO - Epoch [11/18], Step [900/3125], Loss: 2.4852, Perplexity: 12.0030
121
+ Epoch 11 Training: 32%|███▏ | 999/3125 [16:40<34:52, 1.02it/s]
122
+ 2025-06-09 21:30:24,859 - __main__ - INFO - Epoch [11/18], Step [1000/3125], Loss: 2.6182, Perplexity: 13.7113
123
+ Epoch 11 Training: 35%|███▌ | 1099/3125 [18:20<33:24, 1.01it/s]
124
+ 2025-06-09 21:32:04,811 - __main__ - INFO - Epoch [11/18], Step [1100/3125], Loss: 2.3901, Perplexity: 10.9150
125
+ Epoch 11 Training: 38%|███▊ | 1199/3125 [20:00<31:51, 1.01it/s]
126
+ 2025-06-09 21:33:44,653 - __main__ - INFO - Epoch [11/18], Step [1200/3125], Loss: 2.6152, Perplexity: 13.6705
127
+ Epoch 11 Training: 42%|████▏ | 1299/3125 [21:40<30:49, 1.01s/it]
128
+ 2025-06-09 21:35:24,568 - __main__ - INFO - Epoch [11/18], Step [1300/3125], Loss: 2.7108, Perplexity: 15.0420
129
+ Epoch 11 Training: 45%|████▍ | 1399/3125 [23:20<28:14, 1.02it/s]
130
+ 2025-06-09 21:37:04,192 - __main__ - INFO - Epoch [11/18], Step [1400/3125], Loss: 2.5307, Perplexity: 12.5626
131
+ Epoch 11 Training: 48%|████▊ | 1499/3125 [25:00<27:04, 1.00it/s]
132
+ 2025-06-09 21:38:43,960 - __main__ - INFO - Epoch [11/18], Step [1500/3125], Loss: 2.5410, Perplexity: 12.6921
133
+ Epoch 11 Training: 51%|█████ | 1599/3125 [26:39<25:55, 1.02s/it]
134
+ 2025-06-09 21:40:23,752 - __main__ - INFO - Epoch [11/18], Step [1600/3125], Loss: 2.5365, Perplexity: 12.6349
135
+ Epoch 11 Training: 54%|█████▍ | 1699/3125 [28:19<23:49, 1.00s/it]
136
+ 2025-06-09 21:42:03,735 - __main__ - INFO - Epoch [11/18], Step [1700/3125], Loss: 2.8819, Perplexity: 17.8483
137
+ Epoch 11 Training: 58%|█████▊ | 1799/3125 [29:59<22:08, 1.00s/it]
138
+ 2025-06-09 21:43:43,540 - __main__ - INFO - Epoch [11/18], Step [1800/3125], Loss: 2.5707, Perplexity: 13.0754
139
+ Epoch 11 Training: 61%|██████ | 1899/3125 [31:39<20:19, 1.01it/s]
140
+ 2025-06-09 21:45:23,225 - __main__ - INFO - Epoch [11/18], Step [1900/3125], Loss: 2.4161, Perplexity: 11.2024
141
+ Epoch 11 Training: 64%|██████▍ | 1999/3125 [33:19<18:37, 1.01it/s]
142
+ 2025-06-09 21:47:02,925 - __main__ - INFO - Epoch [11/18], Step [2000/3125], Loss: 2.5705, Perplexity: 13.0728
143
+ Epoch 11 Training: 67%|██████▋ | 2099/3125 [34:58<17:16, 1.01s/it]
144
+ 2025-06-09 21:48:42,726 - __main__ - INFO - Epoch [11/18], Step [2100/3125], Loss: 2.4674, Perplexity: 11.7918
145
+ Epoch 11 Training: 70%|███████ | 2199/3125 [36:38<15:29, 1.00s/it]
146
+ 2025-06-09 21:50:22,947 - __main__ - INFO - Epoch [11/18], Step [2200/3125], Loss: 2.5938, Perplexity: 13.3808
147
+ Epoch 11 Training: 74%|███████▎ | 2299/3125 [38:18<13:40, 1.01it/s]
148
+ 2025-06-09 21:52:02,823 - __main__ - INFO - Epoch [11/18], Step [2300/3125], Loss: 2.6236, Perplexity: 13.7849
149
+ Epoch 11 Training: 77%|███████▋ | 2399/3125 [39:58<12:09, 1.00s/it]
150
+ 2025-06-09 21:53:42,643 - __main__ - INFO - Epoch [11/18], Step [2400/3125], Loss: 2.3626, Perplexity: 10.6186
151
+ Epoch 11 Training: 80%|███████▉ | 2499/3125 [41:38<10:29, 1.00s/it]
152
+ 2025-06-09 21:55:22,679 - __main__ - INFO - Epoch [11/18], Step [2500/3125], Loss: 2.7776, Perplexity: 16.0800
153
+ Epoch 11 Training: 83%|████████▎ | 2599/3125 [43:18<08:43, 1.00it/s]
154
+ 2025-06-09 21:57:02,100 - __main__ - INFO - Epoch [11/18], Step [2600/3125], Loss: 2.3201, Perplexity: 10.1771
155
+ Epoch 11 Training: 86%|████████▋ | 2699/3125 [44:58<07:02, 1.01it/s]
156
+ 2025-06-09 21:58:41,997 - __main__ - INFO - Epoch [11/18], Step [2700/3125], Loss: 2.7016, Perplexity: 14.9040
157
+ Epoch 11 Training: 90%|████████▉ | 2799/3125 [46:38<05:25, 1.00it/s]
158
+ 2025-06-09 22:00:22,129 - __main__ - INFO - Epoch [11/18], Step [2800/3125], Loss: 2.5685, Perplexity: 13.0467
159
+ Epoch 11 Training: 93%|█████████▎| 2899/3125 [48:18<03:46, 1.00s/it]
160
+ 2025-06-09 22:02:02,186 - __main__ - INFO - Epoch [11/18], Step [2900/3125], Loss: 2.5174, Perplexity: 12.3969
161
+ Epoch 11 Training: 96%|█████████▌| 2999/3125 [49:58<02:05, 1.00it/s]
162
+ 2025-06-09 22:03:42,251 - __main__ - INFO - Epoch [11/18], Step [3000/3125], Loss: 2.4016, Perplexity: 11.0412
163
+ Epoch 11 Training: 99%|█████████▉| 3099/3125 [51:38<00:25, 1.01it/s]
164
+ 2025-06-09 22:05:22,387 - __main__ - INFO - Epoch [11/18], Step [3100/3125], Loss: 2.7063, Perplexity: 14.9733
165
+ Epoch 11 Training: 100%|██████████| 3125/3125 [52:04<00:00, 1.00it/s]
166
+ 2025-06-09 22:05:47,454 - __main__ - INFO - Epoch 11 Training finished. Avg Loss: 2.5565, Time: 3124.54s
167
+ Validation: 100%|██████████| 391/391 [13:01<00:00, 2.00s/it]
168
+ 2025-06-09 22:18:48,931 - __main__ - INFO - Validation Avg Loss: 2.5207, Perplexity: 12.4378
169
+ 2025-06-09 22:18:59,521 - __main__ - INFO - Validation BLEU-4: 0.1024
170
+ 2025-06-09 22:18:59,522 - __main__ - INFO - Performing memory optimization after epoch...
171
+ 2025-06-09 22:18:59,794 - __main__ - INFO - CUDA cache emptied.
172
+ 2025-06-09 22:19:00,165 - __main__ - INFO - Python garbage collector run.
173
+ Epoch 12 Training: 3%|▎ | 99/3125 [01:42<49:19, 1.02it/s]
174
+ 2025-06-09 22:20:43,554 - __main__ - INFO - Epoch [12/18], Step [100/3125], Loss: 2.4403, Perplexity: 11.4770
175
+ Epoch 12 Training: 6%|▋ | 199/3125 [03:22<48:16, 1.01it/s]
176
+ 2025-06-09 22:22:23,419 - __main__ - INFO - Epoch [12/18], Step [200/3125], Loss: 2.3241, Perplexity: 10.2174
177
+ Epoch 12 Training: 10%|▉ | 299/3125 [05:02<46:39, 1.01it/s]
178
+ 2025-06-09 22:24:03,388 - __main__ - INFO - Epoch [12/18], Step [300/3125], Loss: 2.3308, Perplexity: 10.2864
179
+ Epoch 12 Training: 13%|█▎ | 399/3125 [06:41<46:12, 1.02s/it]
180
+ 2025-06-09 22:25:43,123 - __main__ - INFO - Epoch [12/18], Step [400/3125], Loss: 2.5223, Perplexity: 12.4575
181
+ Epoch 12 Training: 16%|█▌ | 499/3125 [08:22<43:29, 1.01it/s]
182
+ 2025-06-09 22:27:23,173 - __main__ - INFO - Epoch [12/18], Step [500/3125], Loss: 2.4984, Perplexity: 12.1631
183
+ Epoch 12 Training: 19%|█▉ | 599/3125 [10:02<42:15, 1.00s/it]
184
+ 2025-06-09 22:29:03,434 - __main__ - INFO - Epoch [12/18], Step [600/3125], Loss: 2.4451, Perplexity: 11.5312
185
+ Epoch 12 Training: 22%|██▏ | 699/3125 [11:42<39:33, 1.02it/s]
186
+ 2025-06-09 22:30:43,225 - __main__ - INFO - Epoch [12/18], Step [700/3125], Loss: 2.3790, Perplexity: 10.7946
187
+ Epoch 12 Training: 26%|██▌ | 799/3125 [13:22<38:02, 1.02it/s]
188
+ 2025-06-09 22:32:23,192 - __main__ - INFO - Epoch [12/18], Step [800/3125], Loss: 2.3688, Perplexity: 10.6844
189
+ Epoch 12 Training: 29%|██▉ | 899/3125 [15:01<36:38, 1.01it/s]
190
+ 2025-06-09 22:34:02,897 - __main__ - INFO - Epoch [12/18], Step [900/3125], Loss: 2.5906, Perplexity: 13.3372
191
+ Epoch 12 Training: 32%|███▏ | 999/3125 [16:41<34:54, 1.02it/s]
192
+ 2025-06-09 22:35:43,003 - __main__ - INFO - Epoch [12/18], Step [1000/3125], Loss: 2.5290, Perplexity: 12.5412
193
+ Epoch 12 Training: 35%|███▌ | 1099/3125 [18:21<33:36, 1.00it/s]
194
+ 2025-06-09 22:37:22,472 - __main__ - INFO - Epoch [12/18], Step [1100/3125], Loss: 2.3719, Perplexity: 10.7179
195
+ Epoch 12 Training: 38%|███▊ | 1199/3125 [20:01<32:32, 1.01s/it]
196
+ 2025-06-09 22:39:03,204 - __main__ - INFO - Epoch [12/18], Step [1200/3125], Loss: 2.5641, Perplexity: 12.9886
197
+ Epoch 12 Training: 42%|████▏ | 1299/3125 [21:42<30:36, 1.01s/it]
198
+ 2025-06-09 22:40:43,259 - __main__ - INFO - Epoch [12/18], Step [1300/3125], Loss: 2.5392, Perplexity: 12.6697
199
+ Epoch 12 Training: 45%|████▍ | 1399/3125 [23:22<29:17, 1.02s/it]
200
+ 2025-06-09 22:42:23,714 - __main__ - INFO - Epoch [12/18], Step [1400/3125], Loss: 2.6697, Perplexity: 14.4351
201
+ Epoch 12 Training: 48%|████▊ | 1499/3125 [25:02<26:46, 1.01it/s]
202
+ 2025-06-09 22:44:03,596 - __main__ - INFO - Epoch [12/18], Step [1500/3125], Loss: 2.3967, Perplexity: 10.9870
203
+ Epoch 12 Training: 51%|█████ | 1599/3125 [26:42<24:59, 1.02it/s]
204
+ 2025-06-09 22:45:43,674 - __main__ - INFO - Epoch [12/18], Step [1600/3125], Loss: 2.6011, Perplexity: 13.4780
205
+ Epoch 12 Training: 54%|█████▍ | 1699/3125 [28:22<23:49, 1.00s/it]
206
+ 2025-06-09 22:47:23,776 - __main__ - INFO - Epoch [12/18], Step [1700/3125], Loss: 2.5272, Perplexity: 12.5179
207
+ Epoch 12 Training: 58%|█████▊ | 1799/3125 [30:02<22:19, 1.01s/it]
208
+ 2025-06-09 22:49:03,471 - __main__ - INFO - Epoch [12/18], Step [1800/3125], Loss: 2.3498, Perplexity: 10.4834
209
+ Epoch 12 Training: 61%|██████ | 1899/3125 [31:41<20:06, 1.02it/s]
210
+ 2025-06-09 22:50:43,076 - __main__ - INFO - Epoch [12/18], Step [1900/3125], Loss: 2.8234, Perplexity: 16.8332
211
+ Epoch 12 Training: 64%|██████▍ | 1999/3125 [33:21<18:55, 1.01s/it]
212
+ 2025-06-09 22:52:22,767 - __main__ - INFO - Epoch [12/18], Step [2000/3125], Loss: 2.1771, Perplexity: 8.8204
213
+ Epoch 12 Training: 67%|██���███▋ | 2099/3125 [35:02<17:22, 1.02s/it]
214
+ 2025-06-09 22:54:03,185 - __main__ - INFO - Epoch [12/18], Step [2100/3125], Loss: 2.3431, Perplexity: 10.4136
215
+ Epoch 12 Training: 70%|███████ | 2199/3125 [36:41<15:18, 1.01it/s]
216
+ 2025-06-09 22:55:43,082 - __main__ - INFO - Epoch [12/18], Step [2200/3125], Loss: 2.6427, Perplexity: 14.0514
217
+ Epoch 12 Training: 74%|███████▎ | 2299/3125 [38:21<13:42, 1.00it/s]
218
+ 2025-06-09 22:57:22,857 - __main__ - INFO - Epoch [12/18], Step [2300/3125], Loss: 2.4172, Perplexity: 11.2148
219
+ Epoch 12 Training: 77%|███████▋ | 2399/3125 [40:01<11:52, 1.02it/s]
220
+ 2025-06-09 22:59:02,397 - __main__ - INFO - Epoch [12/18], Step [2400/3125], Loss: 2.7323, Perplexity: 15.3679
221
+ Epoch 12 Training: 80%|███████▉ | 2499/3125 [41:41<10:30, 1.01s/it]
222
+ 2025-06-09 23:00:42,439 - __main__ - INFO - Epoch [12/18], Step [2500/3125], Loss: 2.7351, Perplexity: 15.4109
223
+ Epoch 12 Training: 83%|████████▎ | 2599/3125 [43:20<08:37, 1.02it/s]
224
+ 2025-06-09 23:02:21,984 - __main__ - INFO - Epoch [12/18], Step [2600/3125], Loss: 2.4624, Perplexity: 11.7333
225
+ Epoch 12 Training: 86%|████████▋ | 2699/3125 [45:00<07:11, 1.01s/it]
226
+ 2025-06-09 23:04:02,074 - __main__ - INFO - Epoch [12/18], Step [2700/3125], Loss: 2.4864, Perplexity: 12.0180
227
+ Epoch 12 Training: 90%|████████▉ | 2799/3125 [46:41<05:23, 1.01it/s]
228
+ 2025-06-09 23:05:42,197 - __main__ - INFO - Epoch [12/18], Step [2800/3125], Loss: 2.7285, Perplexity: 15.3102
229
+ Epoch 12 Training: 93%|█████████▎| 2899/3125 [48:20<03:45, 1.00it/s]
230
+ 2025-06-09 23:07:21,825 - __main__ - INFO - Epoch [12/18], Step [2900/3125], Loss: 2.2980, Perplexity: 9.9542
231
+ Epoch 12 Training: 96%|█████████▌| 2999/3125 [50:00<02:07, 1.01s/it]
232
+ 2025-06-09 23:09:02,083 - __main__ - INFO - Epoch [12/18], Step [3000/3125], Loss: 2.5476, Perplexity: 12.7761
233
+ Epoch 12 Training: 99%|█████████▉| 3099/3125 [51:40<00:25, 1.02it/s]
234
+ 2025-06-09 23:10:41,765 - __main__ - INFO - Epoch [12/18], Step [3100/3125], Loss: 2.3022, Perplexity: 9.9965
235
+ Epoch 12 Training: 100%|██████████| 3125/3125 [52:06<00:00, 1.00s/it]
236
+ 2025-06-09 23:11:06,691 - __main__ - INFO - Epoch 12 Training finished. Avg Loss: 2.4966, Time: 3126.52s
237
+ Validation: 100%|██████████| 391/391 [12:56<00:00, 1.99s/it]
238
+ 2025-06-09 23:24:03,561 - __main__ - INFO - Validation Avg Loss: 2.5151, Perplexity: 12.3681
239
+ 2025-06-09 23:24:13,805 - __main__ - INFO - Validation BLEU-4: 0.1011
240
+ 2025-06-09 23:24:13,806 - __main__ - INFO - Performing memory optimization after epoch...
241
+ 2025-06-09 23:24:14,070 - __main__ - INFO - CUDA cache emptied.
242
+ 2025-06-09 23:24:14,429 - __main__ - INFO - Python garbage collector run.
243
+ Epoch 13 Training: 3%|▎ | 99/3125 [01:42<51:15, 1.02s/it]
244
+ 2025-06-09 23:25:58,208 - __main__ - INFO - Epoch [13/18], Step [100/3125], Loss: 2.4482, Perplexity: 11.5679
245
+ Epoch 13 Training: 6%|▋ | 199/3125 [03:22<48:21, 1.01it/s]
246
+ 2025-06-09 23:27:37,851 - __main__ - INFO - Epoch [13/18], Step [200/3125], Loss: 2.4197, Perplexity: 11.2422
247
+ Epoch 13 Training: 10%|▉ | 299/3125 [05:02<46:30, 1.01it/s]
248
+ 2025-06-09 23:29:17,582 - __main__ - INFO - Epoch [13/18], Step [300/3125], Loss: 2.6092, Perplexity: 13.5885
249
+ Epoch 13 Training: 13%|█▎ | 399/3125 [06:42<45:55, 1.01s/it]
250
+ 2025-06-09 23:30:57,708 - __main__ - INFO - Epoch [13/18], Step [400/3125], Loss: 2.3414, Perplexity: 10.3956
251
+ Epoch 13 Training: 16%|█▌ | 499/3125 [08:22<44:21, 1.01s/it]
252
+ 2025-06-09 23:32:37,419 - __main__ - INFO - Epoch [13/18], Step [500/3125], Loss: 2.3210, Perplexity: 10.1858
253
+ Epoch 13 Training: 19%|█▉ | 599/3125 [10:02<41:59, 1.00it/s]
254
+ 2025-06-09 23:34:17,416 - __main__ - INFO - Epoch [13/18], Step [600/3125], Loss: 2.3739, Perplexity: 10.7388
255
+ Epoch 13 Training: 22%|██▏ | 699/3125 [11:42<39:33, 1.02it/s]
256
+ 2025-06-09 23:35:57,481 - __main__ - INFO - Epoch [13/18], Step [700/3125], Loss: 2.5507, Perplexity: 12.8167
257
+ Epoch 13 Training: 26%|██▌ | 799/3125 [13:22<38:21, 1.01it/s]
258
+ 2025-06-09 23:37:37,545 - __main__ - INFO - Epoch [13/18], Step [800/3125], Loss: 2.4497, Perplexity: 11.5848
259
+ Epoch 13 Training: 29%|██▉ | 899/3125 [15:01<36:56, 1.00it/s]
260
+ 2025-06-09 23:39:17,185 - __main__ - INFO - Epoch [13/18], Step [900/3125], Loss: 2.4555, Perplexity: 11.6527
261
+ Epoch 13 Training: 32%|███▏ | 999/3125 [16:41<36:31, 1.03s/it]
262
+ 2025-06-09 23:40:56,979 - __main__ - INFO - Epoch [13/18], Step [1000/3125], Loss: 2.2727, Perplexity: 9.7054
263
+ Epoch 13 Training: 35%|███▌ | 1099/3125 [18:21<33:55, 1.00s/it]
264
+ 2025-06-09 23:42:36,830 - __main__ - INFO - Epoch [13/18], Step [1100/3125], Loss: 2.4161, Perplexity: 11.2025
265
+ Epoch 13 Training: 38%|███▊ | 1199/3125 [20:01<32:52, 1.02s/it]
266
+ 2025-06-09 23:44:16,954 - __main__ - INFO - Epoch [13/18], Step [1200/3125], Loss: 2.5078, Perplexity: 12.2775
267
+ Epoch 13 Training: 42%|████▏ | 1299/3125 [21:41<30:29, 1.00s/it]
268
+ 2025-06-09 23:45:57,440 - __main__ - INFO - Epoch [13/18], Step [1300/3125], Loss: 2.3249, Perplexity: 10.2256
269
+ Epoch 13 Training: 45%|████▍ | 1399/3125 [23:21<28:51, 1.00s/it]
270
+ 2025-06-09 23:47:37,061 - __main__ - INFO - Epoch [13/18], Step [1400/3125], Loss: 2.4165, Perplexity: 11.2065
271
+ Epoch 13 Training: 48%|████▊ | 1499/3125 [25:01<27:32, 1.02s/it]
272
+ 2025-06-09 23:49:16,690 - __main__ - INFO - Epoch [13/18], Step [1500/3125], Loss: 2.6065, Perplexity: 13.5516
273
+ Epoch 13 Training: 51%|█████ | 1599/3125 [26:41<26:01, 1.02s/it]
274
+ 2025-06-09 23:50:57,187 - __main__ - INFO - Epoch [13/18], Step [1600/3125], Loss: 2.4175, Perplexity: 11.2178
275
+ Epoch 13 Training: 54%|█████▍ | 1699/3125 [28:22<23:52, 1.00s/it]
276
+ 2025-06-09 23:52:37,586 - __main__ - INFO - Epoch [13/18], Step [1700/3125], Loss: 2.6172, Perplexity: 13.6977
277
+ Epoch 13 Training: 58%|█████▊ | 1799/3125 [30:02<22:25, 1.01s/it]
278
+ 2025-06-09 23:54:17,784 - __main__ - INFO - Epoch [13/18], Step [1800/3125], Loss: 2.6982, Perplexity: 14.8536
279
+ Epoch 13 Training: 61%|██████ | 1899/3125 [31:42<21:00, 1.03s/it]
280
+ 2025-06-09 23:55:57,904 - __main__ - INFO - Epoch [13/18], Step [1900/3125], Loss: 2.5895, Perplexity: 13.3233
281
+ Epoch 13 Training: 64%|██████▍ | 1999/3125 [33:22<18:36, 1.01it/s]
282
+ 2025-06-09 23:57:38,406 - __main__ - INFO - Epoch [13/18], Step [2000/3125], Loss: 2.3281, Perplexity: 10.2589
283
+ Epoch 13 Training: 67%|██████▋ | 2099/3125 [35:02<17:31, 1.03s/it]
284
+ 2025-06-09 23:59:18,322 - __main__ - INFO - Epoch [13/18], Step [2100/3125], Loss: 2.4475, Perplexity: 11.5592
285
+ Epoch 13 Training: 70%|███████ | 2199/3125 [36:42<15:18, 1.01it/s]
286
+ 2025-06-10 00:00:58,023 - __main__ - INFO - Epoch [13/18], Step [2200/3125], Loss: 2.3961, Perplexity: 10.9799
287
+ Epoch 13 Training: 74%|███████▎ | 2299/3125 [38:22<13:59, 1.02s/it]
288
+ 2025-06-10 00:02:38,142 - __main__ - INFO - Epoch [13/18], Step [2300/3125], Loss: 2.2835, Perplexity: 9.8108
289
+ Epoch 13 Training: 77%|███████▋ | 2399/3125 [40:02<12:11, 1.01s/it]
290
+ 2025-06-10 00:04:17,990 - __main__ - INFO - Epoch [13/18], Step [2400/3125], Loss: 2.4577, Perplexity: 11.6781
291
+ Epoch 13 Training: 80%|███████▉ | 2499/3125 [41:42<10:38, 1.02s/it]
292
+ 2025-06-10 00:05:58,212 - __main__ - INFO - Epoch [13/18], Step [2500/3125], Loss: 2.4646, Perplexity: 11.7588
293
+ Epoch 13 Training: 83%|████████▎ | 2599/3125 [43:22<08:52, 1.01s/it]
294
+ 2025-06-10 00:07:37,617 - __main__ - INFO - Epoch [13/18], Step [2600/3125], Loss: 2.4594, Perplexity: 11.6975
295
+ Epoch 13 Training: 86%|████████▋ | 2699/3125 [45:02<07:09, 1.01s/it]
296
+ 2025-06-10 00:09:17,938 - __main__ - INFO - Epoch [13/18], Step [2700/3125], Loss: 2.1887, Perplexity: 8.9235
297
+ Epoch 13 Training: 90%|████████▉ | 2799/3125 [46:42<05:24, 1.01it/s]
298
+ 2025-06-10 00:10:57,572 - __main__ - INFO - Epoch [13/18], Step [2800/3125], Loss: 2.3643, Perplexity: 10.6365
299
+ Epoch 13 Training: 93%|█████████▎| 2899/3125 [48:21<03:44, 1.01it/s]
300
+ 2025-06-10 00:12:37,377 - __main__ - INFO - Epoch [13/18], Step [2900/3125], Loss: 2.3395, Perplexity: 10.3765
301
+ Epoch 13 Training: 96%|█████████▌| 2999/3125 [50:01<02:06, 1.00s/it]
302
+ 2025-06-10 00:14:17,341 - __main__ - INFO - Epoch [13/18], Step [3000/3125], Loss: 2.6757, Perplexity: 14.5222
303
+ Epoch 13 Training: 99%|█████████▉| 3099/3125 [51:41<00:25, 1.02it/s]
304
+ 2025-06-10 00:15:56,947 - __main__ - INFO - Epoch [13/18], Step [3100/3125], Loss: 2.3261, Perplexity: 10.2375
305
+ Epoch 13 Training: 100%|██████████| 3125/3125 [52:07<00:00, 1.00s/it]
306
+ 2025-06-10 00:16:21,710 - __main__ - INFO - Epoch 13 Training finished. Avg Loss: 2.4454, Time: 3127.28s
307
+ Validation: 100%|██████████| 391/391 [11:23<00:00, 1.75s/it]
308
+ 2025-06-10 00:27:44,952 - __main__ - INFO - Validation Avg Loss: 2.5155, Perplexity: 12.3724
309
+ 2025-06-10 00:27:54,230 - __main__ - INFO - Validation BLEU-4: 0.1028
310
+ 2025-06-10 00:27:54,231 - __main__ - INFO - Performing memory optimization after epoch...
311
+ 2025-06-10 00:27:54,512 - __main__ - INFO - CUDA cache emptied.
312
+ 2025-06-10 00:27:54,845 - __main__ - INFO - Python garbage collector run.
313
+ Epoch 14 Training: 3%|▎ | 99/3125 [01:41<49:13, 1.02it/s]
314
+ 2025-06-10 00:29:37,110 - __main__ - INFO - Epoch [14/18], Step [100/3125], Loss: 2.2691, Perplexity: 9.6705
315
+ Epoch 14 Training: 6%|▋ | 199/3125 [03:20<48:25, 1.01it/s]
316
+ 2025-06-10 00:31:15,892 - __main__ - INFO - Epoch [14/18], Step [200/3125], Loss: 2.2401, Perplexity: 9.3940
317
+ Epoch 14 Training: 10%|▉ | 299/3125 [04:59<46:23, 1.02it/s]
318
+ 2025-06-10 00:32:55,407 - __main__ - INFO - Epoch [14/18], Step [300/3125], Loss: 2.2769, Perplexity: 9.7467
319
+ Epoch 14 Training: 13%|█▎ | 399/3125 [06:38<44:38, 1.02it/s]
320
+ 2025-06-10 00:34:34,689 - __main__ - INFO - Epoch [14/18], Step [400/3125], Loss: 2.3194, Perplexity: 10.1698
321
+ Epoch 14 Training: 16%|█▌ | 499/3125 [08:17<43:00, 1.02it/s]
322
+ 2025-06-10 00:36:13,668 - __main__ - INFO - Epoch [14/18], Step [500/3125], Loss: 2.3454, Perplexity: 10.4371
323
+ Epoch 14 Training: 19%|█▉ | 599/3125 [09:57<42:10, 1.00s/it]
324
+ 2025-06-10 00:37:53,169 - __main__ - INFO - Epoch [14/18], Step [600/3125], Loss: 2.3314, Perplexity: 10.2923
325
+ Epoch 14 Training: 22%|██▏ | 699/3125 [11:36<39:46, 1.02it/s]
326
+ 2025-06-10 00:39:32,397 - __main__ - INFO - Epoch [14/18], Step [700/3125], Loss: 2.1478, Perplexity: 8.5664
327
+ Epoch 14 Training: 26%|██▌ | 799/3125 [13:15<38:00, 1.02it/s]
328
+ 2025-06-10 00:41:11,424 - __main__ - INFO - Epoch [14/18], Step [800/3125], Loss: 2.4812, Perplexity: 11.9557
329
+ Epoch 14 Training: 29%|██▉ | 899/3125 [14:54<37:12, 1.00s/it]
330
+ 2025-06-10 00:42:50,444 - __main__ - INFO - Epoch [14/18], Step [900/3125], Loss: 2.5592, Perplexity: 12.9257
331
+ Epoch 14 Training: 32%|███▏ | 999/3125 [16:34<34:52, 1.02it/s]
332
+ 2025-06-10 00:44:29,857 - __main__ - INFO - Epoch [14/18], Step [1000/3125], Loss: 2.4252, Perplexity: 11.3050
333
+ Epoch 14 Training: 35%|███▌ | 1099/3125 [18:13<33:43, 1.00it/s]
334
+ 2025-06-10 00:46:09,446 - __main__ - INFO - Epoch [14/18], Step [1100/3125], Loss: 2.4280, Perplexity: 11.3366
335
+ Epoch 14 Training: 38%|███▊ | 1199/3125 [19:52<31:57, 1.00it/s]
336
+ 2025-06-10 00:47:48,452 - __main__ - INFO - Epoch [14/18], Step [1200/3125], Loss: 2.2805, Perplexity: 9.7818
337
+ Epoch 14 Training: 42%|████▏ | 1299/3125 [21:32<30:13, 1.01it/s]
338
+ 2025-06-10 00:49:27,846 - __main__ - INFO - Epoch [14/18], Step [1300/3125], Loss: 2.3755, Perplexity: 10.7560
339
+ Epoch 14 Training: 45%|████▍ | 1399/3125 [23:11<28:21, 1.01it/s]
340
+ 2025-06-10 00:51:07,151 - __main__ - INFO - Epoch [14/18], Step [1400/3125], Loss: 2.4423, Perplexity: 11.4991
341
+ Epoch 14 Training: 48%|████▊ | 1499/3125 [24:49<26:28, 1.02it/s]
342
+ 2025-06-10 00:52:45,875 - __main__ - INFO - Epoch [14/18], Step [1500/3125], Loss: 2.7027, Perplexity: 14.9202
343
+ Epoch 14 Training: 51%|█████ | 1599/3125 [26:29<25:11, 1.01it/s]
344
+ 2025-06-10 00:54:24,994 - __main__ - INFO - Epoch [14/18], Step [1600/3125], Loss: 2.5307, Perplexity: 12.5619
345
+ Epoch 14 Training: 54%|█████▍ | 1699/3125 [28:08<23:33, 1.01it/s]
346
+ 2025-06-10 00:56:04,133 - __main__ - INFO - Epoch [14/18], Step [1700/3125], Loss: 2.6793, Perplexity: 14.5745
347
+ Epoch 14 Training: 58%|█████▊ | 1799/3125 [29:47<21:40, 1.02it/s]
348
+ 2025-06-10 00:57:43,590 - __main__ - INFO - Epoch [14/18], Step [1800/3125], Loss: 2.3497, Perplexity: 10.4828
349
+ Epoch 14 Training: 61%|██████ | 1899/3125 [31:27<20:19, 1.01it/s]
350
+ 2025-06-10 00:59:23,072 - __main__ - INFO - Epoch [14/18], Step [1900/3125], Loss: 2.2267, Perplexity: 9.2696
351
+ Epoch 14 Training: 64%|██████▍ | 1999/3125 [33:06<18:50, 1.00s/it]
352
+ 2025-06-10 01:01:02,177 - __main__ - INFO - Epoch [14/18], Step [2000/3125], Loss: 2.1601, Perplexity: 8.6721
353
+ Epoch 14 Training: 67%|██████▋ | 2099/3125 [34:45<16:47, 1.02it/s]
354
+ 2025-06-10 01:02:41,309 - __main__ - INFO - Epoch [14/18], Step [2100/3125], Loss: 2.3438, Perplexity: 10.4212
355
+ Epoch 14 Training: 70%|███████ | 2199/3125 [36:24<15:26, 1.00s/it]
356
+ 2025-06-10 01:04:20,695 - __main__ - INFO - Epoch [14/18], Step [2200/3125], Loss: 2.3716, Perplexity: 10.7140
357
+ Epoch 14 Training: 74%|███████▎ | 2299/3125 [38:04<13:56, 1.01s/it]
358
+ 2025-06-10 01:06:00,048 - __main__ - INFO - Epoch [14/18], Step [2300/3125], Loss: 2.4434, Perplexity: 11.5116
359
+ Epoch 14 Training: 77%|███████▋ | 2399/3125 [39:42<12:12, 1.01s/it]
360
+ 2025-06-10 01:07:38,643 - __main__ - INFO - Epoch [14/18], Step [2400/3125], Loss: 2.5465, Perplexity: 12.7625
361
+ Epoch 14 Training: 80%|███████▉ | 2499/3125 [41:22<10:19, 1.01it/s]
362
+ 2025-06-10 01:09:18,224 - __main__ - INFO - Epoch [14/18], Step [2500/3125], Loss: 2.2639, Perplexity: 9.6206
363
+ Epoch 14 Training: 83%|████████▎ | 2599/3125 [43:01<08:31, 1.03it/s]
364
+ 2025-06-10 01:10:57,528 - __main__ - INFO - Epoch [14/18], Step [2600/3125], Loss: 2.4817, Perplexity: 11.9613
365
+ Epoch 14 Training: 86%|████████▋ | 2699/3125 [44:41<07:03, 1.01it/s]
366
+ 2025-06-10 01:12:36,994 - __main__ - INFO - Epoch [14/18], Step [2700/3125], Loss: 2.5853, Perplexity: 13.2677
367
+ Epoch 14 Training: 90%|████████▉ | 2799/3125 [46:20<05:26, 1.00s/it]
368
+ 2025-06-10 01:14:16,565 - __main__ - INFO - Epoch [14/18], Step [2800/3125], Loss: 2.4660, Perplexity: 11.7758
369
+ Epoch 14 Training: 93%|█████████▎| 2899/3125 [47:59<03:42, 1.02it/s]
370
+ 2025-06-10 01:15:55,419 - __main__ - INFO - Epoch [14/18], Step [2900/3125], Loss: 2.2190, Perplexity: 9.1977
371
+ Epoch 14 Training: 96%|█████████▌| 2999/3125 [49:38<02:06, 1.00s/it]
372
+ 2025-06-10 01:17:34,545 - __main__ - INFO - Epoch [14/18], Step [3000/3125], Loss: 2.3681, Perplexity: 10.6775
373
+ Epoch 14 Training: 99%|█████████▉| 3099/3125 [51:18<00:26, 1.01s/it]
374
+ 2025-06-10 01:19:14,006 - __main__ - INFO - Epoch [14/18], Step [3100/3125], Loss: 2.3602, Perplexity: 10.5930
375
+ Epoch 14 Training: 100%|██████████| 3125/3125 [51:44<00:00, 1.01it/s]
376
+ 2025-06-10 01:19:39,252 - __main__ - INFO - Epoch 14 Training finished. Avg Loss: 2.4006, Time: 3104.41s
377
+ Validation: 100%|██████████| 391/391 [11:33<00:00, 1.77s/it]
378
+ 2025-06-10 01:31:12,588 - __main__ - INFO - Validation Avg Loss: 2.5215, Perplexity: 12.4476
379
+ 2025-06-10 01:31:21,839 - __main__ - INFO - Validation BLEU-4: 0.1034
380
+ 2025-06-10 01:31:22,515 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.1034.pth
381
+ 2025-06-10 01:31:22,516 - __main__ - INFO - Performing memory optimization after epoch...
382
+ 2025-06-10 01:31:22,796 - __main__ - INFO - CUDA cache emptied.
383
+ 2025-06-10 01:31:23,124 - __main__ - INFO - Python garbage collector run.
384
+ Epoch 15 Training: 3%|▎ | 99/3125 [01:41<49:40, 1.02it/s]
385
+ 2025-06-10 01:33:05,501 - __main__ - INFO - Epoch [15/18], Step [100/3125], Loss: 2.4716, Perplexity: 11.8419
386
+ Epoch 15 Training: 6%|▋ | 199/3125 [03:20<47:52, 1.02it/s]
387
+ 2025-06-10 01:34:44,670 - __main__ - INFO - Epoch [15/18], Step [200/3125], Loss: 2.2404, Perplexity: 9.3972
388
+ Epoch 15 Training: 10%|▉ | 299/3125 [04:59<46:22, 1.02it/s]
389
+ 2025-06-10 01:36:23,538 - __main__ - INFO - Epoch [15/18], Step [300/3125], Loss: 2.2223, Perplexity: 9.2283
390
+ Epoch 15 Training: 13%|█▎ | 399/3125 [06:38<45:28, 1.00s/it]
391
+ 2025-06-10 01:38:02,777 - __main__ - INFO - Epoch [15/18], Step [400/3125], Loss: 2.2692, Perplexity: 9.6715
392
+ Epoch 15 Training: 16%|█▌ | 499/3125 [08:18<43:07, 1.01it/s]
393
+ 2025-06-10 01:39:42,121 - __main__ - INFO - Epoch [15/18], Step [500/3125], Loss: 2.3727, Perplexity: 10.7258
394
+ Epoch 15 Training: 19%|█▉ | 599/3125 [09:57<41:57, 1.00it/s]
395
+ 2025-06-10 01:41:21,548 - __main__ - INFO - Epoch [15/18], Step [600/3125], Loss: 2.3680, Perplexity: 10.6762
396
+ Epoch 15 Training: 22%|██▏ | 699/3125 [11:36<39:54, 1.01it/s]
397
+ 2025-06-10 01:43:00,303 - __main__ - INFO - Epoch [15/18], Step [700/3125], Loss: 2.2971, Perplexity: 9.9450
398
+ Epoch 15 Training: 26%|██▌ | 799/3125 [13:15<37:38, 1.03it/s]
399
+ 2025-06-10 01:44:39,409 - __main__ - INFO - Epoch [15/18], Step [800/3125], Loss: 2.2048, Perplexity: 9.0680
400
+ Epoch 15 Training: 29%|██▉ | 899/3125 [14:54<36:39, 1.01it/s]
401
+ 2025-06-10 01:46:19,040 - __main__ - INFO - Epoch [15/18], Step [900/3125], Loss: 2.6075, Perplexity: 13.5645
402
+ Epoch 15 Training: 32%|███▏ | 999/3125 [16:34<35:14, 1.01it/s]
403
+ 2025-06-10 01:47:58,449 - __main__ - INFO - Epoch [15/18], Step [1000/3125], Loss: 2.4252, Perplexity: 11.3049
404
+ Epoch 15 Training: 35%|███▌ | 1099/3125 [18:12<33:25, 1.01it/s]
405
+ 2025-06-10 01:49:37,039 - __main__ - INFO - Epoch [15/18], Step [1100/3125], Loss: 2.4678, Perplexity: 11.7969
406
+ Epoch 15 Training: 38%|███▊ | 1199/3125 [19:52<32:01, 1.00it/s]
407
+ 2025-06-10 01:51:16,217 - __main__ - INFO - Epoch [15/18], Step [1200/3125], Loss: 2.4770, Perplexity: 11.9052
408
+ Epoch 15 Training: 42%|████▏ | 1299/3125 [21:31<30:05, 1.01it/s]
409
+ 2025-06-10 01:52:55,362 - __main__ - INFO - Epoch [15/18], Step [1300/3125], Loss: 2.6406, Perplexity: 14.0220
410
+ Epoch 15 Training: 45%|████▍ | 1399/3125 [23:09<28:14, 1.02it/s]
411
+ 2025-06-10 01:54:34,083 - __main__ - INFO - Epoch [15/18], Step [1400/3125], Loss: 2.5408, Perplexity: 12.6903
412
+ Epoch 15 Training: 48%|████▊ | 1499/3125 [24:49<26:55, 1.01it/s]
413
+ 2025-06-10 01:56:13,541 - __main__ - INFO - Epoch [15/18], Step [1500/3125], Loss: 2.2871, Perplexity: 9.8462
414
+ Epoch 15 Training: 51%|█████ | 1599/3125 [26:28<25:24, 1.00it/s]
415
+ 2025-06-10 01:57:52,988 - __main__ - INFO - Epoch [15/18], Step [1600/3125], Loss: 2.2426, Perplexity: 9.4179
416
+ Epoch 15 Training: 54%|█████▍ | 1699/3125 [28:08<23:48, 1.00s/it]
417
+ 2025-06-10 01:59:32,458 - __main__ - INFO - Epoch [15/18], Step [1700/3125], Loss: 2.4471, Perplexity: 11.5546
418
+ Epoch 15 Training: 58%|█████▊ | 1799/3125 [29:47<22:07, 1.00s/it]
419
+ 2025-06-10 02:01:11,761 - __main__ - INFO - Epoch [15/18], Step [1800/3125], Loss: 2.2109, Perplexity: 9.1242
420
+ Epoch 15 Training: 61%|██████ | 1899/3125 [31:27<19:57, 1.02it/s]
421
+ 2025-06-10 02:02:51,374 - __main__ - INFO - Epoch [15/18], Step [1900/3125], Loss: 2.2128, Perplexity: 9.1415
422
+ Epoch 15 Training: 64%|██████▍ | 1999/3125 [33:06<18:27, 1.02it/s]
423
+ 2025-06-10 02:04:30,402 - __main__ - INFO - Epoch [15/18], Step [2000/3125], Loss: 2.4859, Perplexity: 12.0115
424
+ Epoch 15 Training: 67%|██████▋ | 2099/3125 [34:45<16:45, 1.02it/s]
425
+ 2025-06-10 02:06:09,201 - __main__ - INFO - Epoch [15/18], Step [2100/3125], Loss: 2.1893, Perplexity: 8.9290
426
+ Epoch 15 Training: 70%|███████ | 2199/3125 [36:24<15:26, 1.00s/it]
427
+ 2025-06-10 02:07:48,565 - __main__ - INFO - Epoch [15/18], Step [2200/3125], Loss: 2.3444, Perplexity: 10.4273
428
+ Epoch 15 Training: 74%|███████▎ | 2299/3125 [38:03<13:51, 1.01s/it]
429
+ 2025-06-10 02:09:27,850 - __main__ - INFO - Epoch [15/18], Step [2300/3125], Loss: 2.3961, Perplexity: 10.9804
430
+ Epoch 15 Training: 77%|███████▋ | 2399/3125 [39:42<11:59, 1.01it/s]
431
+ 2025-06-10 02:11:06,898 - __main__ - INFO - Epoch [15/18], Step [2400/3125], Loss: 2.4591, Perplexity: 11.6937
432
+ Epoch 15 Training: 80%|███████▉ | 2499/3125 [41:22<10:09, 1.03it/s]
433
+ 2025-06-10 02:12:46,565 - __main__ - INFO - Epoch [15/18], Step [2500/3125], Loss: 2.3786, Perplexity: 10.7898
434
+ Epoch 15 Training: 83%|████████▎ | 2599/3125 [43:01<08:43, 1.00it/s]
435
+ 2025-06-10 02:14:25,999 - __main__ - INFO - Epoch [15/18], Step [2600/3125], Loss: 2.2436, Perplexity: 9.4268
436
+ Epoch 15 Training: 86%|████████▋ | 2699/3125 [44:41<07:04, 1.00it/s]
437
+ 2025-06-10 02:16:05,214 - __main__ - INFO - Epoch [15/18], Step [2700/3125], Loss: 2.5708, Perplexity: 13.0756
438
+ Epoch 15 Training: 90%|████████▉ | 2799/3125 [46:20<05:32, 1.02s/it]
439
+ 2025-06-10 02:17:44,978 - __main__ - INFO - Epoch [15/18], Step [2800/3125], Loss: 2.3898, Perplexity: 10.9116
440
+ Epoch 15 Training: 93%|█████████▎| 2899/3125 [48:00<03:44, 1.01it/s]
441
+ 2025-06-10 02:19:24,486 - __main__ - INFO - Epoch [15/18], Step [2900/3125], Loss: 2.4656, Perplexity: 11.7705
442
+ Epoch 15 Training: 96%|█████████▌| 2999/3125 [49:39<02:03, 1.02it/s]
443
+ 2025-06-10 02:21:03,418 - __main__ - INFO - Epoch [15/18], Step [3000/3125], Loss: 2.3119, Perplexity: 10.0933
444
+ Epoch 15 Training: 99%|█████████▉| 3099/3125 [51:18<00:25, 1.02it/s]
445
+ 2025-06-10 02:22:42,495 - __main__ - INFO - Epoch [15/18], Step [3100/3125], Loss: 2.3714, Perplexity: 10.7121
446
+ Epoch 15 Training: 100%|██████████| 3125/3125 [51:44<00:00, 1.01it/s]
447
+ 2025-06-10 02:23:07,613 - __main__ - INFO - Epoch 15 Training finished. Avg Loss: 2.3624, Time: 3104.49s
448
+ Validation: 100%|██████████| 391/391 [12:00<00:00, 1.84s/it]
449
+ 2025-06-10 02:35:07,702 - __main__ - INFO - Validation Avg Loss: 2.5179, Perplexity: 12.4026
450
+ 2025-06-10 02:35:17,310 - __main__ - INFO - Validation BLEU-4: 0.1042
451
+ 2025-06-10 02:35:17,952 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.1042.pth
452
+ 2025-06-10 02:35:18,572 - __main__ - INFO - Saved periodic model checkpoint to ./output/model_epoch_15.pth
453
+ 2025-06-10 02:35:18,573 - __main__ - INFO - Performing memory optimization after epoch...
454
+ 2025-06-10 02:35:18,838 - __main__ - INFO - CUDA cache emptied.
455
+ 2025-06-10 02:35:19,180 - __main__ - INFO - Python garbage collector run.
456
+ Epoch 16 Training: 3%|▎ | 99/3125 [01:41<50:16, 1.00it/s]
457
+ 2025-06-10 02:37:01,394 - __main__ - INFO - Epoch [16/18], Step [100/3125], Loss: 2.3317, Perplexity: 10.2956
458
+ Epoch 16 Training: 6%|▋ | 199/3125 [03:21<48:24, 1.01it/s]
459
+ 2025-06-10 02:38:41,260 - __main__ - INFO - Epoch [16/18], Step [200/3125], Loss: 2.3495, Perplexity: 10.4806
460
+ Epoch 16 Training: 10%|▉ | 299/3125 [05:00<47:30, 1.01s/it]
461
+ 2025-06-10 02:40:20,729 - __main__ - INFO - Epoch [16/18], Step [300/3125], Loss: 2.2306, Perplexity: 9.3058
462
+ Epoch 16 Training: 13%|█▎ | 399/3125 [06:40<45:19, 1.00it/s]
463
+ 2025-06-10 02:42:00,751 - __main__ - INFO - Epoch [16/18], Step [400/3125], Loss: 2.3325, Perplexity: 10.3034
464
+ Epoch 16 Training: 16%|█▌ | 499/3125 [08:19<43:00, 1.02it/s]
465
+ 2025-06-10 02:43:39,918 - __main__ - INFO - Epoch [16/18], Step [500/3125], Loss: 2.1694, Perplexity: 8.7528
466
+ Epoch 16 Training: 19%|█▉ | 599/3125 [09:59<41:21, 1.02it/s]
467
+ 2025-06-10 02:45:19,186 - __main__ - INFO - Epoch [16/18], Step [600/3125], Loss: 2.3946, Perplexity: 10.9643
468
+ Epoch 16 Training: 22%|██▏ | 699/3125 [11:38<40:18, 1.00it/s]
469
+ 2025-06-10 02:46:58,656 - __main__ - INFO - Epoch [16/18], Step [700/3125], Loss: 2.3674, Perplexity: 10.6700
470
+ Epoch 16 Training: 26%|██▌ | 799/3125 [13:17<38:25, 1.01it/s]
471
+ 2025-06-10 02:48:37,469 - __main__ - INFO - Epoch [16/18], Step [800/3125], Loss: 2.0632, Perplexity: 7.8708
472
+ Epoch 16 Training: 29%|██▉ | 899/3125 [14:57<37:35, 1.01s/it]
473
+ 2025-06-10 02:50:17,600 - __main__ - INFO - Epoch [16/18], Step [900/3125], Loss: 2.2940, Perplexity: 9.9146
474
+ Epoch 16 Training: 32%|███▏ | 999/3125 [16:37<34:59, 1.01it/s]
475
+ 2025-06-10 02:51:57,234 - __main__ - INFO - Epoch [16/18], Step [1000/3125], Loss: 2.2395, Perplexity: 9.3891
476
+ Epoch 16 Training: 35%|███▌ | 1099/3125 [18:16<33:56, 1.01s/it]
477
+ 2025-06-10 02:53:37,095 - __main__ - INFO - Epoch [16/18], Step [1100/3125], Loss: 2.4083, Perplexity: 11.1150
478
+ Epoch 16 Training: 38%|███▊ | 1199/3125 [19:56<31:49, 1.01it/s]
479
+ 2025-06-10 02:55:16,212 - __main__ - INFO - Epoch [16/18], Step [1200/3125], Loss: 2.3857, Perplexity: 10.8667
480
+ Epoch 16 Training: 42%|████▏ | 1299/3125 [21:35<30:20, 1.00it/s]
481
+ 2025-06-10 02:56:55,657 - __main__ - INFO - Epoch [16/18], Step [1300/3125], Loss: 2.3296, Perplexity: 10.2736
482
+ Epoch 16 Training: 45%|████▍ | 1399/3125 [23:15<28:18, 1.02it/s]
483
+ 2025-06-10 02:58:35,348 - __main__ - INFO - Epoch [16/18], Step [1400/3125], Loss: 2.1133, Perplexity: 8.2756
484
+ Epoch 16 Training: 48%|████▊ | 1499/3125 [24:54<26:23, 1.03it/s]
485
+ 2025-06-10 03:00:14,587 - __main__ - INFO - Epoch [16/18], Step [1500/3125], Loss: 2.3756, Perplexity: 10.7575
486
+ Epoch 16 Training: 51%|█████ | 1599/3125 [26:33<25:10, 1.01it/s]
487
+ 2025-06-10 03:01:53,838 - __main__ - INFO - Epoch [16/18], Step [1600/3125], Loss: 2.3544, Perplexity: 10.5314
488
+ Epoch 16 Training: 54%|█████▍ | 1699/3125 [28:13<23:56, 1.01s/it]
489
+ 2025-06-10 03:03:33,780 - __main__ - INFO - Epoch [16/18], Step [1700/3125], Loss: 2.3411, Perplexity: 10.3928
490
+ Epoch 16 Training: 58%|█████▊ | 1799/3125 [29:53<22:03, 1.00it/s]
491
+ 2025-06-10 03:05:13,555 - __main__ - INFO - Epoch [16/18], Step [1800/3125], Loss: 2.2282, Perplexity: 9.2828
492
+ Epoch 16 Training: 61%|██████ | 1899/3125 [31:33<20:17, 1.01it/s]
493
+ 2025-06-10 03:06:53,528 - __main__ - INFO - Epoch [16/18], Step [1900/3125], Loss: 2.3572, Perplexity: 10.5609
494
+ Epoch 16 Training: 64%|██████▍ | 1999/3125 [33:13<18:40, 1.00it/s]
495
+ 2025-06-10 03:08:33,299 - __main__ - INFO - Epoch [16/18], Step [2000/3125], Loss: 2.2650, Perplexity: 9.6316
496
+ Epoch 16 Training: 67%|██████▋ | 2099/3125 [34:52<16:49, 1.02it/s]
497
+ 2025-06-10 03:10:12,860 - __main__ - INFO - Epoch [16/18], Step [2100/3125], Loss: 2.4694, Perplexity: 11.8158
498
+ Epoch 16 Training: 70%|███████ | 2199/3125 [36:32<15:30, 1.00s/it]
499
+ 2025-06-10 03:11:52,351 - __main__ - INFO - Epoch [16/18], Step [2200/3125], Loss: 2.2784, Perplexity: 9.7614
500
+ Epoch 16 Training: 74%|███████▎ | 2299/3125 [38:11<13:41, 1.01it/s]
501
+ 2025-06-10 03:13:31,646 - __main__ - INFO - Epoch [16/18], Step [2300/3125], Loss: 2.3814, Perplexity: 10.8201
502
+ Epoch 16 Training: 77%|███████▋ | 2399/3125 [39:51<12:05, 1.00it/s]
503
+ 2025-06-10 03:15:11,562 - __main__ - INFO - Epoch [16/18], Step [2400/3125], Loss: 2.3831, Perplexity: 10.8388
504
+ Epoch 16 Training: 80%|███████▉ | 2499/3125 [41:30<10:24, 1.00it/s]
505
+ 2025-06-10 03:16:51,094 - __main__ - INFO - Epoch [16/18], Step [2500/3125], Loss: 2.2298, Perplexity: 9.2982
506
+ Epoch 16 Training: 83%|████████▎ | 2599/3125 [43:10<08:46, 1.00s/it]
507
+ 2025-06-10 03:18:30,989 - __main__ - INFO - Epoch [16/18], Step [2600/3125], Loss: 2.4608, Perplexity: 11.7146
508
+ Epoch 16 Training: 86%|████████▋ | 2699/3125 [44:50<07:06, 1.00s/it]
509
+ 2025-06-10 03:20:10,419 - __main__ - INFO - Epoch [16/18], Step [2700/3125], Loss: 2.4152, Perplexity: 11.1916
510
+ Epoch 16 Training: 90%|████████▉ | 2799/3125 [46:29<05:25, 1.00it/s]
511
+ 2025-06-10 03:21:49,872 - __main__ - INFO - Epoch [16/18], Step [2800/3125], Loss: 2.4860, Perplexity: 12.0134
512
+ Epoch 16 Training: 93%|█████████▎| 2899/3125 [48:09<03:41, 1.02it/s]
513
+ 2025-06-10 03:23:29,269 - __main__ - INFO - Epoch [16/18], Step [2900/3125], Loss: 2.1620, Perplexity: 8.6885
514
+ Epoch 16 Training: 96%|█████████▌| 2999/3125 [49:48<02:05, 1.01it/s]
515
+ 2025-06-10 03:25:09,144 - __main__ - INFO - Epoch [16/18], Step [3000/3125], Loss: 2.6663, Perplexity: 14.3864
516
+ Epoch 16 Training: 99%|█████████▉| 3099/3125 [51:28<00:26, 1.00s/it]
517
+ 2025-06-10 03:26:48,695 - __main__ - INFO - Epoch [16/18], Step [3100/3125], Loss: 2.5140, Perplexity: 12.3541
518
+ Epoch 16 Training: 100%|██████████| 3125/3125 [51:54<00:00, 1.00it/s]
519
+ 2025-06-10 03:27:14,007 - __main__ - INFO - Epoch 16 Training finished. Avg Loss: 2.3264, Time: 3114.82s
520
+ Validation: 100%|██████████| 391/391 [12:06<00:00, 1.86s/it]
521
+ 2025-06-10 03:39:20,377 - __main__ - INFO - Validation Avg Loss: 2.5298, Perplexity: 12.5513
522
+ 2025-06-10 03:39:30,111 - __main__ - INFO - Validation BLEU-4: 0.1047
523
+ 2025-06-10 03:39:30,777 - __main__ - INFO - Saved best model checkpoint to ./output/best_model_bleu0.1047.pth
524
+ 2025-06-10 03:39:30,778 - __main__ - INFO - Performing memory optimization after epoch...
525
+ 2025-06-10 03:39:31,073 - __main__ - INFO - CUDA cache emptied.
526
+ 2025-06-10 03:39:31,413 - __main__ - INFO - Python garbage collector run.
527
+ Epoch 17 Training: 3%|▎ | 99/3125 [01:41<49:33, 1.02it/s]
528
+ 2025-06-10 03:41:13,434 - __main__ - INFO - Epoch [17/18], Step [100/3125], Loss: 2.1717, Perplexity: 8.7730
529
+ Epoch 17 Training: 6%|▋ | 199/3125 [03:20<47:53, 1.02it/s]
530
+ 2025-06-10 03:42:52,934 - __main__ - INFO - Epoch [17/18], Step [200/3125], Loss: 2.2953, Perplexity: 9.9273
531
+ Epoch 17 Training: 10%|▉ | 299/3125 [05:00<46:40, 1.01it/s]
532
+ 2025-06-10 03:44:32,435 - __main__ - INFO - Epoch [17/18], Step [300/3125], Loss: 2.1929, Perplexity: 8.9608
533
+ Epoch 17 Training: 13%|█▎ | 399/3125 [06:38<44:54, 1.01it/s]
534
+ 2025-06-10 03:46:11,321 - __main__ - INFO - Epoch [17/18], Step [400/3125], Loss: 2.1077, Perplexity: 8.2291
535
+ Epoch 17 Training: 16%|█▌ | 499/3125 [08:18<44:24, 1.01s/it]
536
+ 2025-06-10 03:47:50,763 - __main__ - INFO - Epoch [17/18], Step [500/3125], Loss: 2.3065, Perplexity: 10.0394
537
+ Epoch 17 Training: 19%|█▉ | 599/3125 [09:57<41:07, 1.02it/s]
538
+ 2025-06-10 03:49:29,973 - __main__ - INFO - Epoch [17/18], Step [600/3125], Loss: 2.0144, Perplexity: 7.4962
539
+ Epoch 17 Training: 22%|██▏ | 699/3125 [11:36<40:13, 1.01it/s]
540
+ 2025-06-10 03:51:08,910 - __main__ - INFO - Epoch [17/18], Step [700/3125], Loss: 2.4351, Perplexity: 11.4164
541
+ Epoch 17 Training: 26%|██▌ | 799/3125 [13:15<38:10, 1.02it/s]
542
+ 2025-06-10 03:52:47,480 - __main__ - INFO - Epoch [17/18], Step [800/3125], Loss: 2.3415, Perplexity: 10.3965
543
+ Epoch 17 Training: 29%|██▉ | 899/3125 [14:54<36:43, 1.01it/s]
544
+ 2025-06-10 03:54:26,563 - __main__ - INFO - Epoch [17/18], Step [900/3125], Loss: 2.1439, Perplexity: 8.5324
545
+ Epoch 17 Training: 32%|███▏ | 999/3125 [16:34<36:04, 1.02s/it]
546
+ 2025-06-10 03:56:06,565 - __main__ - INFO - Epoch [17/18], Step [1000/3125], Loss: 2.4303, Perplexity: 11.3623
547
+ Epoch 17 Training: 35%|███▌ | 1099/3125 [18:13<33:49, 1.00s/it]
548
+ 2025-06-10 03:57:46,160 - __main__ - INFO - Epoch [17/18], Step [1100/3125], Loss: 2.2407, Perplexity: 9.3998
549
+ Epoch 17 Training: 38%|███▊ | 1199/3125 [19:53<31:51, 1.01it/s]
550
+ 2025-06-10 03:59:25,975 - __main__ - INFO - Epoch [17/18], Step [1200/3125], Loss: 2.2660, Perplexity: 9.6408
551
+ Epoch 17 Training: 42%|████▏ | 1299/3125 [21:33<30:11, 1.01it/s]
552
+ 2025-06-10 04:01:05,823 - __main__ - INFO - Epoch [17/18], Step [1300/3125], Loss: 2.2952, Perplexity: 9.9262
553
+ Epoch 17 Training: 45%|████▍ | 1399/3125 [23:12<28:26, 1.01it/s]
554
+ 2025-06-10 04:02:45,094 - __main__ - INFO - Epoch [17/18], Step [1400/3125], Loss: 2.4064, Perplexity: 11.0944
555
+ Epoch 17 Training: 48%|████▊ | 1499/3125 [24:52<27:26, 1.01s/it]
556
+ 2025-06-10 04:04:24,506 - __main__ - INFO - Epoch [17/18], Step [1500/3125], Loss: 2.5295, Perplexity: 12.5467
557
+ Epoch 17 Training: 51%|█████ | 1599/3125 [26:31<25:00, 1.02it/s]
558
+ 2025-06-10 04:06:03,514 - __main__ - INFO - Epoch [17/18], Step [1600/3125], Loss: 2.1637, Perplexity: 8.7030
559
+ Epoch 17 Training: 54%|█████▍ | 1699/3125 [28:10<23:23, 1.02it/s]
560
+ 2025-06-10 04:07:43,189 - __main__ - INFO - Epoch [17/18], Step [1700/3125], Loss: 2.3608, Perplexity: 10.5998
561
+ Epoch 17 Training: 58%|█████▊ | 1799/3125 [29:50<21:51, 1.01it/s]
562
+ 2025-06-10 04:09:22,526 - __main__ - INFO - Epoch [17/18], Step [1800/3125], Loss: 2.1197, Perplexity: 8.3285
563
+ Epoch 17 Training: 61%|██████ | 1899/3125 [31:29<20:36, 1.01s/it]
564
+ 2025-06-10 04:11:01,558 - __main__ - INFO - Epoch [17/18], Step [1900/3125], Loss: 2.2632, Perplexity: 9.6134
565
+ Epoch 17 Training: 64%|██████▍ | 1999/3125 [33:08<18:50, 1.00s/it]
566
+ 2025-06-10 04:12:40,863 - __main__ - INFO - Epoch [17/18], Step [2000/3125], Loss: 2.0741, Perplexity: 7.9572
567
+ Epoch 17 Training: 67%|██████▋ | 2099/3125 [34:47<16:58, 1.01it/s]
568
+ 2025-06-10 04:14:20,125 - __main__ - INFO - Epoch [17/18], Step [2100/3125], Loss: 2.2237, Perplexity: 9.2414
569
+ Epoch 17 Training: 70%|███████ | 2199/3125 [36:27<15:12, 1.01it/s]
570
+ 2025-06-10 04:15:59,690 - __main__ - INFO - Epoch [17/18], Step [2200/3125], Loss: 2.1895, Perplexity: 8.9311
571
+ Epoch 17 Training: 74%|███████▎ | 2299/3125 [38:06<13:45, 1.00it/s]
572
+ 2025-06-10 04:17:39,132 - __main__ - INFO - Epoch [17/18], Step [2300/3125], Loss: 2.3708, Perplexity: 10.7057
573
+ Epoch 17 Training: 77%|███████▋ | 2399/3125 [39:46<11:50, 1.02it/s]
574
+ 2025-06-10 04:19:18,650 - __main__ - INFO - Epoch [17/18], Step [2400/3125], Loss: 2.2712, Perplexity: 9.6909
575
+ Epoch 17 Training: 80%|███████▉ | 2499/3125 [41:25<10:20, 1.01it/s]
576
+ 2025-06-10 04:20:58,163 - __main__ - INFO - Epoch [17/18], Step [2500/3125], Loss: 2.3275, Perplexity: 10.2521
577
+ Epoch 17 Training: 83%|████████▎ | 2599/3125 [43:04<08:34, 1.02it/s]
578
+ 2025-06-10 04:22:37,221 - __main__ - INFO - Epoch [17/18], Step [2600/3125], Loss: 2.1308, Perplexity: 8.4219
579
+ Epoch 17 Training: 86%|████████▋ | 2699/3125 [44:43<06:55, 1.02it/s]
580
+ 2025-06-10 04:24:16,203 - __main__ - INFO - Epoch [17/18], Step [2700/3125], Loss: 2.3833, Perplexity: 10.8407
581
+ Epoch 17 Training: 90%|████████▉ | 2799/3125 [46:22<05:26, 1.00s/it]
582
+ 2025-06-10 04:25:55,437 - __main__ - INFO - Epoch [17/18], Step [2800/3125], Loss: 2.4461, Perplexity: 11.5430
583
+ Epoch 17 Training: 93%|█████████▎| 2899/3125 [48:02<03:44, 1.01it/s]
584
+ 2025-06-10 04:27:34,464 - __main__ - INFO - Epoch [17/18], Step [2900/3125], Loss: 2.0600, Perplexity: 7.8457
585
+ Epoch 17 Training: 96%|█████████▌| 2999/3125 [49:41<02:05, 1.01it/s]
586
+ 2025-06-10 04:29:13,778 - __main__ - INFO - Epoch [17/18], Step [3000/3125], Loss: 2.2095, Perplexity: 9.1108
587
+ Epoch 17 Training: 99%|█████████▉| 3099/3125 [51:20<00:26, 1.02s/it]
588
+ 2025-06-10 04:30:53,335 - __main__ - INFO - Epoch [17/18], Step [3100/3125], Loss: 2.0986, Perplexity: 8.1547
589
+ Epoch 17 Training: 100%|██████████| 3125/3125 [51:47<00:00, 1.01it/s]
590
+ 2025-06-10 04:31:18,469 - __main__ - INFO - Epoch 17 Training finished. Avg Loss: 2.2938, Time: 3107.05s
591
+ Validation: 100%|██████████| 391/391 [11:31<00:00, 1.77s/it]
592
+ 2025-06-10 04:42:50,318 - __main__ - INFO - Validation Avg Loss: 2.5300, Perplexity: 12.5540
593
+ 2025-06-10 04:42:59,665 - __main__ - INFO - Validation BLEU-4: 0.1040
594
+ 2025-06-10 04:42:59,666 - __main__ - INFO - Performing memory optimization after epoch...
595
+ 2025-06-10 04:42:59,930 - __main__ - INFO - CUDA cache emptied.
596
+ 2025-06-10 04:43:00,260 - __main__ - INFO - Python garbage collector run.
597
+ Epoch 18 Training: 3%|▎ | 99/3125 [01:41<49:15, 1.02it/s]
598
+ 2025-06-10 04:44:42,407 - __main__ - INFO - Epoch [18/18], Step [100/3125], Loss: 2.1066, Perplexity: 8.2205
599
+ Epoch 18 Training: 6%|▋ | 199/3125 [03:20<48:12, 1.01it/s]
600
+ 2025-06-10 04:46:21,631 - __main__ - INFO - Epoch [18/18], Step [200/3125], Loss: 2.1505, Perplexity: 8.5893
601
+ Epoch 18 Training: 10%|▉ | 299/3125 [04:59<47:55, 1.02s/it]
602
+ 2025-06-10 04:48:01,201 - __main__ - INFO - Epoch [18/18], Step [300/3125], Loss: 2.2280, Perplexity: 9.2809
603
+ Epoch 18 Training: 13%|█▎ | 399/3125 [06:39<45:23, 1.00it/s]
604
+ 2025-06-10 04:49:40,603 - __main__ - INFO - Epoch [18/18], Step [400/3125], Loss: 2.2427, Perplexity: 9.4188
605
+ Epoch 18 Training: 16%|█▌ | 499/3125 [08:18<43:50, 1.00s/it]
606
+ 2025-06-10 04:51:19,865 - __main__ - INFO - Epoch [18/18], Step [500/3125], Loss: 2.2644, Perplexity: 9.6253
607
+ Epoch 18 Training: 19%|█▉ | 599/3125 [09:57<42:00, 1.00it/s]
608
+ 2025-06-10 04:52:58,962 - __main__ - INFO - Epoch [18/18], Step [600/3125], Loss: 2.2968, Perplexity: 9.9419
609
+ Epoch 18 Training: 22%|██▏ | 699/3125 [11:37<40:05, 1.01it/s]
610
+ 2025-06-10 04:54:38,562 - __main__ - INFO - Epoch [18/18], Step [700/3125], Loss: 2.4407, Perplexity: 11.4816
611
+ Epoch 18 Training: 26%|██▌ | 799/3125 [13:16<38:41, 1.00it/s]
612
+ 2025-06-10 04:56:17,798 - __main__ - INFO - Epoch [18/18], Step [800/3125], Loss: 2.3881, Perplexity: 10.8931
613
+ Epoch 18 Training: 29%|██▉ | 899/3125 [14:56<36:14, 1.02it/s]
614
+ 2025-06-10 04:57:57,327 - __main__ - INFO - Epoch [18/18], Step [900/3125], Loss: 2.2182, Perplexity: 9.1906
615
+ Epoch 18 Training: 32%|███▏ | 999/3125 [16:34<34:58, 1.01it/s]
616
+ 2025-06-10 04:59:35,897 - __main__ - INFO - Epoch [18/18], Step [1000/3125], Loss: 2.3312, Perplexity: 10.2898
617
+ Epoch 18 Training: 35%|███▌ | 1099/3125 [18:13<34:01, 1.01s/it]
618
+ 2025-06-10 05:01:14,940 - __main__ - INFO - Epoch [18/18], Step [1100/3125], Loss: 2.0509, Perplexity: 7.7749
619
+ Epoch 18 Training: 38%|███▊ | 1199/3125 [19:53<32:03, 1.00it/s]
620
+ 2025-06-10 05:02:54,567 - __main__ - INFO - Epoch [18/18], Step [1200/3125], Loss: 2.3490, Perplexity: 10.4752
621
+ Epoch 18 Training: 42%|████▏ | 1299/3125 [21:32<30:12, 1.01it/s]
622
+ 2025-06-10 05:04:33,867 - __main__ - INFO - Epoch [18/18], Step [1300/3125], Loss: 2.2147, Perplexity: 9.1590
623
+ Epoch 18 Training: 45%|████▍ | 1399/3125 [23:12<28:30, 1.01it/s]
624
+ 2025-06-10 05:06:13,537 - __main__ - INFO - Epoch [18/18], Step [1400/3125], Loss: 2.3278, Perplexity: 10.2557
625
+ Epoch 18 Training: 48%|████▊ | 1499/3125 [24:51<27:01, 1.00it/s]
626
+ 2025-06-10 05:07:52,776 - __main__ - INFO - Epoch [18/18], Step [1500/3125], Loss: 1.9178, Perplexity: 6.8062
627
+ Epoch 18 Training: 51%|█████ | 1599/3125 [26:30<25:01, 1.02it/s]
628
+ 2025-06-10 05:09:31,970 - __main__ - INFO - Epoch [18/18], Step [1600/3125], Loss: 2.2685, Perplexity: 9.6654
629
+ Epoch 18 Training: 54%|█████▍ | 1699/3125 [28:10<23:19, 1.02it/s]
630
+ 2025-06-10 05:11:11,387 - __main__ - INFO - Epoch [18/18], Step [1700/3125], Loss: 2.2014, Perplexity: 9.0381
631
+ Epoch 18 Training: 58%|█████▊ | 1799/3125 [29:49<21:54, 1.01it/s]
632
+ 2025-06-10 05:12:51,080 - __main__ - INFO - Epoch [18/18], Step [1800/3125], Loss: 2.0424, Perplexity: 7.7092
633
+ Epoch 18 Training: 61%|██████ | 1899/3125 [31:29<20:22, 1.00it/s]
634
+ 2025-06-10 05:14:30,955 - __main__ - INFO - Epoch [18/18], Step [1900/3125], Loss: 2.3529, Perplexity: 10.5158
635
+ Epoch 18 Training: 64%|██████▍ | 1999/3125 [33:09<19:06, 1.02s/it]
636
+ 2025-06-10 05:16:10,452 - __main__ - INFO - Epoch [18/18], Step [2000/3125], Loss: 2.2688, Perplexity: 9.6676
637
+ Epoch 18 Training: 67%|██████▋ | 2099/3125 [34:48<17:13, 1.01s/it]
638
+ 2025-06-10 05:17:49,445 - __main__ - INFO - Epoch [18/18], Step [2100/3125], Loss: 2.1891, Perplexity: 8.9270
639
+ Epoch 18 Training: 70%|███████ | 2199/3125 [36:27<15:05, 1.02it/s]
640
+ 2025-06-10 05:19:28,304 - __main__ - INFO - Epoch [18/18], Step [2200/3125], Loss: 2.1682, Perplexity: 8.7424
641
+ Epoch 18 Training: 74%|███████▎ | 2299/3125 [38:05<13:34, 1.01it/s]
642
+ 2025-06-10 05:21:07,173 - __main__ - INFO - Epoch [18/18], Step [2300/3125], Loss: 2.3965, Perplexity: 10.9842
643
+ Epoch 18 Training: 77%|███████▋ | 2399/3125 [39:45<12:08, 1.00s/it]
644
+ 2025-06-10 05:22:46,706 - __main__ - INFO - Epoch [18/18], Step [2400/3125], Loss: 2.2135, Perplexity: 9.1480
645
+ Epoch 18 Training: 80%|███████▉ | 2499/3125 [41:24<10:26, 1.00s/it]
646
+ 2025-06-10 05:24:25,673 - __main__ - INFO - Epoch [18/18], Step [2500/3125], Loss: 2.4143, Perplexity: 11.1821
647
+ Epoch 18 Training: 83%|████████▎ | 2599/3125 [43:03<08:40, 1.01it/s]
648
+ 2025-06-10 05:26:04,713 - __main__ - INFO - Epoch [18/18], Step [2600/3125], Loss: 2.3422, Perplexity: 10.4039
649
+ Epoch 18 Training: 86%|████████▋ | 2699/3125 [44:42<07:04, 1.00it/s]
650
+ 2025-06-10 05:27:44,011 - __main__ - INFO - Epoch [18/18], Step [2700/3125], Loss: 2.2772, Perplexity: 9.7493
651
+ Epoch 18 Training: 90%|████████▉ | 2799/3125 [46:22<05:21, 1.01it/s]
652
+ 2025-06-10 05:29:23,479 - __main__ - INFO - Epoch [18/18], Step [2800/3125], Loss: 2.3167, Perplexity: 10.1422
653
+ Epoch 18 Training: 93%|█████████▎| 2899/3125 [48:01<03:48, 1.01s/it]
654
+ 2025-06-10 05:31:02,917 - __main__ - INFO - Epoch [18/18], Step [2900/3125], Loss: 2.3364, Perplexity: 10.3438
655
+ Epoch 18 Training: 96%|█████████▌| 2999/3125 [49:40<02:03, 1.02it/s]
656
+ 2025-06-10 05:32:41,879 - __main__ - INFO - Epoch [18/18], Step [3000/3125], Loss: 2.0664, Perplexity: 7.8962
657
+ Epoch 18 Training: 99%|█████████▉| 3099/3125 [51:19<00:25, 1.02it/s]
658
+ 2025-06-10 05:34:20,694 - __main__ - INFO - Epoch [18/18], Step [3100/3125], Loss: 2.2455, Perplexity: 9.4448
659
+ Epoch 18 Training: 100%|██████████| 3125/3125 [51:45<00:00, 1.01it/s]
660
+ 2025-06-10 05:34:45,854 - __main__ - INFO - Epoch 18 Training finished. Avg Loss: 2.2637, Time: 3105.59s
661
+ Validation: 100%|██████████| 391/391 [11:38<00:00, 1.79s/it]
662
+ 2025-06-10 05:46:23,861 - __main__ - INFO - Validation Avg Loss: 2.5381, Perplexity: 12.6555
663
+ 2025-06-10 05:46:33,271 - __main__ - INFO - Validation BLEU-4: 0.1029
664
+ 2025-06-10 05:46:33,272 - __main__ - INFO - Performing memory optimization after epoch...
665
+ 2025-06-10 05:46:33,546 - __main__ - INFO - CUDA cache emptied.
666
+ 2025-06-10 05:46:33,877 - __main__ - INFO - Python garbage collector run.
667
+ 2025-06-10 05:46:33,878 - __main__ - INFO - Training complete.
668
+ 2025-06-10 05:46:33,879 - __main__ - INFO - Model Training Complete!
text_files/training_log_21_30.txt ADDED
@@ -0,0 +1,734 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-06-10 20:53:19,604 - __main__ - INFO - Previous notebook output found at: /kaggle/input/part-4-28-36/output. Copying to ./output...
2
+ 2025-06-10 20:55:03,275 - __main__ - INFO - Previous output copied successfully to current working directory for resumption.
3
+ 2025-06-10 20:55:03,276 - __main__ - INFO -
4
+ --- Starting Model Training ---
5
+ 2025-06-10 20:55:03,277 - __main__ - INFO - Starting training process...
6
+ 2025-06-10 20:55:03,277 - __main__ - INFO - Using device: cuda
7
+ 2025-06-10 20:55:03,284 - __main__ - WARNING - Vocabulary source not found at /kaggle/input/vocabulary_s/pytorch/default/1/vocabulary.pkl. Will build new vocabulary.
8
+ 2025-06-10 20:55:03,285 - __main__ - INFO - Building new vocabulary from training dataset...
9
+ 2025-06-10 20:55:04,299 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_train2017.json
10
+ Processing annotations: 100%|██████████| 591753/591753 [05:24<00:00, 1824.55it/s]
11
+ 2025-06-10 21:00:28,664 - __main__ - INFO - Dataset size after filtering: 591753 samples.
12
+ 2025-06-10 21:00:28,711 - __main__ - INFO - Building vocabulary...
13
+ Counting word frequencies: 100%|██████████| 591753/591753 [00:01<00:00, 318528.75it/s]
14
+ 2025-06-10 21:00:30,590 - __main__ - INFO - Vocabulary size: 14030
15
+ 2025-06-10 21:00:30,741 - __main__ - INFO - New vocabulary built.
16
+ 2025-06-10 21:00:30,755 - __main__ - INFO - Saved newly built vocabulary to ./output/vocabulary.pkl
17
+ 2025-06-10 21:00:31,712 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_train2017.json
18
+ Processing annotations: 100%|██████████| 591753/591753 [03:24<00:00, 2887.41it/s]
19
+ 2025-06-10 21:03:56,848 - __main__ - INFO - Using subset of 200000 samples for the dataset.
20
+ 2025-06-10 21:03:56,848 - __main__ - INFO - Dataset size after filtering: 200000 samples.
21
+ 2025-06-10 21:03:56,937 - __main__ - INFO - Successfully loaded captions from /kaggle/input/coco-2017-dataset/coco2017/annotations/captions_val2017.json
22
+ Processing annotations: 100%|██████████| 25014/25014 [00:15<00:00, 1607.29it/s]
23
+ 2025-06-10 21:04:12,505 - __main__ - INFO - Dataset size after filtering: 25014 samples.
24
+ 2025-06-10 21:04:12,506 - __main__ - INFO - Training dataset size: 200000
25
+ 2025-06-10 21:04:12,507 - __main__ - INFO - Validation dataset size: 25014
26
+ Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
27
+ 100%|██████████| 97.8M/97.8M [00:00<00:00, 201MB/s]
28
+ 2025-06-10 21:04:13,819 - __main__ - INFO - ResNet encoder base layers are fine-tuning enabled.
29
+ 2025-06-10 21:04:14,310 - __main__ - INFO - Attempting to resume training from: ./output/best_model_bleu0.1058.pth
30
+ 2025-06-10 21:04:14,909 - __main__ - INFO - Resumed training from epoch 20. Best validation score so far: 0.1058
31
+ Epoch 21 Training: 3%|▎ | 99/3125 [01:41<51:35, 1.02s/it]
32
+ 2025-06-10 21:05:56,977 - __main__ - INFO - Epoch [21/30], Step [100/3125], Loss: 2.4415, Perplexity: 11.4905
33
+ Epoch 21 Training: 6%|▋ | 199/3125 [03:27<51:39, 1.06s/it]
34
+ 2025-06-10 21:07:43,080 - __main__ - INFO - Epoch [21/30], Step [200/3125], Loss: 2.4132, Perplexity: 11.1694
35
+ Epoch 21 Training: 10%|▉ | 299/3125 [05:13<50:24, 1.07s/it]
36
+ 2025-06-10 21:09:29,550 - __main__ - INFO - Epoch [21/30], Step [300/3125], Loss: 2.5896, Perplexity: 13.3244
37
+ Epoch 21 Training: 13%|█▎ | 399/3125 [06:59<48:25, 1.07s/it]
38
+ 2025-06-10 21:11:15,424 - __main__ - INFO - Epoch [21/30], Step [400/3125], Loss: 2.4345, Perplexity: 11.4096
39
+ Epoch 21 Training: 16%|█▌ | 499/3125 [08:45<45:43, 1.04s/it]
40
+ 2025-06-10 21:13:01,920 - __main__ - INFO - Epoch [21/30], Step [500/3125], Loss: 2.4611, Perplexity: 11.7174
41
+ Epoch 21 Training: 19%|█▉ | 599/3125 [10:32<44:58, 1.07s/it]
42
+ 2025-06-10 21:14:48,141 - __main__ - INFO - Epoch [21/30], Step [600/3125], Loss: 2.7550, Perplexity: 15.7210
43
+ Epoch 21 Training: 22%|██▏ | 699/3125 [12:18<42:27, 1.05s/it]
44
+ 2025-06-10 21:16:33,948 - __main__ - INFO - Epoch [21/30], Step [700/3125], Loss: 2.3508, Perplexity: 10.4938
45
+ Epoch 21 Training: 26%|██▌ | 799/3125 [14:03<40:56, 1.06s/it]
46
+ 2025-06-10 21:18:19,911 - __main__ - INFO - Epoch [21/30], Step [800/3125], Loss: 2.5791, Perplexity: 13.1847
47
+ Epoch 21 Training: 29%|██▉ | 899/3125 [15:50<39:04, 1.05s/it]
48
+ 2025-06-10 21:20:06,147 - __main__ - INFO - Epoch [21/30], Step [900/3125], Loss: 2.2819, Perplexity: 9.7949
49
+ Epoch 21 Training: 32%|███▏ | 999/3125 [17:36<37:53, 1.07s/it]
50
+ 2025-06-10 21:21:52,490 - __main__ - INFO - Epoch [21/30], Step [1000/3125], Loss: 2.4685, Perplexity: 11.8042
51
+ Epoch 21 Training: 35%|███▌ | 1099/3125 [19:22<36:05, 1.07s/it]
52
+ 2025-06-10 21:23:38,264 - __main__ - INFO - Epoch [21/30], Step [1100/3125], Loss: 2.5896, Perplexity: 13.3245
53
+ Epoch 21 Training: 38%|███▊ | 1199/3125 [21:08<33:27, 1.04s/it]
54
+ 2025-06-10 21:25:24,152 - __main__ - INFO - Epoch [21/30], Step [1200/3125], Loss: 2.2209, Perplexity: 9.2160
55
+ Epoch 21 Training: 42%|████▏ | 1299/3125 [22:54<32:42, 1.08s/it]
56
+ 2025-06-10 21:27:10,347 - __main__ - INFO - Epoch [21/30], Step [1300/3125], Loss: 2.5391, Perplexity: 12.6681
57
+ Epoch 21 Training: 45%|████▍ | 1399/3125 [24:40<30:37, 1.06s/it]
58
+ 2025-06-10 21:28:56,226 - __main__ - INFO - Epoch [21/30], Step [1400/3125], Loss: 2.5600, Perplexity: 12.9352
59
+ Epoch 21 Training: 48%|████▊ | 1499/3125 [26:26<28:51, 1.06s/it]
60
+ 2025-06-10 21:30:42,131 - __main__ - INFO - Epoch [21/30], Step [1500/3125], Loss: 2.4823, Perplexity: 11.9683
61
+ Epoch 21 Training: 51%|█████ | 1599/3125 [28:12<26:58, 1.06s/it]
62
+ 2025-06-10 21:32:28,501 - __main__ - INFO - Epoch [21/30], Step [1600/3125], Loss: 2.6207, Perplexity: 13.7450
63
+ Epoch 21 Training: 54%|█████▍ | 1699/3125 [29:57<24:43, 1.04s/it]
64
+ 2025-06-10 21:34:13,930 - __main__ - INFO - Epoch [21/30], Step [1700/3125], Loss: 2.2406, Perplexity: 9.3989
65
+ Epoch 21 Training: 58%|█████▊ | 1799/3125 [31:43<23:20, 1.06s/it]
66
+ 2025-06-10 21:35:59,951 - __main__ - INFO - Epoch [21/30], Step [1800/3125], Loss: 2.4947, Perplexity: 12.1177
67
+ Epoch 21 Training: 61%|██████ | 1899/3125 [33:30<21:38, 1.06s/it]
68
+ 2025-06-10 21:37:46,093 - __main__ - INFO - Epoch [21/30], Step [1900/3125], Loss: 2.3141, Perplexity: 10.1161
69
+ Epoch 21 Training: 64%|██████▍ | 1999/3125 [35:16<19:59, 1.07s/it]
70
+ 2025-06-10 21:39:32,258 - __main__ - INFO - Epoch [21/30], Step [2000/3125], Loss: 2.0813, Perplexity: 8.0148
71
+ Epoch 21 Training: 67%|██████▋ | 2099/3125 [37:02<17:52, 1.05s/it]
72
+ 2025-06-10 21:41:18,231 - __main__ - INFO - Epoch [21/30], Step [2100/3125], Loss: 2.5372, Perplexity: 12.6438
73
+ Epoch 21 Training: 70%|███████ | 2199/3125 [38:47<16:14, 1.05s/it]
74
+ 2025-06-10 21:43:03,869 - __main__ - INFO - Epoch [21/30], Step [2200/3125], Loss: 2.4665, Perplexity: 11.7807
75
+ Epoch 21 Training: 74%|███████▎ | 2299/3125 [40:34<14:31, 1.06s/it]
76
+ 2025-06-10 21:44:50,121 - __main__ - INFO - Epoch [21/30], Step [2300/3125], Loss: 2.4530, Perplexity: 11.6234
77
+ Epoch 21 Training: 77%|███████▋ | 2399/3125 [42:20<12:34, 1.04s/it]
78
+ 2025-06-10 21:46:36,332 - __main__ - INFO - Epoch [21/30], Step [2400/3125], Loss: 2.5520, Perplexity: 12.8323
79
+ Epoch 21 Training: 80%|███████▉ | 2499/3125 [44:06<11:06, 1.06s/it]
80
+ 2025-06-10 21:48:22,862 - __main__ - INFO - Epoch [21/30], Step [2500/3125], Loss: 2.2327, Perplexity: 9.3254
81
+ Epoch 21 Training: 83%|████████▎ | 2599/3125 [45:52<09:27, 1.08s/it]
82
+ 2025-06-10 21:50:08,942 - __main__ - INFO - Epoch [21/30], Step [2600/3125], Loss: 2.3985, Perplexity: 11.0071
83
+ Epoch 21 Training: 86%|████████▋ | 2699/3125 [47:39<07:34, 1.07s/it]
84
+ 2025-06-10 21:51:55,620 - __main__ - INFO - Epoch [21/30], Step [2700/3125], Loss: 2.4432, Perplexity: 11.5094
85
+ Epoch 21 Training: 90%|████████▉ | 2799/3125 [49:25<05:41, 1.05s/it]
86
+ 2025-06-10 21:53:41,636 - __main__ - INFO - Epoch [21/30], Step [2800/3125], Loss: 2.5233, Perplexity: 12.4703
87
+ Epoch 21 Training: 93%|█████████▎| 2899/3125 [51:11<03:52, 1.03s/it]
88
+ 2025-06-10 21:55:27,338 - __main__ - INFO - Epoch [21/30], Step [2900/3125], Loss: 2.4090, Perplexity: 11.1229
89
+ Epoch 21 Training: 96%|█████████▌| 2999/3125 [52:57<02:13, 1.06s/it]
90
+ 2025-06-10 21:57:13,693 - __main__ - INFO - Epoch [21/30], Step [3000/3125], Loss: 2.3775, Perplexity: 10.7784
91
+ Epoch 21 Training: 99%|█████████▉| 3099/3125 [54:44<00:28, 1.09s/it]
92
+ 2025-06-10 21:59:00,426 - __main__ - INFO - Epoch [21/30], Step [3100/3125], Loss: 2.3746, Perplexity: 10.7470
93
+ Epoch 21 Training: 100%|██████████| 3125/3125 [55:12<00:00, 1.06s/it]
94
+ 2025-06-10 21:59:27,122 - __main__ - INFO - Epoch 21 Training finished. Avg Loss: 2.4583, Time: 3312.21s
95
+ Validation: 100%|██████████| 391/391 [12:09<00:00, 1.86s/it]
96
+ 2025-06-10 22:11:36,158 - __main__ - INFO - Validation Avg Loss: 2.4595, Perplexity: 11.6989
97
+ 2025-06-10 22:11:50,870 - __main__ - INFO - Validation BLEU-4: 0.1050
98
+ 2025-06-10 22:11:50,871 - __main__ - INFO - Performing memory optimization after epoch...
99
+ 2025-06-10 22:11:51,146 - __main__ - INFO - CUDA cache emptied.
100
+ 2025-06-10 22:11:51,395 - __main__ - INFO - Python garbage collector run.
101
+ Epoch 22 Training: 3%|▎ | 99/3125 [01:49<54:20, 1.08s/it]
102
+ 2025-06-10 22:13:41,608 - __main__ - INFO - Epoch [22/30], Step [100/3125], Loss: 2.3862, Perplexity: 10.8717
103
+ Epoch 22 Training: 6%|▋ | 199/3125 [03:35<51:35, 1.06s/it]
104
+ 2025-06-10 22:15:27,731 - __main__ - INFO - Epoch [22/30], Step [200/3125], Loss: 2.5702, Perplexity: 13.0690
105
+ Epoch 22 Training: 10%|▉ | 299/3125 [05:22<50:17, 1.07s/it]
106
+ 2025-06-10 22:17:14,467 - __main__ - INFO - Epoch [22/30], Step [300/3125], Loss: 2.2531, Perplexity: 9.5168
107
+ Epoch 22 Training: 13%|█▎ | 399/3125 [07:08<47:40, 1.05s/it]
108
+ 2025-06-10 22:19:00,484 - __main__ - INFO - Epoch [22/30], Step [400/3125], Loss: 2.4486, Perplexity: 11.5717
109
+ Epoch 22 Training: 16%|█▌ | 499/3125 [08:54<46:05, 1.05s/it]
110
+ 2025-06-10 22:20:46,614 - __main__ - INFO - Epoch [22/30], Step [500/3125], Loss: 2.4738, Perplexity: 11.8676
111
+ Epoch 22 Training: 19%|█▉ | 599/3125 [10:40<43:56, 1.04s/it]
112
+ 2025-06-10 22:22:32,689 - __main__ - INFO - Epoch [22/30], Step [600/3125], Loss: 2.2134, Perplexity: 9.1471
113
+ Epoch 22 Training: 22%|██▏ | 699/3125 [12:26<42:46, 1.06s/it]
114
+ 2025-06-10 22:24:19,210 - __main__ - INFO - Epoch [22/30], Step [700/3125], Loss: 2.5602, Perplexity: 12.9386
115
+ Epoch 22 Training: 26%|██▌ | 799/3125 [14:13<41:00, 1.06s/it]
116
+ 2025-06-10 22:26:05,595 - __main__ - INFO - Epoch [22/30], Step [800/3125], Loss: 2.2362, Perplexity: 9.3575
117
+ Epoch 22 Training: 29%|██▉ | 899/3125 [15:59<39:51, 1.07s/it]
118
+ 2025-06-10 22:27:51,583 - __main__ - INFO - Epoch [22/30], Step [900/3125], Loss: 2.5474, Perplexity: 12.7736
119
+ Epoch 22 Training: 32%|███▏ | 999/3125 [17:45<37:09, 1.05s/it]
120
+ 2025-06-10 22:29:37,511 - __main__ - INFO - Epoch [22/30], Step [1000/3125], Loss: 2.0833, Perplexity: 8.0311
121
+ Epoch 22 Training: 35%|███▌ | 1099/3125 [19:31<36:00, 1.07s/it]
122
+ 2025-06-10 22:31:23,818 - __main__ - INFO - Epoch [22/30], Step [1100/3125], Loss: 2.2881, Perplexity: 9.8567
123
+ Epoch 22 Training: 38%|███▊ | 1199/3125 [21:17<33:42, 1.05s/it]
124
+ 2025-06-10 22:33:10,029 - __main__ - INFO - Epoch [22/30], Step [1200/3125], Loss: 2.4615, Perplexity: 11.7227
125
+ Epoch 22 Training: 42%|████▏ | 1299/3125 [23:04<32:16, 1.06s/it]
126
+ 2025-06-10 22:34:56,481 - __main__ - INFO - Epoch [22/30], Step [1300/3125], Loss: 2.2817, Perplexity: 9.7936
127
+ Epoch 22 Training: 45%|████▍ | 1399/3125 [24:49<30:29, 1.06s/it]
128
+ 2025-06-10 22:36:42,421 - __main__ - INFO - Epoch [22/30], Step [1400/3125], Loss: 2.5625, Perplexity: 12.9683
129
+ Epoch 22 Training: 48%|████▊ | 1499/3125 [26:35<28:07, 1.04s/it]
130
+ 2025-06-10 22:38:28,339 - __main__ - INFO - Epoch [22/30], Step [1500/3125], Loss: 2.1516, Perplexity: 8.5990
131
+ Epoch 22 Training: 51%|█████ | 1599/3125 [28:21<27:05, 1.07s/it]
132
+ 2025-06-10 22:40:14,168 - __main__ - INFO - Epoch [22/30], Step [1600/3125], Loss: 2.4977, Perplexity: 12.1545
133
+ Epoch 22 Training: 54%|█████▍ | 1699/3125 [30:08<25:18, 1.06s/it]
134
+ 2025-06-10 22:42:00,685 - __main__ - INFO - Epoch [22/30], Step [1700/3125], Loss: 2.2119, Perplexity: 9.1334
135
+ Epoch 22 Training: 58%|█████▊ | 1799/3125 [31:54<23:35, 1.07s/it]
136
+ 2025-06-10 22:43:47,004 - __main__ - INFO - Epoch [22/30], Step [1800/3125], Loss: 2.2168, Perplexity: 9.1775
137
+ Epoch 22 Training: 61%|██████ | 1899/3125 [33:41<21:24, 1.05s/it]
138
+ 2025-06-10 22:45:34,017 - __main__ - INFO - Epoch [22/30], Step [1900/3125], Loss: 2.4493, Perplexity: 11.5805
139
+ Epoch 22 Training: 64%|██████▍ | 1999/3125 [35:27<19:42, 1.05s/it]
140
+ 2025-06-10 22:47:19,916 - __main__ - INFO - Epoch [22/30], Step [2000/3125], Loss: 2.5140, Perplexity: 12.3543
141
+ Epoch 22 Training: 67%|██████▋ | 2099/3125 [37:13<18:13, 1.07s/it]
142
+ 2025-06-10 22:49:06,385 - __main__ - INFO - Epoch [22/30], Step [2100/3125], Loss: 2.2537, Perplexity: 9.5231
143
+ Epoch 22 Training: 70%|███████ | 2199/3125 [39:00<16:35, 1.08s/it]
144
+ 2025-06-10 22:50:53,063 - __main__ - INFO - Epoch [22/30], Step [2200/3125], Loss: 2.3459, Perplexity: 10.4427
145
+ Epoch 22 Training: 74%|███████▎ | 2299/3125 [40:46<14:28, 1.05s/it]
146
+ 2025-06-10 22:52:39,062 - __main__ - INFO - Epoch [22/30], Step [2300/3125], Loss: 2.2346, Perplexity: 9.3426
147
+ Epoch 22 Training: 77%|███████▋ | 2399/3125 [42:33<12:48, 1.06s/it]
148
+ 2025-06-10 22:54:25,721 - __main__ - INFO - Epoch [22/30], Step [2400/3125], Loss: 2.4777, Perplexity: 11.9139
149
+ Epoch 22 Training: 80%|███████▉ | 2499/3125 [44:19<11:06, 1.07s/it]
150
+ 2025-06-10 22:56:12,322 - __main__ - INFO - Epoch [22/30], Step [2500/3125], Loss: 2.3603, Perplexity: 10.5946
151
+ Epoch 22 Training: 83%|████████▎ | 2599/3125 [46:05<09:17, 1.06s/it]
152
+ 2025-06-10 22:57:57,889 - __main__ - INFO - Epoch [22/30], Step [2600/3125], Loss: 2.3461, Perplexity: 10.4452
153
+ Epoch 22 Training: 86%|████████▋ | 2699/3125 [47:51<07:28, 1.05s/it]
154
+ 2025-06-10 22:59:44,171 - __main__ - INFO - Epoch [22/30], Step [2700/3125], Loss: 2.3966, Perplexity: 10.9861
155
+ Epoch 22 Training: 90%|████████▉ | 2799/3125 [49:37<05:44, 1.06s/it]
156
+ 2025-06-10 23:01:30,177 - __main__ - INFO - Epoch [22/30], Step [2800/3125], Loss: 2.4276, Perplexity: 11.3319
157
+ Epoch 22 Training: 93%|█████████▎| 2899/3125 [51:24<03:57, 1.05s/it]
158
+ 2025-06-10 23:03:16,879 - __main__ - INFO - Epoch [22/30], Step [2900/3125], Loss: 2.3832, Perplexity: 10.8394
159
+ Epoch 22 Training: 96%|█████████▌| 2999/3125 [53:10<02:13, 1.06s/it]
160
+ 2025-06-10 23:05:02,687 - __main__ - INFO - Epoch [22/30], Step [3000/3125], Loss: 2.3673, Perplexity: 10.6688
161
+ Epoch 22 Training: 99%|█████████▉| 3099/3125 [54:56<00:27, 1.08s/it]
162
+ 2025-06-10 23:06:49,083 - __main__ - INFO - Epoch [22/30], Step [3100/3125], Loss: 2.2368, Perplexity: 9.3634
163
+ Epoch 22 Training: 100%|██████████| 3125/3125 [55:24<00:00, 1.06s/it]
164
+ 2025-06-10 23:07:15,758 - __main__ - INFO - Epoch 22 Training finished. Avg Loss: 2.3948, Time: 3324.36s
165
+ Validation: 100%|██████████| 391/391 [11:56<00:00, 1.83s/it]
166
+ 2025-06-10 23:19:12,047 - __main__ - INFO - Validation Avg Loss: 2.4655, Perplexity: 11.7698
167
+ 2025-06-10 23:19:22,587 - __main__ - INFO - Validation BLEU-4: 0.1049
168
+ 2025-06-10 23:19:22,588 - __main__ - INFO - Performing memory optimization after epoch...
169
+ 2025-06-10 23:19:22,860 - __main__ - INFO - CUDA cache emptied.
170
+ 2025-06-10 23:19:23,252 - __main__ - INFO - Python garbage collector run.
171
+ Epoch 23 Training: 3%|▎ | 99/3125 [01:49<54:15, 1.08s/it]
172
+ 2025-06-10 23:21:13,874 - __main__ - INFO - Epoch [23/30], Step [100/3125], Loss: 2.2665, Perplexity: 9.6457
173
+ Epoch 23 Training: 6%|▋ | 199/3125 [03:35<51:49, 1.06s/it]
174
+ 2025-06-10 23:23:00,151 - __main__ - INFO - Epoch [23/30], Step [200/3125], Loss: 2.1517, Perplexity: 8.5996
175
+ Epoch 23 Training: 10%|▉ | 299/3125 [05:22<49:46, 1.06s/it]
176
+ 2025-06-10 23:24:46,702 - __main__ - INFO - Epoch [23/30], Step [300/3125], Loss: 2.2574, Perplexity: 9.5578
177
+ Epoch 23 Training: 13%|█▎ | 399/3125 [07:07<47:28, 1.04s/it]
178
+ 2025-06-10 23:26:32,323 - __main__ - INFO - Epoch [23/30], Step [400/3125], Loss: 2.5305, Perplexity: 12.5602
179
+ Epoch 23 Training: 16%|█▌ | 499/3125 [08:54<47:09, 1.08s/it]
180
+ 2025-06-10 23:28:18,691 - __main__ - INFO - Epoch [23/30], Step [500/3125], Loss: 2.4340, Perplexity: 11.4045
181
+ Epoch 23 Training: 19%|█▉ | 599/3125 [10:40<44:49, 1.06s/it]
182
+ 2025-06-10 23:30:05,103 - __main__ - INFO - Epoch [23/30], Step [600/3125], Loss: 2.1875, Perplexity: 8.9125
183
+ Epoch 23 Training: 22%|██▏ | 699/3125 [12:27<42:33, 1.05s/it]
184
+ 2025-06-10 23:31:51,337 - __main__ - INFO - Epoch [23/30], Step [700/3125], Loss: 2.1650, Perplexity: 8.7143
185
+ Epoch 23 Training: 26%|██▌ | 799/3125 [14:12<41:21, 1.07s/it]
186
+ 2025-06-10 23:33:37,200 - __main__ - INFO - Epoch [23/30], Step [800/3125], Loss: 2.2312, Perplexity: 9.3107
187
+ Epoch 23 Training: 29%|██▉ | 899/3125 [15:58<38:33, 1.04s/it]
188
+ 2025-06-10 23:35:23,133 - __main__ - INFO - Epoch [23/30], Step [900/3125], Loss: 2.3446, Perplexity: 10.4295
189
+ Epoch 23 Training: 32%|███▏ | 999/3125 [17:45<38:01, 1.07s/it]
190
+ 2025-06-10 23:37:09,905 - __main__ - INFO - Epoch [23/30], Step [1000/3125], Loss: 2.4275, Perplexity: 11.3308
191
+ Epoch 23 Training: 35%|███▌ | 1099/3125 [19:31<35:55, 1.06s/it]
192
+ 2025-06-10 23:38:56,155 - __main__ - INFO - Epoch [23/30], Step [1100/3125], Loss: 2.3552, Perplexity: 10.5405
193
+ Epoch 23 Training: 38%|███▊ | 1199/3125 [21:17<34:35, 1.08s/it]
194
+ 2025-06-10 23:40:42,156 - __main__ - INFO - Epoch [23/30], Step [1200/3125], Loss: 2.3776, Perplexity: 10.7791
195
+ Epoch 23 Training: 42%|████▏ | 1299/3125 [23:04<32:11, 1.06s/it]
196
+ 2025-06-10 23:42:28,873 - __main__ - INFO - Epoch [23/30], Step [1300/3125], Loss: 2.4015, Perplexity: 11.0394
197
+ Epoch 23 Training: 45%|████▍ | 1399/3125 [24:51<31:06, 1.08s/it]
198
+ 2025-06-10 23:44:15,643 - __main__ - INFO - Epoch [23/30], Step [1400/3125], Loss: 2.4753, Perplexity: 11.8853
199
+ Epoch 23 Training: 48%|████▊ | 1499/3125 [26:38<29:06, 1.07s/it]
200
+ 2025-06-10 23:46:02,436 - __main__ - INFO - Epoch [23/30], Step [1500/3125], Loss: 2.4314, Perplexity: 11.3750
201
+ Epoch 23 Training: 51%|█████ | 1599/3125 [28:24<27:09, 1.07s/it]
202
+ 2025-06-10 23:47:48,633 - __main__ - INFO - Epoch [23/30], Step [1600/3125], Loss: 2.2185, Perplexity: 9.1938
203
+ Epoch 23 Training: 54%|█████▍ | 1699/3125 [30:11<25:15, 1.06s/it]
204
+ 2025-06-10 23:49:35,393 - __main__ - INFO - Epoch [23/30], Step [1700/3125], Loss: 2.2120, Perplexity: 9.1336
205
+ Epoch 23 Training: 58%|█████▊ | 1799/3125 [31:57<23:31, 1.06s/it]
206
+ 2025-06-10 23:51:21,898 - __main__ - INFO - Epoch [23/30], Step [1800/3125], Loss: 2.2967, Perplexity: 9.9416
207
+ Epoch 23 Training: 61%|██████ | 1899/3125 [33:44<21:31, 1.05s/it]
208
+ 2025-06-10 23:53:08,643 - __main__ - INFO - Epoch [23/30], Step [1900/3125], Loss: 2.4373, Perplexity: 11.4426
209
+ Epoch 23 Training: 64%|██████▍ | 1999/3125 [35:30<19:44, 1.05s/it]
210
+ 2025-06-10 23:54:54,728 - __main__ - INFO - Epoch [23/30], Step [2000/3125], Loss: 2.3913, Perplexity: 10.9281
211
+ Epoch 23 Training: 67%|██████▋ | 2099/3125 [37:16<18:04, 1.06s/it]
212
+ 2025-06-10 23:56:41,305 - __main__ - INFO - Epoch [23/30], Step [2100/3125], Loss: 2.2783, Perplexity: 9.7605
213
+ Epoch 23 Training: 70%|███████ | 2199/3125 [39:03<16:14, 1.05s/it]
214
+ 2025-06-10 23:58:27,550 - __main__ - INFO - Epoch [23/30], Step [2200/3125], Loss: 2.6455, Perplexity: 14.0907
215
+ Epoch 23 Training: 74%|███████▎ | 2299/3125 [40:49<14:49, 1.08s/it]
216
+ 2025-06-11 00:00:14,133 - __main__ - INFO - Epoch [23/30], Step [2300/3125], Loss: 2.3268, Perplexity: 10.2454
217
+ Epoch 23 Training: 77%|███████▋ | 2399/3125 [42:36<12:45, 1.05s/it]
218
+ 2025-06-11 00:02:00,712 - __main__ - INFO - Epoch [23/30], Step [2400/3125], Loss: 2.3324, Perplexity: 10.3022
219
+ Epoch 23 Training: 80%|███████▉ | 2499/3125 [44:22<10:58, 1.05s/it]
220
+ 2025-06-11 00:03:46,825 - __main__ - INFO - Epoch [23/30], Step [2500/3125], Loss: 2.1346, Perplexity: 8.4539
221
+ Epoch 23 Training: 83%|████████▎ | 2599/3125 [46:08<09:30, 1.08s/it]
222
+ 2025-06-11 00:05:33,242 - __main__ - INFO - Epoch [23/30], Step [2600/3125], Loss: 2.3986, Perplexity: 11.0080
223
+ Epoch 23 Training: 86%|████████▋ | 2699/3125 [47:55<07:32, 1.06s/it]
224
+ 2025-06-11 00:07:19,976 - __main__ - INFO - Epoch [23/30], Step [2700/3125], Loss: 2.3777, Perplexity: 10.7800
225
+ Epoch 23 Training: 90%|████████▉ | 2799/3125 [49:42<05:44, 1.06s/it]
226
+ 2025-06-11 00:09:06,704 - __main__ - INFO - Epoch [23/30], Step [2800/3125], Loss: 2.4017, Perplexity: 11.0422
227
+ Epoch 23 Training: 93%|█████████▎| 2899/3125 [51:29<03:59, 1.06s/it]
228
+ 2025-06-11 00:10:53,384 - __main__ - INFO - Epoch [23/30], Step [2900/3125], Loss: 2.6419, Perplexity: 14.0403
229
+ Epoch 23 Training: 96%|█████████▌| 2999/3125 [53:15<02:14, 1.07s/it]
230
+ 2025-06-11 00:12:39,864 - __main__ - INFO - Epoch [23/30], Step [3000/3125], Loss: 2.4274, Perplexity: 11.3291
231
+ Epoch 23 Training: 99%|█████████▉| 3099/3125 [55:01<00:27, 1.05s/it]
232
+ 2025-06-11 00:14:26,295 - __main__ - INFO - Epoch [23/30], Step [3100/3125], Loss: 2.4689, Perplexity: 11.8095
233
+ Epoch 23 Training: 100%|██████████| 3125/3125 [55:29<00:00, 1.07s/it]
234
+ 2025-06-11 00:14:53,188 - __main__ - INFO - Epoch 23 Training finished. Avg Loss: 2.3468, Time: 3329.93s
235
+ Validation: 100%|██████████| 391/391 [12:24<00:00, 1.90s/it]
236
+ 2025-06-11 00:27:17,526 - __main__ - INFO - Validation Avg Loss: 2.4694, Perplexity: 11.8158
237
+ 2025-06-11 00:27:27,488 - __main__ - INFO - Validation BLEU-4: 0.1051
238
+ 2025-06-11 00:27:27,489 - __main__ - INFO - Performing memory optimization after epoch...
239
+ 2025-06-11 00:27:27,784 - __main__ - INFO - CUDA cache emptied.
240
+ 2025-06-11 00:27:28,140 - __main__ - INFO - Python garbage collector run.
241
+ Epoch 24 Training: 3%|▎ | 99/3125 [01:48<54:03, 1.07s/it]
242
+ 2025-06-11 00:29:17,820 - __main__ - INFO - Epoch [24/30], Step [100/3125], Loss: 2.1095, Perplexity: 8.2443
243
+ Epoch 24 Training: 6%|▋ | 199/3125 [03:35<51:20, 1.05s/it]
244
+ 2025-06-11 00:31:04,436 - __main__ - INFO - Epoch [24/30], Step [200/3125], Loss: 2.2281, Perplexity: 9.2821
245
+ Epoch 24 Training: 10%|▉ | 299/3125 [05:21<49:51, 1.06s/it]
246
+ 2025-06-11 00:32:51,283 - __main__ - INFO - Epoch [24/30], Step [300/3125], Loss: 2.3703, Perplexity: 10.7003
247
+ Epoch 24 Training: 13%|█▎ | 399/3125 [07:08<49:01, 1.08s/it]
248
+ 2025-06-11 00:34:37,996 - __main__ - INFO - Epoch [24/30], Step [400/3125], Loss: 2.3050, Perplexity: 10.0242
249
+ Epoch 24 Training: 16%|█▌ | 499/3125 [08:55<46:28, 1.06s/it]
250
+ 2025-06-11 00:36:25,008 - __main__ - INFO - Epoch [24/30], Step [500/3125], Loss: 2.2863, Perplexity: 9.8383
251
+ Epoch 24 Training: 19%|█▉ | 599/3125 [10:42<45:00, 1.07s/it]
252
+ 2025-06-11 00:38:11,729 - __main__ - INFO - Epoch [24/30], Step [600/3125], Loss: 2.2520, Perplexity: 9.5070
253
+ Epoch 24 Training: 22%|██▏ | 699/3125 [12:29<42:15, 1.05s/it]
254
+ 2025-06-11 00:39:58,355 - __main__ - INFO - Epoch [24/30], Step [700/3125], Loss: 2.2716, Perplexity: 9.6947
255
+ Epoch 24 Training: 26%|██▌ | 799/3125 [14:15<41:47, 1.08s/it]
256
+ 2025-06-11 00:41:44,889 - __main__ - INFO - Epoch [24/30], Step [800/3125], Loss: 2.3995, Perplexity: 11.0173
257
+ Epoch 24 Training: 29%|██▉ | 899/3125 [16:02<39:14, 1.06s/it]
258
+ 2025-06-11 00:43:31,398 - __main__ - INFO - Epoch [24/30], Step [900/3125], Loss: 2.3343, Perplexity: 10.3224
259
+ Epoch 24 Training: 32%|███▏ | 999/3125 [17:49<38:04, 1.07s/it]
260
+ 2025-06-11 00:45:18,449 - __main__ - INFO - Epoch [24/30], Step [1000/3125], Loss: 2.2976, Perplexity: 9.9507
261
+ Epoch 24 Training: 35%|███▌ | 1099/3125 [19:35<35:49, 1.06s/it]
262
+ 2025-06-11 00:47:04,619 - __main__ - INFO - Epoch [24/30], Step [1100/3125], Loss: 2.3547, Perplexity: 10.5347
263
+ Epoch 24 Training: 38%|███▊ | 1199/3125 [21:21<33:58, 1.06s/it]
264
+ 2025-06-11 00:48:50,972 - __main__ - INFO - Epoch [24/30], Step [1200/3125], Loss: 2.3108, Perplexity: 10.0823
265
+ Epoch 24 Training: 42%|████▏ | 1299/3125 [23:08<32:12, 1.06s/it]
266
+ 2025-06-11 00:50:37,638 - __main__ - INFO - Epoch [24/30], Step [1300/3125], Loss: 2.3038, Perplexity: 10.0118
267
+ Epoch 24 Training: 45%|████▍ | 1399/3125 [24:54<31:07, 1.08s/it]
268
+ 2025-06-11 00:52:24,182 - __main__ - INFO - Epoch [24/30], Step [1400/3125], Loss: 2.4687, Perplexity: 11.8066
269
+ Epoch 24 Training: 48%|████▊ | 1499/3125 [26:41<28:34, 1.05s/it]
270
+ 2025-06-11 00:54:10,798 - __main__ - INFO - Epoch [24/30], Step [1500/3125], Loss: 2.2251, Perplexity: 9.2544
271
+ Epoch 24 Training: 51%|█████ | 1599/3125 [28:28<27:44, 1.09s/it]
272
+ 2025-06-11 00:55:57,193 - __main__ - INFO - Epoch [24/30], Step [1600/3125], Loss: 2.4617, Perplexity: 11.7248
273
+ Epoch 24 Training: 54%|█████▍ | 1699/3125 [30:14<24:56, 1.05s/it]
274
+ 2025-06-11 00:57:43,842 - __main__ - INFO - Epoch [24/30], Step [1700/3125], Loss: 2.2415, Perplexity: 9.4070
275
+ Epoch 24 Training: 58%|█████▊ | 1799/3125 [32:01<23:13, 1.05s/it]
276
+ 2025-06-11 00:59:30,263 - __main__ - INFO - Epoch [24/30], Step [1800/3125], Loss: 2.2795, Perplexity: 9.7719
277
+ Epoch 24 Training: 61%|██████ | 1899/3125 [33:47<21:46, 1.07s/it]
278
+ 2025-06-11 01:01:17,186 - __main__ - INFO - Epoch [24/30], Step [1900/3125], Loss: 2.5203, Perplexity: 12.4326
279
+ Epoch 24 Training: 64%|██████▍ | 1999/3125 [35:34<19:54, 1.06s/it]
280
+ 2025-06-11 01:03:03,886 - __main__ - INFO - Epoch [24/30], Step [2000/3125], Loss: 2.4630, Perplexity: 11.7403
281
+ Epoch 24 Training: 67%|██████▋ | 2099/3125 [37:21<18:23, 1.08s/it]
282
+ 2025-06-11 01:04:50,564 - __main__ - INFO - Epoch [24/30], Step [2100/3125], Loss: 2.3497, Perplexity: 10.4825
283
+ Epoch 24 Training: 70%|███████ | 2199/3125 [39:07<16:16, 1.05s/it]
284
+ 2025-06-11 01:06:36,779 - __main__ - INFO - Epoch [24/30], Step [2200/3125], Loss: 2.4901, Perplexity: 12.0629
285
+ Epoch 24 Training: 74%|███████▎ | 2299/3125 [40:54<14:45, 1.07s/it]
286
+ 2025-06-11 01:08:23,474 - __main__ - INFO - Epoch [24/30], Step [2300/3125], Loss: 2.1345, Perplexity: 8.4530
287
+ Epoch 24 Training: 77%|███████▋ | 2399/3125 [42:40<12:54, 1.07s/it]
288
+ 2025-06-11 01:10:09,974 - __main__ - INFO - Epoch [24/30], Step [2400/3125], Loss: 2.3948, Perplexity: 10.9664
289
+ Epoch 24 Training: 80%|███████▉ | 2499/3125 [44:27<11:02, 1.06s/it]
290
+ 2025-06-11 01:11:56,681 - __main__ - INFO - Epoch [24/30], Step [2500/3125], Loss: 2.1817, Perplexity: 8.8612
291
+ Epoch 24 Training: 83%|████████▎ | 2599/3125 [46:14<09:10, 1.05s/it]
292
+ 2025-06-11 01:13:43,503 - __main__ - INFO - Epoch [24/30], Step [2600/3125], Loss: 2.4069, Perplexity: 11.0995
293
+ Epoch 24 Training: 86%|████████▋ | 2699/3125 [48:00<07:44, 1.09s/it]
294
+ 2025-06-11 01:15:30,157 - __main__ - INFO - Epoch [24/30], Step [2700/3125], Loss: 2.4753, Perplexity: 11.8856
295
+ Epoch 24 Training: 90%|████████▉ | 2799/3125 [49:47<05:48, 1.07s/it]
296
+ 2025-06-11 01:17:16,725 - __main__ - INFO - Epoch [24/30], Step [2800/3125], Loss: 2.4636, Perplexity: 11.7471
297
+ Epoch 24 Training: 93%|█████████▎| 2899/3125 [51:34<04:01, 1.07s/it]
298
+ 2025-06-11 01:19:03,616 - __main__ - INFO - Epoch [24/30], Step [2900/3125], Loss: 2.6810, Perplexity: 14.5996
299
+ Epoch 24 Training: 96%|█████████▌| 2999/3125 [53:21<02:12, 1.05s/it]
300
+ 2025-06-11 01:20:50,647 - __main__ - INFO - Epoch [24/30], Step [3000/3125], Loss: 2.2329, Perplexity: 9.3272
301
+ Epoch 24 Training: 99%|█████████▉| 3099/3125 [55:08<00:27, 1.05s/it]
302
+ 2025-06-11 01:22:37,396 - __main__ - INFO - Epoch [24/30], Step [3100/3125], Loss: 2.1047, Perplexity: 8.2049
303
+ Epoch 24 Training: 100%|██████████| 3125/3125 [55:35<00:00, 1.07s/it]
304
+ 2025-06-11 01:23:03,915 - __main__ - INFO - Epoch 24 Training finished. Avg Loss: 2.3058, Time: 3335.77s
305
+ Validation: 100%|██████████| 391/391 [13:00<00:00, 2.00s/it]
306
+ 2025-06-11 01:36:04,607 - __main__ - INFO - Validation Avg Loss: 2.4789, Perplexity: 11.9279
307
+ 2025-06-11 01:36:14,953 - __main__ - INFO - Validation BLEU-4: 0.1056
308
+ 2025-06-11 01:36:14,954 - __main__ - INFO - Performing memory optimization after epoch...
309
+ 2025-06-11 01:36:15,247 - __main__ - INFO - CUDA cache emptied.
310
+ 2025-06-11 01:36:15,640 - __main__ - INFO - Python garbage collector run.
311
+ Epoch 25 Training: 3%|▎ | 99/3125 [01:48<52:42, 1.05s/it]
312
+ 2025-06-11 01:38:05,659 - __main__ - INFO - Epoch [25/30], Step [100/3125], Loss: 2.0677, Perplexity: 7.9063
313
+ Epoch 25 Training: 6%|▋ | 199/3125 [03:35<52:05, 1.07s/it]
314
+ 2025-06-11 01:39:52,448 - __main__ - INFO - Epoch [25/30], Step [200/3125], Loss: 2.3110, Perplexity: 10.0845
315
+ Epoch 25 Training: 10%|▉ | 299/3125 [05:22<51:13, 1.09s/it]
316
+ 2025-06-11 01:41:39,080 - __main__ - INFO - Epoch [25/30], Step [300/3125], Loss: 2.0452, Perplexity: 7.7308
317
+ Epoch 25 Training: 13%|█▎ | 399/3125 [07:09<48:33, 1.07s/it]
318
+ 2025-06-11 01:43:25,913 - __main__ - INFO - Epoch [25/30], Step [400/3125], Loss: 2.3708, Perplexity: 10.7054
319
+ Epoch 25 Training: 16%|█▌ | 499/3125 [08:55<46:30, 1.06s/it]
320
+ 2025-06-11 01:45:12,427 - __main__ - INFO - Epoch [25/30], Step [500/3125], Loss: 2.2842, Perplexity: 9.8177
321
+ Epoch 25 Training: 19%|█▉ | 599/3125 [10:41<43:54, 1.04s/it]
322
+ 2025-06-11 01:46:58,573 - __main__ - INFO - Epoch [25/30], Step [600/3125], Loss: 2.4650, Perplexity: 11.7635
323
+ Epoch 25 Training: 22%|██▏ | 699/3125 [12:28<42:52, 1.06s/it]
324
+ 2025-06-11 01:48:44,783 - __main__ - INFO - Epoch [25/30], Step [700/3125], Loss: 2.2797, Perplexity: 9.7735
325
+ Epoch 25 Training: 26%|██▌ | 799/3125 [14:14<40:40, 1.05s/it]
326
+ 2025-06-11 01:50:31,173 - __main__ - INFO - Epoch [25/30], Step [800/3125], Loss: 2.4073, Perplexity: 11.1045
327
+ Epoch 25 Training: 29%|██▉ | 899/3125 [16:00<39:29, 1.06s/it]
328
+ 2025-06-11 01:52:17,406 - __main__ - INFO - Epoch [25/30], Step [900/3125], Loss: 1.9411, Perplexity: 6.9664
329
+ Epoch 25 Training: 32%|███▏ | 999/3125 [17:47<37:30, 1.06s/it]
330
+ 2025-06-11 01:54:04,274 - __main__ - INFO - Epoch [25/30], Step [1000/3125], Loss: 2.1742, Perplexity: 8.7955
331
+ Epoch 25 Training: 35%|███▌ | 1099/3125 [19:35<36:58, 1.10s/it]
332
+ 2025-06-11 01:55:51,769 - __main__ - INFO - Epoch [25/30], Step [1100/3125], Loss: 2.2986, Perplexity: 9.9598
333
+ Epoch 25 Training: 38%|███▊ | 1199/3125 [21:21<34:48, 1.08s/it]
334
+ 2025-06-11 01:57:38,013 - __main__ - INFO - Epoch [25/30], Step [1200/3125], Loss: 2.2249, Perplexity: 9.2525
335
+ Epoch 25 Training: 42%|████▏ | 1299/3125 [23:08<32:41, 1.07s/it]
336
+ 2025-06-11 01:59:24,777 - __main__ - INFO - Epoch [25/30], Step [1300/3125], Loss: 2.2743, Perplexity: 9.7211
337
+ Epoch 25 Training: 45%|████▍ | 1399/3125 [24:54<30:32, 1.06s/it]
338
+ 2025-06-11 02:01:11,365 - __main__ - INFO - Epoch [25/30], Step [1400/3125], Loss: 2.4074, Perplexity: 11.1048
339
+ Epoch 25 Training: 48%|████▊ | 1499/3125 [26:41<29:37, 1.09s/it]
340
+ 2025-06-11 02:02:57,836 - __main__ - INFO - Epoch [25/30], Step [1500/3125], Loss: 2.3521, Perplexity: 10.5077
341
+ Epoch 25 Training: 51%|█████ | 1599/3125 [28:27<27:13, 1.07s/it]
342
+ 2025-06-11 02:04:44,044 - __main__ - INFO - Epoch [25/30], Step [1600/3125], Loss: 2.1566, Perplexity: 8.6420
343
+ Epoch 25 Training: 54%|█████▍ | 1699/3125 [30:14<25:49, 1.09s/it]
344
+ 2025-06-11 02:06:30,992 - __main__ - INFO - Epoch [25/30], Step [1700/3125], Loss: 2.5753, Perplexity: 13.1350
345
+ Epoch 25 Training: 58%|█████▊ | 1799/3125 [32:01<23:47, 1.08s/it]
346
+ 2025-06-11 02:08:17,923 - __main__ - INFO - Epoch [25/30], Step [1800/3125], Loss: 2.5129, Perplexity: 12.3405
347
+ Epoch 25 Training: 61%|██████ | 1899/3125 [33:47<21:49, 1.07s/it]
348
+ 2025-06-11 02:10:04,520 - __main__ - INFO - Epoch [25/30], Step [1900/3125], Loss: 2.1021, Perplexity: 8.1835
349
+ Epoch 25 Training: 64%|██████▍ | 1999/3125 [35:35<20:19, 1.08s/it]
350
+ 2025-06-11 02:11:51,758 - __main__ - INFO - Epoch [25/30], Step [2000/3125], Loss: 2.2199, Perplexity: 9.2062
351
+ Epoch 25 Training: 67%|██████▋ | 2099/3125 [37:22<18:24, 1.08s/it]
352
+ 2025-06-11 02:13:38,790 - __main__ - INFO - Epoch [25/30], Step [2100/3125], Loss: 2.2128, Perplexity: 9.1416
353
+ Epoch 25 Training: 70%|███████ | 2199/3125 [39:08<16:33, 1.07s/it]
354
+ 2025-06-11 02:15:25,639 - __main__ - INFO - Epoch [25/30], Step [2200/3125], Loss: 2.4628, Perplexity: 11.7379
355
+ Epoch 25 Training: 74%|███████▎ | 2299/3125 [40:55<14:31, 1.06s/it]
356
+ 2025-06-11 02:17:11,818 - __main__ - INFO - Epoch [25/30], Step [2300/3125], Loss: 2.2937, Perplexity: 9.9117
357
+ Epoch 25 Training: 77%|███████▋ | 2399/3125 [42:42<12:44, 1.05s/it]
358
+ 2025-06-11 02:18:58,784 - __main__ - INFO - Epoch [25/30], Step [2400/3125], Loss: 2.3119, Perplexity: 10.0937
359
+ Epoch 25 Training: 80%|███████▉ | 2499/3125 [44:28<11:17, 1.08s/it]
360
+ 2025-06-11 02:20:45,531 - __main__ - INFO - Epoch [25/30], Step [2500/3125], Loss: 2.4301, Perplexity: 11.3601
361
+ Epoch 25 Training: 83%|████████▎ | 2599/3125 [46:14<09:22, 1.07s/it]
362
+ 2025-06-11 02:22:31,593 - __main__ - INFO - Epoch [25/30], Step [2600/3125], Loss: 2.3248, Perplexity: 10.2245
363
+ Epoch 25 Training: 86%|████████▋ | 2699/3125 [48:01<07:28, 1.05s/it]
364
+ 2025-06-11 02:24:18,268 - __main__ - INFO - Epoch [25/30], Step [2700/3125], Loss: 2.1451, Perplexity: 8.5426
365
+ Epoch 25 Training: 90%|████████▉ | 2799/3125 [49:48<05:48, 1.07s/it]
366
+ 2025-06-11 02:26:05,182 - __main__ - INFO - Epoch [25/30], Step [2800/3125], Loss: 2.4403, Perplexity: 11.4766
367
+ Epoch 25 Training: 93%|█████████▎| 2899/3125 [51:35<04:02, 1.07s/it]
368
+ 2025-06-11 02:27:52,231 - __main__ - INFO - Epoch [25/30], Step [2900/3125], Loss: 2.5203, Perplexity: 12.4329
369
+ Epoch 25 Training: 96%|█████████▌| 2999/3125 [53:22<02:14, 1.07s/it]
370
+ 2025-06-11 02:29:39,135 - __main__ - INFO - Epoch [25/30], Step [3000/3125], Loss: 2.2823, Perplexity: 9.7993
371
+ Epoch 25 Training: 99%|█████████▉| 3099/3125 [55:08<00:28, 1.08s/it]
372
+ 2025-06-11 02:31:25,479 - __main__ - INFO - Epoch [25/30], Step [3100/3125], Loss: 2.2913, Perplexity: 9.8882
373
+ Epoch 25 Training: 100%|██████████| 3125/3125 [55:36<00:00, 1.07s/it]
374
+ 2025-06-11 02:31:52,162 - __main__ - INFO - Epoch 25 Training finished. Avg Loss: 2.2707, Time: 3336.52s
375
+ Validation: 100%|██████████| 391/391 [12:53<00:00, 1.98s/it]
376
+ 2025-06-11 02:44:45,366 - __main__ - INFO - Validation Avg Loss: 2.4919, Perplexity: 12.0847
377
+ 2025-06-11 02:44:55,839 - __main__ - INFO - Validation BLEU-4: 0.1028
378
+ 2025-06-11 02:44:57,085 - __main__ - INFO - Saved periodic model checkpoint to ./output/model_epoch_25.pth
379
+ 2025-06-11 02:44:57,087 - __main__ - INFO - Performing memory optimization after epoch...
380
+ 2025-06-11 02:44:57,380 - __main__ - INFO - CUDA cache emptied.
381
+ 2025-06-11 02:44:57,781 - __main__ - INFO - Python garbage collector run.
382
+ Epoch 26 Training: 3%|▎ | 99/3125 [01:48<54:10, 1.07s/it]
383
+ 2025-06-11 02:46:47,510 - __main__ - INFO - Epoch [26/30], Step [100/3125], Loss: 2.1781, Perplexity: 8.8298
384
+ Epoch 26 Training: 6%|▋ | 199/3125 [03:35<51:51, 1.06s/it]
385
+ 2025-06-11 02:48:34,182 - __main__ - INFO - Epoch [26/30], Step [200/3125], Loss: 2.1307, Perplexity: 8.4208
386
+ Epoch 26 Training: 10%|▉ | 299/3125 [05:21<49:37, 1.05s/it]
387
+ 2025-06-11 02:50:20,758 - __main__ - INFO - Epoch [26/30], Step [300/3125], Loss: 2.1756, Perplexity: 8.8076
388
+ Epoch 26 Training: 13%|█▎ | 399/3125 [07:08<48:45, 1.07s/it]
389
+ 2025-06-11 02:52:07,509 - __main__ - INFO - Epoch [26/30], Step [400/3125], Loss: 1.9878, Perplexity: 7.2998
390
+ Epoch 26 Training: 16%|█▌ | 499/3125 [08:55<47:05, 1.08s/it]
391
+ 2025-06-11 02:53:54,661 - __main__ - INFO - Epoch [26/30], Step [500/3125], Loss: 2.3650, Perplexity: 10.6442
392
+ Epoch 26 Training: 19%|█▉ | 599/3125 [10:42<45:02, 1.07s/it]
393
+ 2025-06-11 02:55:41,126 - __main__ - INFO - Epoch [26/30], Step [600/3125], Loss: 2.5252, Perplexity: 12.4940
394
+ Epoch 26 Training: 22%|██▏ | 699/3125 [12:28<42:59, 1.06s/it]
395
+ 2025-06-11 02:57:27,719 - __main__ - INFO - Epoch [26/30], Step [700/3125], Loss: 2.4526, Perplexity: 11.6181
396
+ Epoch 26 Training: 26%|██▌ | 799/3125 [14:15<41:31, 1.07s/it]
397
+ 2025-06-11 02:59:14,465 - __main__ - INFO - Epoch [26/30], Step [800/3125], Loss: 2.1245, Perplexity: 8.3691
398
+ Epoch 26 Training: 29%|██▉ | 899/3125 [16:02<39:09, 1.06s/it]
399
+ 2025-06-11 03:01:01,100 - __main__ - INFO - Epoch [26/30], Step [900/3125], Loss: 2.0997, Perplexity: 8.1638
400
+ Epoch 26 Training: 32%|███▏ | 999/3125 [17:48<37:49, 1.07s/it]
401
+ 2025-06-11 03:02:47,434 - __main__ - INFO - Epoch [26/30], Step [1000/3125], Loss: 2.3237, Perplexity: 10.2132
402
+ Epoch 26 Training: 35%|███▌ | 1099/3125 [19:35<35:20, 1.05s/it]
403
+ 2025-06-11 03:04:34,056 - __main__ - INFO - Epoch [26/30], Step [1100/3125], Loss: 2.1471, Perplexity: 8.5599
404
+ Epoch 26 Training: 38%|███▊ | 1199/3125 [21:22<34:25, 1.07s/it]
405
+ 2025-06-11 03:06:21,068 - __main__ - INFO - Epoch [26/30], Step [1200/3125], Loss: 2.4894, Perplexity: 12.0542
406
+ Epoch 26 Training: 42%|████▏ | 1299/3125 [23:08<32:10, 1.06s/it]
407
+ 2025-06-11 03:08:07,541 - __main__ - INFO - Epoch [26/30], Step [1300/3125], Loss: 2.2348, Perplexity: 9.3447
408
+ Epoch 26 Training: 45%|████▍ | 1399/3125 [24:55<31:03, 1.08s/it]
409
+ 2025-06-11 03:09:53,995 - __main__ - INFO - Epoch [26/30], Step [1400/3125], Loss: 2.2782, Perplexity: 9.7595
410
+ Epoch 26 Training: 48%|████▊ | 1499/3125 [26:41<28:26, 1.05s/it]
411
+ 2025-06-11 03:11:40,489 - __main__ - INFO - Epoch [26/30], Step [1500/3125], Loss: 2.4081, Perplexity: 11.1129
412
+ Epoch 26 Training: 51%|█████ | 1599/3125 [28:28<27:13, 1.07s/it]
413
+ 2025-06-11 03:13:27,420 - __main__ - INFO - Epoch [26/30], Step [1600/3125], Loss: 2.1137, Perplexity: 8.2785
414
+ Epoch 26 Training: 54%|█████▍ | 1699/3125 [30:15<25:21, 1.07s/it]
415
+ 2025-06-11 03:15:14,451 - __main__ - INFO - Epoch [26/30], Step [1700/3125], Loss: 2.3987, Perplexity: 11.0087
416
+ Epoch 26 Training: 58%|█████▊ | 1799/3125 [32:02<23:28, 1.06s/it]
417
+ 2025-06-11 03:17:01,020 - __main__ - INFO - Epoch [26/30], Step [1800/3125], Loss: 2.0616, Perplexity: 7.8588
418
+ Epoch 26 Training: 61%|██████ | 1899/3125 [33:48<21:49, 1.07s/it]
419
+ 2025-06-11 03:18:46,981 - __main__ - INFO - Epoch [26/30], Step [1900/3125], Loss: 2.1317, Perplexity: 8.4295
420
+ Epoch 26 Training: 64%|██████▍ | 1999/3125 [35:34<19:58, 1.06s/it]
421
+ 2025-06-11 03:20:33,558 - __main__ - INFO - Epoch [26/30], Step [2000/3125], Loss: 2.2971, Perplexity: 9.9451
422
+ Epoch 26 Training: 67%|██████▋ | 2099/3125 [37:21<17:52, 1.04s/it]
423
+ 2025-06-11 03:22:20,224 - __main__ - INFO - Epoch [26/30], Step [2100/3125], Loss: 2.2505, Perplexity: 9.4921
424
+ Epoch 26 Training: 70%|███████ | 2199/3125 [39:08<16:41, 1.08s/it]
425
+ 2025-06-11 03:24:06,994 - __main__ - INFO - Epoch [26/30], Step [2200/3125], Loss: 2.1467, Perplexity: 8.5564
426
+ Epoch 26 Training: 74%|███████▎ | 2299/3125 [40:55<14:45, 1.07s/it]
427
+ 2025-06-11 03:25:54,174 - __main__ - INFO - Epoch [26/30], Step [2300/3125], Loss: 1.9914, Perplexity: 7.3260
428
+ Epoch 26 Training: 77%|███████▋ | 2399/3125 [42:41<12:45, 1.05s/it]
429
+ 2025-06-11 03:27:40,333 - __main__ - INFO - Epoch [26/30], Step [2400/3125], Loss: 2.0928, Perplexity: 8.1075
430
+ Epoch 26 Training: 80%|███████▉ | 2499/3125 [44:28<11:26, 1.10s/it]
431
+ 2025-06-11 03:29:27,130 - __main__ - INFO - Epoch [26/30], Step [2500/3125], Loss: 2.4869, Perplexity: 12.0239
432
+ Epoch 26 Training: 83%|████████▎ | 2599/3125 [46:15<09:12, 1.05s/it]
433
+ 2025-06-11 03:31:13,925 - __main__ - INFO - Epoch [26/30], Step [2600/3125], Loss: 2.0959, Perplexity: 8.1325
434
+ Epoch 26 Training: 86%|████████▋ | 2699/3125 [48:01<07:29, 1.06s/it]
435
+ 2025-06-11 03:32:59,910 - __main__ - INFO - Epoch [26/30], Step [2700/3125], Loss: 2.1937, Perplexity: 8.9682
436
+ Epoch 26 Training: 90%|████████▉ | 2799/3125 [49:48<05:49, 1.07s/it]
437
+ 2025-06-11 03:34:46,905 - __main__ - INFO - Epoch [26/30], Step [2800/3125], Loss: 2.1959, Perplexity: 8.9884
438
+ Epoch 26 Training: 93%|█████████▎| 2899/3125 [51:35<04:01, 1.07s/it]
439
+ 2025-06-11 03:36:34,047 - __main__ - INFO - Epoch [26/30], Step [2900/3125], Loss: 2.0231, Perplexity: 7.5616
440
+ Epoch 26 Training: 96%|█████████▌| 2999/3125 [53:21<02:14, 1.07s/it]
441
+ 2025-06-11 03:38:20,737 - __main__ - INFO - Epoch [26/30], Step [3000/3125], Loss: 2.2614, Perplexity: 9.5964
442
+ Epoch 26 Training: 99%|█████████▉| 3099/3125 [55:08<00:27, 1.05s/it]
443
+ 2025-06-11 03:40:07,482 - __main__ - INFO - Epoch [26/30], Step [3100/3125], Loss: 2.3379, Perplexity: 10.3591
444
+ Epoch 26 Training: 100%|██████████| 3125/3125 [55:36<00:00, 1.07s/it]
445
+ 2025-06-11 03:40:34,240 - __main__ - INFO - Epoch 26 Training finished. Avg Loss: 2.2387, Time: 3336.46s
446
+ Validation: 100%|██████████| 391/391 [12:57<00:00, 1.99s/it]
447
+ 2025-06-11 03:53:32,177 - __main__ - INFO - Validation Avg Loss: 2.4963, Perplexity: 12.1371
448
+ 2025-06-11 03:53:42,502 - __main__ - INFO - Validation BLEU-4: 0.1031
449
+ 2025-06-11 03:53:42,503 - __main__ - INFO - Performing memory optimization after epoch...
450
+ 2025-06-11 03:53:42,769 - __main__ - INFO - CUDA cache emptied.
451
+ 2025-06-11 03:53:43,136 - __main__ - INFO - Python garbage collector run.
452
+ Epoch 27 Training: 3%|▎ | 99/3125 [01:49<54:23, 1.08s/it]
453
+ 2025-06-11 03:55:33,471 - __main__ - INFO - Epoch [27/30], Step [100/3125], Loss: 2.4510, Perplexity: 11.6003
454
+ Epoch 27 Training: 6%|▋ | 199/3125 [03:35<52:21, 1.07s/it]
455
+ 2025-06-11 03:57:19,933 - __main__ - INFO - Epoch [27/30], Step [200/3125], Loss: 2.1434, Perplexity: 8.5288
456
+ Epoch 27 Training: 10%|▉ | 299/3125 [05:22<50:05, 1.06s/it]
457
+ 2025-06-11 03:59:06,702 - __main__ - INFO - Epoch [27/30], Step [300/3125], Loss: 2.2218, Perplexity: 9.2243
458
+ Epoch 27 Training: 13%|█▎ | 399/3125 [07:09<48:36, 1.07s/it]
459
+ 2025-06-11 04:00:53,717 - __main__ - INFO - Epoch [27/30], Step [400/3125], Loss: 2.3096, Perplexity: 10.0700
460
+ Epoch 27 Training: 16%|█▌ | 499/3125 [08:56<46:17, 1.06s/it]
461
+ 2025-06-11 04:02:40,535 - __main__ - INFO - Epoch [27/30], Step [500/3125], Loss: 1.9651, Perplexity: 7.1359
462
+ Epoch 27 Training: 19%|█▉ | 599/3125 [10:42<45:15, 1.08s/it]
463
+ 2025-06-11 04:04:27,128 - __main__ - INFO - Epoch [27/30], Step [600/3125], Loss: 2.1665, Perplexity: 8.7279
464
+ Epoch 27 Training: 22%|██▏ | 699/3125 [12:29<44:25, 1.10s/it]
465
+ 2025-06-11 04:06:13,909 - __main__ - INFO - Epoch [27/30], Step [700/3125], Loss: 1.9937, Perplexity: 7.3428
466
+ Epoch 27 Training: 26%|██▌ | 799/3125 [14:16<41:57, 1.08s/it]
467
+ 2025-06-11 04:08:01,161 - __main__ - INFO - Epoch [27/30], Step [800/3125], Loss: 2.3485, Perplexity: 10.4698
468
+ Epoch 27 Training: 29%|██▉ | 899/3125 [16:03<39:14, 1.06s/it]
469
+ 2025-06-11 04:09:47,364 - __main__ - INFO - Epoch [27/30], Step [900/3125], Loss: 2.2921, Perplexity: 9.8954
470
+ Epoch 27 Training: 32%|███▏ | 999/3125 [17:49<38:03, 1.07s/it]
471
+ 2025-06-11 04:11:34,152 - __main__ - INFO - Epoch [27/30], Step [1000/3125], Loss: 2.1013, Perplexity: 8.1766
472
+ Epoch 27 Training: 35%|███▌ | 1099/3125 [19:36<36:00, 1.07s/it]
473
+ 2025-06-11 04:13:20,882 - __main__ - INFO - Epoch [27/30], Step [1100/3125], Loss: 2.0653, Perplexity: 7.8879
474
+ Epoch 27 Training: 38%|███▊ | 1199/3125 [21:23<33:38, 1.05s/it]
475
+ 2025-06-11 04:15:07,329 - __main__ - INFO - Epoch [27/30], Step [1200/3125], Loss: 2.2314, Perplexity: 9.3128
476
+ Epoch 27 Training: 42%|████▏ | 1299/3125 [23:09<32:06, 1.05s/it]
477
+ 2025-06-11 04:16:53,655 - __main__ - INFO - Epoch [27/30], Step [1300/3125], Loss: 2.1889, Perplexity: 8.9255
478
+ Epoch 27 Training: 45%|████▍ | 1399/3125 [24:56<30:39, 1.07s/it]
479
+ 2025-06-11 04:18:40,202 - __main__ - INFO - Epoch [27/30], Step [1400/3125], Loss: 2.2393, Perplexity: 9.3871
480
+ Epoch 27 Training: 48%|████▊ | 1499/3125 [26:42<28:32, 1.05s/it]
481
+ 2025-06-11 04:20:27,025 - __main__ - INFO - Epoch [27/30], Step [1500/3125], Loss: 2.0922, Perplexity: 8.1025
482
+ Epoch 27 Training: 51%|██��██ | 1599/3125 [28:29<27:12, 1.07s/it]
483
+ 2025-06-11 04:22:13,704 - __main__ - INFO - Epoch [27/30], Step [1600/3125], Loss: 2.0977, Perplexity: 8.1471
484
+ Epoch 27 Training: 54%|█████▍ | 1699/3125 [30:15<25:23, 1.07s/it]
485
+ 2025-06-11 04:24:00,090 - __main__ - INFO - Epoch [27/30], Step [1700/3125], Loss: 2.1646, Perplexity: 8.7110
486
+ Epoch 27 Training: 58%|█████▊ | 1799/3125 [32:02<23:43, 1.07s/it]
487
+ 2025-06-11 04:25:46,973 - __main__ - INFO - Epoch [27/30], Step [1800/3125], Loss: 2.0869, Perplexity: 8.0598
488
+ Epoch 27 Training: 61%|██████ | 1899/3125 [33:49<22:11, 1.09s/it]
489
+ 2025-06-11 04:27:34,140 - __main__ - INFO - Epoch [27/30], Step [1900/3125], Loss: 2.2150, Perplexity: 9.1616
490
+ Epoch 27 Training: 64%|██████▍ | 1999/3125 [35:36<19:52, 1.06s/it]
491
+ 2025-06-11 04:29:20,586 - __main__ - INFO - Epoch [27/30], Step [2000/3125], Loss: 2.3303, Perplexity: 10.2812
492
+ Epoch 27 Training: 67%|██████▋ | 2099/3125 [37:23<18:15, 1.07s/it]
493
+ 2025-06-11 04:31:07,237 - __main__ - INFO - Epoch [27/30], Step [2100/3125], Loss: 2.0849, Perplexity: 8.0437
494
+ Epoch 27 Training: 70%|███████ | 2199/3125 [39:10<16:12, 1.05s/it]
495
+ 2025-06-11 04:32:54,360 - __main__ - INFO - Epoch [27/30], Step [2200/3125], Loss: 2.1256, Perplexity: 8.3781
496
+ Epoch 27 Training: 74%|███████▎ | 2299/3125 [40:56<14:43, 1.07s/it]
497
+ 2025-06-11 04:34:41,095 - __main__ - INFO - Epoch [27/30], Step [2300/3125], Loss: 2.4150, Perplexity: 11.1895
498
+ Epoch 27 Training: 77%|███████▋ | 2399/3125 [42:42<12:44, 1.05s/it]
499
+ 2025-06-11 04:36:26,954 - __main__ - INFO - Epoch [27/30], Step [2400/3125], Loss: 2.2232, Perplexity: 9.2372
500
+ Epoch 27 Training: 80%|███████▉ | 2499/3125 [44:29<11:00, 1.05s/it]
501
+ 2025-06-11 04:38:13,296 - __main__ - INFO - Epoch [27/30], Step [2500/3125], Loss: 2.0479, Perplexity: 7.7517
502
+ Epoch 27 Training: 83%|████████▎ | 2599/3125 [46:15<09:15, 1.06s/it]
503
+ 2025-06-11 04:39:59,872 - __main__ - INFO - Epoch [27/30], Step [2600/3125], Loss: 2.2103, Perplexity: 9.1183
504
+ Epoch 27 Training: 86%|████████▋ | 2699/3125 [48:02<07:37, 1.07s/it]
505
+ 2025-06-11 04:41:46,260 - __main__ - INFO - Epoch [27/30], Step [2700/3125], Loss: 2.0779, Perplexity: 7.9875
506
+ Epoch 27 Training: 90%|████████▉ | 2799/3125 [49:48<05:47, 1.06s/it]
507
+ 2025-06-11 04:43:32,947 - __main__ - INFO - Epoch [27/30], Step [2800/3125], Loss: 2.2260, Perplexity: 9.2629
508
+ Epoch 27 Training: 93%|█████████▎| 2899/3125 [51:35<03:58, 1.05s/it]
509
+ 2025-06-11 04:45:19,449 - __main__ - INFO - Epoch [27/30], Step [2900/3125], Loss: 1.9721, Perplexity: 7.1857
510
+ Epoch 27 Training: 96%|█████████▌| 2999/3125 [53:21<02:17, 1.09s/it]
511
+ 2025-06-11 04:47:05,517 - __main__ - INFO - Epoch [27/30], Step [3000/3125], Loss: 2.0447, Perplexity: 7.7268
512
+ Epoch 27 Training: 99%|█████████▉| 3099/3125 [55:07<00:27, 1.06s/it]
513
+ 2025-06-11 04:48:52,184 - __main__ - INFO - Epoch [27/30], Step [3100/3125], Loss: 2.3461, Perplexity: 10.4452
514
+ Epoch 27 Training: 100%|██████████| 3125/3125 [55:35<00:00, 1.07s/it]
515
+ 2025-06-11 04:49:19,002 - __main__ - INFO - Epoch 27 Training finished. Avg Loss: 2.1767, Time: 3335.86s
516
+ Validation: 100%|██████████| 391/391 [12:56<00:00, 1.99s/it]
517
+ 2025-06-11 05:02:15,213 - __main__ - INFO - Validation Avg Loss: 2.5049, Perplexity: 12.2423
518
+ 2025-06-11 05:02:25,331 - __main__ - INFO - Validation BLEU-4: 0.1046
519
+ 2025-06-11 05:02:25,332 - __main__ - INFO - Performing memory optimization after epoch...
520
+ 2025-06-11 05:02:25,616 - __main__ - INFO - CUDA cache emptied.
521
+ 2025-06-11 05:02:25,976 - __main__ - INFO - Python garbage collector run.
522
+ Epoch 28 Training: 3%|▎ | 99/3125 [01:49<55:22, 1.10s/it]
523
+ 2025-06-11 05:04:16,514 - __main__ - INFO - Epoch [28/30], Step [100/3125], Loss: 2.3226, Perplexity: 10.2017
524
+ Epoch 28 Training: 6%|▋ | 199/3125 [03:36<53:05, 1.09s/it]
525
+ 2025-06-11 05:06:03,370 - __main__ - INFO - Epoch [28/30], Step [200/3125], Loss: 2.0446, Perplexity: 7.7264
526
+ Epoch 28 Training: 10%|▉ | 299/3125 [05:22<49:38, 1.05s/it]
527
+ 2025-06-11 05:07:49,943 - __main__ - INFO - Epoch [28/30], Step [300/3125], Loss: 2.1842, Perplexity: 8.8834
528
+ Epoch 28 Training: 13%|█▎ | 399/3125 [07:09<49:08, 1.08s/it]
529
+ 2025-06-11 05:09:36,606 - __main__ - INFO - Epoch [28/30], Step [400/3125], Loss: 2.1027, Perplexity: 8.1881
530
+ Epoch 28 Training: 16%|█▌ | 499/3125 [08:56<46:24, 1.06s/it]
531
+ 2025-06-11 05:11:23,149 - __main__ - INFO - Epoch [28/30], Step [500/3125], Loss: 2.0650, Perplexity: 7.8852
532
+ Epoch 28 Training: 19%|█▉ | 599/3125 [10:43<45:02, 1.07s/it]
533
+ 2025-06-11 05:13:10,157 - __main__ - INFO - Epoch [28/30], Step [600/3125], Loss: 2.1477, Perplexity: 8.5651
534
+ Epoch 28 Training: 22%|██▏ | 699/3125 [12:29<42:30, 1.05s/it]
535
+ 2025-06-11 05:14:56,988 - __main__ - INFO - Epoch [28/30], Step [700/3125], Loss: 2.2763, Perplexity: 9.7404
536
+ Epoch 28 Training: 26%|██▌ | 799/3125 [14:17<43:15, 1.12s/it]
537
+ 2025-06-11 05:16:44,106 - __main__ - INFO - Epoch [28/30], Step [800/3125], Loss: 2.0723, Perplexity: 7.9430
538
+ Epoch 28 Training: 29%|██▉ | 899/3125 [16:04<40:10, 1.08s/it]
539
+ 2025-06-11 05:18:31,089 - __main__ - INFO - Epoch [28/30], Step [900/3125], Loss: 2.2289, Perplexity: 9.2896
540
+ Epoch 28 Training: 32%|███▏ | 999/3125 [17:50<38:19, 1.08s/it]
541
+ 2025-06-11 05:20:17,225 - __main__ - INFO - Epoch [28/30], Step [1000/3125], Loss: 2.0397, Perplexity: 7.6886
542
+ Epoch 28 Training: 35%|███▌ | 1099/3125 [19:36<36:10, 1.07s/it]
543
+ 2025-06-11 05:22:03,933 - __main__ - INFO - Epoch [28/30], Step [1100/3125], Loss: 2.0839, Perplexity: 8.0357
544
+ Epoch 28 Training: 38%|███▊ | 1199/3125 [21:23<34:02, 1.06s/it]
545
+ 2025-06-11 05:23:50,178 - __main__ - INFO - Epoch [28/30], Step [1200/3125], Loss: 2.1007, Perplexity: 8.1722
546
+ Epoch 28 Training: 42%|████▏ | 1299/3125 [23:09<32:02, 1.05s/it]
547
+ 2025-06-11 05:25:36,305 - __main__ - INFO - Epoch [28/30], Step [1300/3125], Loss: 2.0338, Perplexity: 7.6428
548
+ Epoch 28 Training: 45%|████▍ | 1399/3125 [24:55<30:44, 1.07s/it]
549
+ 2025-06-11 05:27:23,028 - __main__ - INFO - Epoch [28/30], Step [1400/3125], Loss: 2.2426, Perplexity: 9.4181
550
+ Epoch 28 Training: 48%|████▊ | 1499/3125 [26:42<29:24, 1.09s/it]
551
+ 2025-06-11 05:29:09,790 - __main__ - INFO - Epoch [28/30], Step [1500/3125], Loss: 2.0902, Perplexity: 8.0865
552
+ Epoch 28 Training: 51%|█████ | 1599/3125 [28:29<27:14, 1.07s/it]
553
+ 2025-06-11 05:30:56,617 - __main__ - INFO - Epoch [28/30], Step [1600/3125], Loss: 2.0530, Perplexity: 7.7911
554
+ Epoch 28 Training: 54%|█████▍ | 1699/3125 [30:15<25:52, 1.09s/it]
555
+ 2025-06-11 05:32:42,683 - __main__ - INFO - Epoch [28/30], Step [1700/3125], Loss: 2.1393, Perplexity: 8.4933
556
+ Epoch 28 Training: 58%|█████▊ | 1799/3125 [32:01<23:19, 1.06s/it]
557
+ 2025-06-11 05:34:28,982 - __main__ - INFO - Epoch [28/30], Step [1800/3125], Loss: 2.2889, Perplexity: 9.8640
558
+ Epoch 28 Training: 61%|██████ | 1899/3125 [33:48<21:40, 1.06s/it]
559
+ 2025-06-11 05:36:15,579 - __main__ - INFO - Epoch [28/30], Step [1900/3125], Loss: 2.0841, Perplexity: 8.0370
560
+ Epoch 28 Training: 64%|██████▍ | 1999/3125 [35:34<19:31, 1.04s/it]
561
+ 2025-06-11 05:38:01,967 - __main__ - INFO - Epoch [28/30], Step [2000/3125], Loss: 2.1644, Perplexity: 8.7093
562
+ Epoch 28 Training: 67%|██████▋ | 2099/3125 [37:20<18:02, 1.06s/it]
563
+ 2025-06-11 05:39:47,966 - __main__ - INFO - Epoch [28/30], Step [2100/3125], Loss: 2.3094, Perplexity: 10.0683
564
+ Epoch 28 Training: 70%|███████ | 2199/3125 [39:07<16:35, 1.08s/it]
565
+ 2025-06-11 05:41:34,146 - __main__ - INFO - Epoch [28/30], Step [2200/3125], Loss: 2.1274, Perplexity: 8.3931
566
+ Epoch 28 Training: 74%|███████▎ | 2299/3125 [40:53<14:40, 1.07s/it]
567
+ 2025-06-11 05:43:20,302 - __main__ - INFO - Epoch [28/30], Step [2300/3125], Loss: 2.1598, Perplexity: 8.6696
568
+ Epoch 28 Training: 77%|███████▋ | 2399/3125 [42:39<12:56, 1.07s/it]
569
+ 2025-06-11 05:45:06,835 - __main__ - INFO - Epoch [28/30], Step [2400/3125], Loss: 2.0426, Perplexity: 7.7105
570
+ Epoch 28 Training: 80%|███████▉ | 2499/3125 [44:26<11:20, 1.09s/it]
571
+ 2025-06-11 05:46:53,147 - __main__ - INFO - Epoch [28/30], Step [2500/3125], Loss: 2.0391, Perplexity: 7.6833
572
+ Epoch 28 Training: 83%|████████▎ | 2599/3125 [46:12<09:33, 1.09s/it]
573
+ 2025-06-11 05:48:39,287 - __main__ - INFO - Epoch [28/30], Step [2600/3125], Loss: 2.1521, Perplexity: 8.6025
574
+ Epoch 28 Training: 86%|████████▋ | 2699/3125 [47:58<07:33, 1.06s/it]
575
+ 2025-06-11 05:50:25,247 - __main__ - INFO - Epoch [28/30], Step [2700/3125], Loss: 2.0867, Perplexity: 8.0583
576
+ Epoch 28 Training: 90%|████████▉ | 2799/3125 [49:43<05:47, 1.07s/it]
577
+ 2025-06-11 05:52:10,744 - __main__ - INFO - Epoch [28/30], Step [2800/3125], Loss: 1.9385, Perplexity: 6.9486
578
+ Epoch 28 Training: 93%|█████████▎| 2899/3125 [51:29<03:58, 1.06s/it]
579
+ 2025-06-11 05:53:56,657 - __main__ - INFO - Epoch [28/30], Step [2900/3125], Loss: 2.1660, Perplexity: 8.7234
580
+ Epoch 28 Training: 96%|█████████▌| 2999/3125 [53:15<02:13, 1.06s/it]
581
+ 2025-06-11 05:55:42,755 - __main__ - INFO - Epoch [28/30], Step [3000/3125], Loss: 2.3245, Perplexity: 10.2219
582
+ Epoch 28 Training: 99%|█████████▉| 3099/3125 [55:01<00:27, 1.05s/it]
583
+ 2025-06-11 05:57:28,701 - __main__ - INFO - Epoch [28/30], Step [3100/3125], Loss: 2.1882, Perplexity: 8.9189
584
+ Epoch 28 Training: 100%|██████████| 3125/3125 [55:29<00:00, 1.07s/it]
585
+ 2025-06-11 05:57:55,002 - __main__ - INFO - Epoch 28 Training finished. Avg Loss: 2.1508, Time: 3329.03s
586
+ Validation: 100%|██████████| 391/391 [11:34<00:00, 1.78s/it]
587
+ 2025-06-11 06:09:29,731 - __main__ - INFO - Validation Avg Loss: 2.5089, Perplexity: 12.2919
588
+ 2025-06-11 06:09:38,926 - __main__ - INFO - Validation BLEU-4: 0.1039
589
+ 2025-06-11 06:09:38,927 - __main__ - INFO - Performing memory optimization after epoch...
590
+ 2025-06-11 06:09:39,197 - __main__ - INFO - CUDA cache emptied.
591
+ 2025-06-11 06:09:39,519 - __main__ - INFO - Python garbage collector run.
592
+ Epoch 29 Training: 3%|▎ | 99/3125 [01:47<53:33, 1.06s/it]
593
+ 2025-06-11 06:11:28,524 - __main__ - INFO - Epoch [29/30], Step [100/3125], Loss: 1.8376, Perplexity: 6.2816
594
+ Epoch 29 Training: 6%|▋ | 199/3125 [03:34<50:54, 1.04s/it]
595
+ 2025-06-11 06:13:14,677 - __main__ - INFO - Epoch [29/30], Step [200/3125], Loss: 2.0208, Perplexity: 7.5444
596
+ Epoch 29 Training: 10%|▉ | 299/3125 [05:20<49:39, 1.05s/it]
597
+ 2025-06-11 06:15:00,766 - __main__ - INFO - Epoch [29/30], Step [300/3125], Loss: 2.4057, Perplexity: 11.0859
598
+ Epoch 29 Training: 13%|█▎ | 399/3125 [07:05<47:27, 1.04s/it]
599
+ 2025-06-11 06:16:46,520 - __main__ - INFO - Epoch [29/30], Step [400/3125], Loss: 1.9454, Perplexity: 6.9966
600
+ Epoch 29 Training: 16%|█▌ | 499/3125 [08:51<45:58, 1.05s/it]
601
+ 2025-06-11 06:18:32,435 - __main__ - INFO - Epoch [29/30], Step [500/3125], Loss: 2.0933, Perplexity: 8.1118
602
+ Epoch 29 Training: 19%|█▉ | 599/3125 [10:37<44:17, 1.05s/it]
603
+ 2025-06-11 06:20:18,117 - __main__ - INFO - Epoch [29/30], Step [600/3125], Loss: 2.1162, Perplexity: 8.2996
604
+ Epoch 29 Training: 22%|██▏ | 699/3125 [12:23<43:03, 1.06s/it]
605
+ 2025-06-11 06:22:03,917 - __main__ - INFO - Epoch [29/30], Step [700/3125], Loss: 2.0233, Perplexity: 7.5629
606
+ Epoch 29 Training: 26%|██▌ | 799/3125 [14:08<41:18, 1.07s/it]
607
+ 2025-06-11 06:23:49,332 - __main__ - INFO - Epoch [29/30], Step [800/3125], Loss: 2.1161, Perplexity: 8.2987
608
+ Epoch 29 Training: 29%|██▉ | 899/3125 [15:54<39:17, 1.06s/it]
609
+ 2025-06-11 06:25:35,462 - __main__ - INFO - Epoch [29/30], Step [900/3125], Loss: 2.3258, Perplexity: 10.2354
610
+ Epoch 29 Training: 32%|███▏ | 999/3125 [17:40<37:19, 1.05s/it]
611
+ 2025-06-11 06:27:21,095 - __main__ - INFO - Epoch [29/30], Step [1000/3125], Loss: 2.0303, Perplexity: 7.6167
612
+ Epoch 29 Training: 35%|███▌ | 1099/3125 [19:26<35:34, 1.05s/it]
613
+ 2025-06-11 06:29:07,024 - __main__ - INFO - Epoch [29/30], Step [1100/3125], Loss: 2.2297, Perplexity: 9.2970
614
+ Epoch 29 Training: 38%|███▊ | 1199/3125 [21:12<34:00, 1.06s/it]
615
+ 2025-06-11 06:30:53,597 - __main__ - INFO - Epoch [29/30], Step [1200/3125], Loss: 2.1602, Perplexity: 8.6727
616
+ Epoch 29 Training: 42%|████▏ | 1299/3125 [22:58<32:18, 1.06s/it]
617
+ 2025-06-11 06:32:39,350 - __main__ - INFO - Epoch [29/30], Step [1300/3125], Loss: 2.0937, Perplexity: 8.1153
618
+ Epoch 29 Training: 45%|████▍ | 1399/3125 [24:44<31:07, 1.08s/it]
619
+ 2025-06-11 06:34:25,480 - __main__ - INFO - Epoch [29/30], Step [1400/3125], Loss: 2.1163, Perplexity: 8.3006
620
+ Epoch 29 Training: 48%|████▊ | 1499/3125 [26:30<28:14, 1.04s/it]
621
+ 2025-06-11 06:36:10,724 - __main__ - INFO - Epoch [29/30], Step [1500/3125], Loss: 2.1770, Perplexity: 8.8202
622
+ Epoch 29 Training: 51%|█████ | 1599/3125 [28:16<26:56, 1.06s/it]
623
+ 2025-06-11 06:37:56,930 - __main__ - INFO - Epoch [29/30], Step [1600/3125], Loss: 2.0694, Perplexity: 7.9199
624
+ Epoch 29 Training: 54%|█████▍ | 1699/3125 [30:01<24:52, 1.05s/it]
625
+ 2025-06-11 06:39:42,561 - __main__ - INFO - Epoch [29/30], Step [1700/3125], Loss: 2.2901, Perplexity: 9.8764
626
+ Epoch 29 Training: 58%|█████▊ | 1799/3125 [31:48<23:39, 1.07s/it]
627
+ 2025-06-11 06:41:28,770 - __main__ - INFO - Epoch [29/30], Step [1800/3125], Loss: 2.0297, Perplexity: 7.6121
628
+ Epoch 29 Training: 61%|██████ | 1899/3125 [33:33<21:21, 1.05s/it]
629
+ 2025-06-11 06:43:13,871 - __main__ - INFO - Epoch [29/30], Step [1900/3125], Loss: 1.9835, Perplexity: 7.2681
630
+ Epoch 29 Training: 64%|██████▍ | 1999/3125 [35:18<19:36, 1.05s/it]
631
+ 2025-06-11 06:44:59,278 - __main__ - INFO - Epoch [29/30], Step [2000/3125], Loss: 2.1101, Perplexity: 8.2492
632
+ Epoch 29 Training: 67%|██████▋ | 2099/3125 [37:04<17:57, 1.05s/it]
633
+ 2025-06-11 06:46:45,203 - __main__ - INFO - Epoch [29/30], Step [2100/3125], Loss: 2.0714, Perplexity: 7.9361
634
+ Epoch 29 Training: 70%|███████ | 2199/3125 [38:50<16:17, 1.06s/it]
635
+ 2025-06-11 06:48:30,789 - __main__ - INFO - Epoch [29/30], Step [2200/3125], Loss: 2.3346, Perplexity: 10.3258
636
+ Epoch 29 Training: 74%|███████▎ | 2299/3125 [40:36<14:44, 1.07s/it]
637
+ 2025-06-11 06:50:16,941 - __main__ - INFO - Epoch [29/30], Step [2300/3125], Loss: 2.1250, Perplexity: 8.3732
638
+ Epoch 29 Training: 77%|███████▋ | 2399/3125 [42:22<12:51, 1.06s/it]
639
+ 2025-06-11 06:52:02,985 - __main__ - INFO - Epoch [29/30], Step [2400/3125], Loss: 2.2709, Perplexity: 9.6881
640
+ Epoch 29 Training: 80%|███████▉ | 2499/3125 [44:08<10:55, 1.05s/it]
641
+ 2025-06-11 06:53:49,065 - __main__ - INFO - Epoch [29/30], Step [2500/3125], Loss: 2.1652, Perplexity: 8.7159
642
+ Epoch 29 Training: 83%|████████▎ | 2599/3125 [45:54<09:09, 1.05s/it]
643
+ 2025-06-11 06:55:35,402 - __main__ - INFO - Epoch [29/30], Step [2600/3125], Loss: 2.4139, Perplexity: 11.1778
644
+ Epoch 29 Training: 86%|████████▋ | 2699/3125 [47:40<07:31, 1.06s/it]
645
+ 2025-06-11 06:57:21,309 - __main__ - INFO - Epoch [29/30], Step [2700/3125], Loss: 2.1402, Perplexity: 8.5011
646
+ Epoch 29 Training: 90%|████████▉ | 2799/3125 [49:26<05:43, 1.05s/it]
647
+ 2025-06-11 06:59:07,135 - __main__ - INFO - Epoch [29/30], Step [2800/3125], Loss: 2.0135, Perplexity: 7.4895
648
+ Epoch 29 Training: 93%|█████████▎| 2899/3125 [51:12<03:56, 1.05s/it]
649
+ 2025-06-11 07:00:52,852 - __main__ - INFO - Epoch [29/30], Step [2900/3125], Loss: 2.1379, Perplexity: 8.4812
650
+ Epoch 29 Training: 96%|█████████▌| 2999/3125 [52:58<02:17, 1.09s/it]
651
+ 2025-06-11 07:02:39,118 - __main__ - INFO - Epoch [29/30], Step [3000/3125], Loss: 2.2424, Perplexity: 9.4156
652
+ Epoch 29 Training: 99%|█████████▉| 3099/3125 [54:44<00:27, 1.05s/it]
653
+ 2025-06-11 07:04:24,904 - __main__ - INFO - Epoch [29/30], Step [3100/3125], Loss: 2.1572, Perplexity: 8.6465
654
+ Epoch 29 Training: 100%|██████████| 3125/3125 [55:11<00:00, 1.06s/it]
655
+ 2025-06-11 07:04:51,485 - __main__ - INFO - Epoch 29 Training finished. Avg Loss: 2.1306, Time: 3311.96s
656
+ Validation: 100%|██████████| 391/391 [11:46<00:00, 1.81s/it]
657
+ 2025-06-11 07:16:38,260 - __main__ - INFO - Validation Avg Loss: 2.5210, Perplexity: 12.4409
658
+ 2025-06-11 07:16:47,455 - __main__ - INFO - Validation BLEU-4: 0.1032
659
+ 2025-06-11 07:16:47,456 - __main__ - INFO - Performing memory optimization after epoch...
660
+ 2025-06-11 07:16:47,750 - __main__ - INFO - CUDA cache emptied.
661
+ 2025-06-11 07:16:48,102 - __main__ - INFO - Python garbage collector run.
662
+ Epoch 30 Training: 3%|▎ | 99/3125 [01:47<52:54, 1.05s/it]
663
+ 2025-06-11 07:18:36,828 - __main__ - INFO - Epoch [30/30], Step [100/3125], Loss: 2.1938, Perplexity: 8.9692
664
+ Epoch 30 Training: 6%|▋ | 199/3125 [03:34<51:39, 1.06s/it]
665
+ 2025-06-11 07:20:23,162 - __main__ - INFO - Epoch [30/30], Step [200/3125], Loss: 2.1014, Perplexity: 8.1780
666
+ Epoch 30 Training: 10%|▉ | 299/3125 [05:19<49:32, 1.05s/it]
667
+ 2025-06-11 07:22:08,720 - __main__ - INFO - Epoch [30/30], Step [300/3125], Loss: 2.2413, Perplexity: 9.4058
668
+ Epoch 30 Training: 13%|█▎ | 399/3125 [07:05<48:04, 1.06s/it]
669
+ 2025-06-11 07:23:54,826 - __main__ - INFO - Epoch [30/30], Step [400/3125], Loss: 2.0270, Perplexity: 7.5915
670
+ Epoch 30 Training: 16%|█▌ | 499/3125 [08:51<46:34, 1.06s/it]
671
+ 2025-06-11 07:25:40,842 - __main__ - INFO - Epoch [30/30], Step [500/3125], Loss: 1.9092, Perplexity: 6.7479
672
+ Epoch 30 Training: 19%|█▉ | 599/3125 [10:37<44:13, 1.05s/it]
673
+ 2025-06-11 07:27:26,885 - __main__ - INFO - Epoch [30/30], Step [600/3125], Loss: 2.1688, Perplexity: 8.7480
674
+ Epoch 30 Training: 22%|██▏ | 699/3125 [12:23<44:03, 1.09s/it]
675
+ 2025-06-11 07:29:13,137 - __main__ - INFO - Epoch [30/30], Step [700/3125], Loss: 2.1492, Perplexity: 8.5776
676
+ Epoch 30 Training: 26%|██▌ | 799/3125 [14:10<41:12, 1.06s/it]
677
+ 2025-06-11 07:30:59,160 - __main__ - INFO - Epoch [30/30], Step [800/3125], Loss: 2.1651, Perplexity: 8.7158
678
+ Epoch 30 Training: 29%|██▉ | 899/3125 [15:55<39:10, 1.06s/it]
679
+ 2025-06-11 07:32:44,756 - __main__ - INFO - Epoch [30/30], Step [900/3125], Loss: 2.0124, Perplexity: 7.4812
680
+ Epoch 30 Training: 32%|███▏ | 999/3125 [17:41<37:03, 1.05s/it]
681
+ 2025-06-11 07:34:30,665 - __main__ - INFO - Epoch [30/30], Step [1000/3125], Loss: 2.3341, Perplexity: 10.3199
682
+ Epoch 30 Training: 35%|███▌ | 1099/3125 [19:27<35:36, 1.05s/it]
683
+ 2025-06-11 07:36:16,441 - __main__ - INFO - Epoch [30/30], Step [1100/3125], Loss: 2.0736, Perplexity: 7.9533
684
+ Epoch 30 Training: 38%|███▊ | 1199/3125 [21:13<34:42, 1.08s/it]
685
+ 2025-06-11 07:38:02,427 - __main__ - INFO - Epoch [30/30], Step [1200/3125], Loss: 2.2919, Perplexity: 9.8934
686
+ Epoch 30 Training: 42%|████▏ | 1299/3125 [22:59<32:05, 1.05s/it]
687
+ 2025-06-11 07:39:48,493 - __main__ - INFO - Epoch [30/30], Step [1300/3125], Loss: 1.8976, Perplexity: 6.6700
688
+ Epoch 30 Training: 45%|████▍ | 1399/3125 [24:45<30:39, 1.07s/it]
689
+ 2025-06-11 07:41:34,729 - __main__ - INFO - Epoch [30/30], Step [1400/3125], Loss: 2.1037, Perplexity: 8.1963
690
+ Epoch 30 Training: 48%|████▊ | 1499/3125 [26:31<28:54, 1.07s/it]
691
+ 2025-06-11 07:43:20,236 - __main__ - INFO - Epoch [30/30], Step [1500/3125], Loss: 1.9812, Perplexity: 7.2515
692
+ Epoch 30 Training: 51%|█████ | 1599/3125 [28:16<26:55, 1.06s/it]
693
+ 2025-06-11 07:45:05,806 - __main__ - INFO - Epoch [30/30], Step [1600/3125], Loss: 1.8422, Perplexity: 6.3105
694
+ Epoch 30 Training: 54%|█████▍ | 1699/3125 [30:01<24:47, 1.04s/it]
695
+ 2025-06-11 07:46:51,077 - __main__ - INFO - Epoch [30/30], Step [1700/3125], Loss: 2.1148, Perplexity: 8.2879
696
+ Epoch 30 Training: 58%|█████▊ | 1799/3125 [31:47<23:33, 1.07s/it]
697
+ 2025-06-11 07:48:36,971 - __main__ - INFO - Epoch [30/30], Step [1800/3125], Loss: 2.0392, Perplexity: 7.6841
698
+ Epoch 30 Training: 61%|██████ | 1899/3125 [33:33<21:36, 1.06s/it]
699
+ 2025-06-11 07:50:22,901 - __main__ - INFO - Epoch [30/30], Step [1900/3125], Loss: 2.1974, Perplexity: 9.0015
700
+ Epoch 30 Training: 64%|██████▍ | 1999/3125 [35:20<20:00, 1.07s/it]
701
+ 2025-06-11 07:52:09,317 - __main__ - INFO - Epoch [30/30], Step [2000/3125], Loss: 1.9900, Perplexity: 7.3157
702
+ Epoch 30 Training: 67%|██████▋ | 2099/3125 [37:06<18:26, 1.08s/it]
703
+ 2025-06-11 07:53:55,343 - __main__ - INFO - Epoch [30/30], Step [2100/3125], Loss: 2.1626, Perplexity: 8.6936
704
+ Epoch 30 Training: 70%|███████ | 2199/3125 [38:51<16:06, 1.04s/it]
705
+ 2025-06-11 07:55:40,879 - __main__ - INFO - Epoch [30/30], Step [2200/3125], Loss: 2.0304, Perplexity: 7.6174
706
+ Epoch 30 Training: 74%|███████▎ | 2299/3125 [40:37<14:34, 1.06s/it]
707
+ 2025-06-11 07:57:26,744 - __main__ - INFO - Epoch [30/30], Step [2300/3125], Loss: 2.1228, Perplexity: 8.3543
708
+ Epoch 30 Training: 77%|███████▋ | 2399/3125 [42:22<12:59, 1.07s/it]
709
+ 2025-06-11 07:59:12,098 - __main__ - INFO - Epoch [30/30], Step [2400/3125], Loss: 2.1954, Perplexity: 8.9837
710
+ Epoch 30 Training: 80%|███████▉ | 2499/3125 [44:08<10:49, 1.04s/it]
711
+ 2025-06-11 08:00:57,947 - __main__ - INFO - Epoch [30/30], Step [2500/3125], Loss: 2.1629, Perplexity: 8.6964
712
+ Epoch 30 Training: 83%|████████▎ | 2599/3125 [45:55<09:26, 1.08s/it]
713
+ 2025-06-11 08:02:44,387 - __main__ - INFO - Epoch [30/30], Step [2600/3125], Loss: 2.4293, Perplexity: 11.3511
714
+ Epoch 30 Training: 86%|████████▋ | 2699/3125 [47:41<07:26, 1.05s/it]
715
+ 2025-06-11 08:04:30,330 - __main__ - INFO - Epoch [30/30], Step [2700/3125], Loss: 1.8879, Perplexity: 6.6057
716
+ Epoch 30 Training: 90%|████████▉ | 2799/3125 [49:27<05:51, 1.08s/it]
717
+ 2025-06-11 08:06:16,399 - __main__ - INFO - Epoch [30/30], Step [2800/3125], Loss: 2.2829, Perplexity: 9.8055
718
+ Epoch 30 Training: 93%|█████████▎| 2899/3125 [51:13<04:01, 1.07s/it]
719
+ 2025-06-11 08:08:02,721 - __main__ - INFO - Epoch [30/30], Step [2900/3125], Loss: 2.2601, Perplexity: 9.5845
720
+ Epoch 30 Training: 96%|█████████▌| 2999/3125 [53:00<02:13, 1.06s/it]
721
+ 2025-06-11 08:09:49,266 - __main__ - INFO - Epoch [30/30], Step [3000/3125], Loss: 1.9881, Perplexity: 7.3020
722
+ Epoch 30 Training: 99%|█████████▉| 3099/3125 [54:46<00:27, 1.06s/it]
723
+ 2025-06-11 08:11:35,190 - __main__ - INFO - Epoch [30/30], Step [3100/3125], Loss: 2.1743, Perplexity: 8.7960
724
+ Epoch 30 Training: 100%|██████████| 3125/3125 [55:13<00:00, 1.06s/it]
725
+ 2025-06-11 08:12:01,688 - __main__ - INFO - Epoch 30 Training finished. Avg Loss: 2.1133, Time: 3313.58s
726
+ Validation: 100%|██████████| 391/391 [11:49<00:00, 1.81s/it]
727
+ 2025-06-11 08:23:51,320 - __main__ - INFO - Validation Avg Loss: 2.5266, Perplexity: 12.5108
728
+ 2025-06-11 08:24:00,605 - __main__ - INFO - Validation BLEU-4: 0.1039
729
+ 2025-06-11 08:24:01,277 - __main__ - INFO - Saved periodic model checkpoint to ./output/model_epoch_30.pth
730
+ 2025-06-11 08:24:01,278 - __main__ - INFO - Performing memory optimization after epoch...
731
+ 2025-06-11 08:24:01,543 - __main__ - INFO - CUDA cache emptied.
732
+ 2025-06-11 08:24:01,902 - __main__ - INFO - Python garbage collector run.
733
+ 2025-06-11 08:24:01,904 - __main__ - INFO - Training complete.
734
+ 2025-06-11 08:24:01,904 - __main__ - INFO - Model Training Complete!
text_files/training_metrics.csv ADDED
@@ -0,0 +1,1190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Epoch,Step,Loss,Perplexity
2
+ 1,100,5.4706,237.609
3
+ 1,200,5.1889,179.2651
4
+ 1,300,4.7951,120.9148
5
+ 1,400,4.1438,63.0419
6
+ 1,500,4.0644,58.2316
7
+ 1,600,4.1386,62.7179
8
+ 1,700,4.4148,82.6655
9
+ 1,800,4.0775,58.9954
10
+ 1,900,3.8928,49.0485
11
+ 1,1000,3.7069,40.7267
12
+ 1,1100,3.7165,41.1201
13
+ 1,1200,3.6987,40.396
14
+ 1,1300,3.8443,46.7262
15
+ 1,1400,3.7788,43.7654
16
+ 1,1500,3.9427,51.5586
17
+ 1,1600,3.9204,50.4209
18
+ 1,1700,3.6418,38.16
19
+ 1,1800,3.5172,33.6913
20
+ 1,1900,3.6409,38.1244
21
+ 1,2000,3.2969,27.0292
22
+ 1,2100,3.4517,31.5544
23
+ 1,2200,3.492,32.8529
24
+ 1,2300,3.7028,40.5603
25
+ 1,2400,3.6794,39.6238
26
+ 1,2500,3.4977,33.0382
27
+ 1,2600,3.4883,32.7311
28
+ 1,2700,3.4596,31.8036
29
+ 1,2800,3.4043,30.092
30
+ 1,2900,3.1842,24.1488
31
+ 1,3000,3.2843,26.6914
32
+ 1,3100,3.195,24.41
33
+ 2,100,3.1401,23.1065
34
+ 2,200,3.0563,21.2493
35
+ 2,300,3.2782,26.5282
36
+ 2,400,3.1834,24.1295
37
+ 2,500,3.3472,28.4244
38
+ 2,600,3.1609,23.5928
39
+ 2,700,2.9984,20.0538
40
+ 2,800,3.2494,25.7758
41
+ 2,900,3.1955,24.4232
42
+ 2,1000,3.4101,30.2673
43
+ 2,1100,3.3706,29.0971
44
+ 2,1200,3.168,23.7608
45
+ 2,1300,3.0362,20.8251
46
+ 2,1400,3.0925,22.0321
47
+ 2,1500,3.1057,22.3241
48
+ 2,1600,3.3277,27.873
49
+ 2,1700,3.161,23.5945
50
+ 2,1800,3.2718,26.3582
51
+ 2,1900,3.1425,23.1612
52
+ 2,2000,3.0522,21.1619
53
+ 2,2100,2.9711,19.5126
54
+ 2,2200,3.0638,21.4079
55
+ 2,2300,3.1867,24.2093
56
+ 2,2400,3.006,20.207
57
+ 2,2500,3.1718,23.8494
58
+ 2,2600,3.0628,21.3874
59
+ 2,2700,3.3898,29.6609
60
+ 2,2800,2.7914,16.3038
61
+ 2,2900,2.8758,17.7403
62
+ 2,3000,3.0766,21.6847
63
+ 2,3100,3.0787,21.7303
64
+ 3,100,3.1375,23.0468
65
+ 3,200,2.9503,19.1119
66
+ 3,300,2.9096,18.3486
67
+ 3,400,2.9213,18.5648
68
+ 3,500,2.7807,16.1304
69
+ 3,600,2.9757,19.6029
70
+ 3,700,2.7691,15.9448
71
+ 3,800,2.629,13.8597
72
+ 3,900,2.7852,16.2028
73
+ 3,1000,2.9282,18.6934
74
+ 3,1100,3.0185,20.4607
75
+ 3,1200,3.1393,23.0879
76
+ 3,1300,2.9986,20.058
77
+ 3,1400,2.849,17.2704
78
+ 3,1500,3.1596,23.561
79
+ 3,1600,2.9312,18.7505
80
+ 3,1700,2.8247,16.8555
81
+ 3,1800,2.9927,19.9386
82
+ 3,1900,2.7951,16.3638
83
+ 3,2000,2.8393,17.1032
84
+ 3,2100,3.0383,20.8706
85
+ 3,2200,3.158,23.5244
86
+ 3,2300,2.9753,19.5955
87
+ 3,2400,2.7754,16.0452
88
+ 3,2500,2.5844,13.2559
89
+ 3,2600,2.9575,19.249
90
+ 3,2700,2.7714,15.9803
91
+ 3,2800,2.6014,13.4829
92
+ 3,2900,2.924,18.6159
93
+ 3,3000,2.8361,17.0495
94
+ 3,3100,2.9557,19.2145
95
+ 4,100,2.8362,17.0513
96
+ 4,200,2.5668,13.0241
97
+ 4,300,2.7211,15.1978
98
+ 4,400,2.9763,19.6153
99
+ 4,500,2.7447,15.5594
100
+ 4,600,2.6688,14.4227
101
+ 4,700,2.756,15.737
102
+ 4,800,2.7199,15.1782
103
+ 4,900,2.9431,18.9745
104
+ 4,1000,2.7849,16.1979
105
+ 4,1100,2.6553,14.2291
106
+ 4,1200,2.7191,15.1662
107
+ 4,1300,2.7089,15.0125
108
+ 4,1400,2.9824,19.7352
109
+ 4,1500,2.7723,15.9946
110
+ 4,1600,2.8024,16.4847
111
+ 4,1700,2.7457,15.5758
112
+ 4,1800,2.9873,19.8314
113
+ 4,1900,2.6393,14.0032
114
+ 4,2000,2.6532,14.1999
115
+ 4,2100,2.7995,16.4372
116
+ 4,2200,2.7488,15.6245
117
+ 4,2300,2.8463,17.2242
118
+ 4,2400,2.6901,14.7335
119
+ 4,2500,2.889,17.9762
120
+ 4,2600,2.6551,14.227
121
+ 4,2700,2.8474,17.2437
122
+ 4,2800,2.7218,15.2075
123
+ 4,2900,2.6753,14.5169
124
+ 4,3000,2.8698,17.6342
125
+ 4,3100,2.5715,13.0854
126
+ 5,100,2.7858,16.2124
127
+ 5,200,2.5799,13.1955
128
+ 5,300,2.7141,15.091
129
+ 5,400,2.6322,13.9042
130
+ 5,500,2.992,19.9248
131
+ 5,600,2.5876,13.2972
132
+ 5,700,2.6221,13.7644
133
+ 5,800,2.5546,12.8666
134
+ 5,900,2.7899,16.2789
135
+ 5,1000,2.718,15.1505
136
+ 5,1100,2.6338,13.9268
137
+ 5,1200,2.9994,20.0732
138
+ 5,1300,2.5138,12.3518
139
+ 5,1400,2.7708,15.9718
140
+ 5,1500,2.6756,14.5208
141
+ 5,1600,2.6377,13.9804
142
+ 5,1700,2.6101,13.6009
143
+ 5,1800,2.6736,14.4918
144
+ 5,1900,2.6328,13.9123
145
+ 5,2000,2.5901,13.3318
146
+ 5,2100,2.9779,19.6469
147
+ 5,2200,2.5809,13.2096
148
+ 5,2300,2.736,15.4247
149
+ 5,2400,2.5268,12.5135
150
+ 5,2500,2.8699,17.6357
151
+ 5,2600,2.7812,16.1382
152
+ 5,2700,2.654,14.2111
153
+ 5,2800,2.6715,14.4612
154
+ 5,2900,2.878,17.7783
155
+ 5,3000,2.6441,14.0706
156
+ 5,3100,2.6745,14.5049
157
+ 6,100,2.9338,18.7981
158
+ 6,200,2.746,15.5799
159
+ 6,300,2.3894,10.9066
160
+ 6,400,2.6891,14.7191
161
+ 6,500,2.7032,14.9273
162
+ 6,600,2.6848,14.6553
163
+ 6,700,2.4991,12.1715
164
+ 6,800,2.5794,13.1892
165
+ 6,900,2.5044,12.2366
166
+ 6,1000,2.6207,13.7448
167
+ 6,1100,2.5424,12.7103
168
+ 6,1200,2.554,12.8587
169
+ 6,1300,2.5427,12.7145
170
+ 6,1400,2.6368,13.9681
171
+ 6,1500,2.5717,13.0879
172
+ 6,1600,2.4953,12.1255
173
+ 6,1700,2.73,15.3336
174
+ 6,1800,2.6784,14.5613
175
+ 6,1900,2.6738,14.4946
176
+ 6,2000,2.6148,13.6649
177
+ 6,2100,2.8018,16.4748
178
+ 6,2200,2.6942,14.794
179
+ 6,2300,2.4844,11.9935
180
+ 6,2400,2.4125,11.1615
181
+ 6,2500,2.5616,12.9564
182
+ 6,2600,2.5049,12.2425
183
+ 6,2700,2.4866,12.0206
184
+ 6,2800,2.3411,10.3931
185
+ 6,2900,2.6441,14.0713
186
+ 6,3000,2.5855,13.2695
187
+ 6,3100,2.7371,15.4419
188
+ 7,100,2.6545,14.218
189
+ 7,200,2.3907,10.9206
190
+ 7,300,2.3964,10.9837
191
+ 7,400,2.2917,9.8918
192
+ 7,500,2.546,12.7565
193
+ 7,600,2.7165,15.1276
194
+ 7,700,2.316,10.135
195
+ 7,800,2.4257,11.3097
196
+ 7,900,2.8313,16.9669
197
+ 7,1000,2.4841,11.9898
198
+ 7,1100,2.4737,11.8667
199
+ 7,1200,2.3904,10.9178
200
+ 7,1300,2.4815,11.9595
201
+ 7,1400,2.7324,15.3697
202
+ 7,1500,2.7363,15.4298
203
+ 7,1600,2.7751,16.0397
204
+ 7,1700,2.5598,12.9329
205
+ 7,1800,2.7369,15.4388
206
+ 7,1900,2.8117,16.6374
207
+ 7,2000,2.636,13.9572
208
+ 7,2100,2.4383,11.4533
209
+ 7,2200,2.7142,15.0932
210
+ 7,2300,2.459,11.6927
211
+ 7,2400,2.6457,14.0935
212
+ 7,2500,2.6708,14.4518
213
+ 7,2600,2.5663,13.0179
214
+ 7,2700,2.802,16.4769
215
+ 7,2800,2.6069,13.5576
216
+ 7,2900,2.5213,12.4453
217
+ 7,3000,2.5426,12.7128
218
+ 7,3100,2.6069,13.5576
219
+ 8,100,2.5949,13.3954
220
+ 8,200,2.3867,10.8772
221
+ 8,300,2.4636,11.7471
222
+ 8,400,2.5906,13.3381
223
+ 8,500,2.5785,13.1772
224
+ 8,600,2.5902,13.333
225
+ 8,700,2.6504,14.1599
226
+ 8,800,2.5812,13.2125
227
+ 8,900,2.3477,10.4617
228
+ 8,1000,2.5416,12.7004
229
+ 8,1100,2.5581,12.9117
230
+ 8,1200,2.6781,14.5574
231
+ 8,1300,2.4678,11.7967
232
+ 8,1400,2.7964,16.3863
233
+ 8,1500,2.5617,12.9583
234
+ 8,1600,2.4647,11.7597
235
+ 8,1700,2.5186,12.4108
236
+ 8,1800,2.7458,15.5764
237
+ 8,1900,2.4218,11.2661
238
+ 8,2000,2.4996,12.1774
239
+ 8,2100,2.4715,11.84
240
+ 8,2200,2.7304,15.3395
241
+ 8,2300,2.6088,13.5826
242
+ 8,2400,2.2952,9.9266
243
+ 8,2500,2.6801,14.5865
244
+ 8,2600,2.6453,14.0877
245
+ 8,2700,2.498,12.1576
246
+ 8,2800,2.4855,12.0065
247
+ 8,2900,2.7647,15.8748
248
+ 8,3000,2.3954,10.9722
249
+ 8,3100,2.5672,13.0291
250
+ 9,100,2.4885,12.0428
251
+ 9,200,2.4464,11.5461
252
+ 9,300,2.5567,12.8926
253
+ 9,400,2.5434,12.7234
254
+ 9,500,2.4282,11.338
255
+ 9,600,2.4907,12.0691
256
+ 9,700,2.5079,12.2789
257
+ 9,800,2.4735,11.8636
258
+ 9,900,2.5014,12.2001
259
+ 9,1000,2.7705,15.9662
260
+ 9,1100,2.6952,14.8082
261
+ 10,100,2.6771,14.5422
262
+ 10,200,2.5936,13.3779
263
+ 10,300,2.731,15.3478
264
+ 10,400,2.96,19.2978
265
+ 10,500,2.6822,14.6177
266
+ 10,600,2.7823,16.1564
267
+ 10,700,2.5442,12.7334
268
+ 10,800,2.6036,13.512
269
+ 10,900,2.46,11.7049
270
+ 10,1000,2.8091,16.5947
271
+ 10,1100,2.668,14.4108
272
+ 10,1200,2.708,14.9991
273
+ 10,1300,2.6393,14.0032
274
+ 10,1400,2.722,15.2114
275
+ 10,1500,2.9185,18.5139
276
+ 10,1600,2.7292,15.3205
277
+ 10,1700,2.7339,15.3932
278
+ 10,1800,2.4978,12.1557
279
+ 10,1900,2.5602,12.9386
280
+ 10,2000,2.6032,13.5067
281
+ 10,2100,2.6049,13.5293
282
+ 10,2200,2.8925,18.039
283
+ 10,2300,2.6391,14.0
284
+ 10,2400,2.6457,14.0939
285
+ 10,2500,2.6442,14.0721
286
+ 10,2600,2.5336,12.5989
287
+ 10,2700,2.6224,13.7686
288
+ 10,2800,2.5839,13.2486
289
+ 10,2900,2.5405,12.6863
290
+ 10,3000,2.7648,15.8761
291
+ 10,3100,2.7951,16.3647
292
+ 11,100,2.3691,10.6873
293
+ 11,200,2.4723,11.8502
294
+ 11,300,2.5049,12.242
295
+ 11,400,2.5821,13.2247
296
+ 11,500,2.9378,18.875
297
+ 11,600,2.4629,11.7391
298
+ 11,700,2.6155,13.6746
299
+ 11,800,2.4537,11.6314
300
+ 11,900,2.4852,12.003
301
+ 11,1000,2.6182,13.7113
302
+ 11,1100,2.3901,10.915
303
+ 11,1200,2.6152,13.6705
304
+ 11,1300,2.7108,15.042
305
+ 11,1400,2.5307,12.5626
306
+ 11,1500,2.541,12.6921
307
+ 11,1600,2.5365,12.6349
308
+ 11,1700,2.8819,17.8483
309
+ 11,1800,2.5707,13.0754
310
+ 11,1900,2.4161,11.2024
311
+ 11,2000,2.5705,13.0728
312
+ 11,2100,2.4674,11.7918
313
+ 11,2200,2.5938,13.3808
314
+ 11,2300,2.6236,13.7849
315
+ 11,2400,2.3626,10.6186
316
+ 11,2500,2.7776,16.08
317
+ 11,2600,2.3201,10.1771
318
+ 11,2700,2.7016,14.904
319
+ 11,2800,2.5685,13.0467
320
+ 11,2900,2.5174,12.3969
321
+ 11,3000,2.4016,11.0412
322
+ 11,3100,2.7063,14.9733
323
+ 12,100,2.4403,11.477
324
+ 12,200,2.3241,10.2174
325
+ 12,300,2.3308,10.2864
326
+ 12,400,2.5223,12.4575
327
+ 12,500,2.4984,12.1631
328
+ 12,600,2.4451,11.5312
329
+ 12,700,2.379,10.7946
330
+ 12,800,2.3688,10.6844
331
+ 12,900,2.5906,13.3372
332
+ 12,1000,2.529,12.5412
333
+ 12,1100,2.3719,10.7179
334
+ 12,1200,2.5641,12.9886
335
+ 12,1300,2.5392,12.6697
336
+ 12,1400,2.6697,14.4351
337
+ 12,1500,2.3967,10.987
338
+ 12,1600,2.6011,13.478
339
+ 12,1700,2.5272,12.5179
340
+ 12,1800,2.3498,10.4834
341
+ 12,1900,2.8234,16.8332
342
+ 12,2000,2.1771,8.8204
343
+ 12,2100,2.3431,10.4136
344
+ 12,2200,2.6427,14.0514
345
+ 12,2300,2.4172,11.2148
346
+ 12,2400,2.7323,15.3679
347
+ 12,2500,2.7351,15.4109
348
+ 12,2600,2.4624,11.7333
349
+ 12,2700,2.4864,12.018
350
+ 12,2800,2.7285,15.3102
351
+ 12,2900,2.298,9.9542
352
+ 12,3000,2.5476,12.7761
353
+ 12,3100,2.3022,9.9965
354
+ 13,100,2.4482,11.5679
355
+ 13,200,2.4197,11.2422
356
+ 13,300,2.6092,13.5885
357
+ 13,400,2.3414,10.3956
358
+ 13,500,2.321,10.1858
359
+ 13,600,2.3739,10.7388
360
+ 13,700,2.5507,12.8167
361
+ 13,800,2.4497,11.5848
362
+ 13,900,2.4555,11.6527
363
+ 13,1000,2.2727,9.7054
364
+ 13,1100,2.4161,11.2025
365
+ 13,1200,2.5078,12.2775
366
+ 13,1300,2.3249,10.2256
367
+ 13,1400,2.4165,11.2065
368
+ 13,1500,2.6065,13.5516
369
+ 13,1600,2.4175,11.2178
370
+ 13,1700,2.6172,13.6977
371
+ 13,1800,2.6982,14.8536
372
+ 13,1900,2.5895,13.3233
373
+ 13,2000,2.3281,10.2589
374
+ 13,2100,2.4475,11.5592
375
+ 13,2200,2.3961,10.9799
376
+ 13,2300,2.2835,9.8108
377
+ 13,2400,2.4577,11.6781
378
+ 13,2500,2.4646,11.7588
379
+ 13,2600,2.4594,11.6975
380
+ 13,2700,2.1887,8.9235
381
+ 13,2800,2.3643,10.6365
382
+ 13,2900,2.3395,10.3765
383
+ 13,3000,2.6757,14.5222
384
+ 13,3100,2.3261,10.2375
385
+ 14,100,2.2691,9.6705
386
+ 14,200,2.2401,9.394
387
+ 14,300,2.2769,9.7467
388
+ 14,400,2.3194,10.1698
389
+ 14,500,2.3454,10.4371
390
+ 14,600,2.3314,10.2923
391
+ 14,700,2.1478,8.5664
392
+ 14,800,2.4812,11.9557
393
+ 14,900,2.5592,12.9257
394
+ 14,1000,2.4252,11.305
395
+ 14,1100,2.428,11.3366
396
+ 14,1200,2.2805,9.7818
397
+ 14,1300,2.3755,10.756
398
+ 14,1400,2.4423,11.4991
399
+ 14,1500,2.7027,14.9202
400
+ 14,1600,2.5307,12.5619
401
+ 14,1700,2.6793,14.5745
402
+ 14,1800,2.3497,10.4828
403
+ 14,1900,2.2267,9.2696
404
+ 14,2000,2.1601,8.6721
405
+ 14,2100,2.3438,10.4212
406
+ 14,2200,2.3716,10.714
407
+ 14,2300,2.4434,11.5116
408
+ 14,2400,2.5465,12.7625
409
+ 14,2500,2.2639,9.6206
410
+ 14,2600,2.4817,11.9613
411
+ 14,2700,2.5853,13.2677
412
+ 14,2800,2.466,11.7758
413
+ 14,2900,2.219,9.1977
414
+ 14,3000,2.3681,10.6775
415
+ 14,3100,2.3602,10.593
416
+ 15,100,2.4716,11.8419
417
+ 15,200,2.2404,9.3972
418
+ 15,300,2.2223,9.2283
419
+ 15,400,2.2692,9.6715
420
+ 15,500,2.3727,10.7258
421
+ 15,600,2.368,10.6762
422
+ 15,700,2.2971,9.945
423
+ 15,800,2.2048,9.068
424
+ 15,900,2.6075,13.5645
425
+ 15,1000,2.4252,11.3049
426
+ 15,1100,2.4678,11.7969
427
+ 15,1200,2.477,11.9052
428
+ 15,1300,2.6406,14.022
429
+ 15,1400,2.5408,12.6903
430
+ 15,1500,2.2871,9.8462
431
+ 15,1600,2.2426,9.4179
432
+ 15,1700,2.4471,11.5546
433
+ 15,1800,2.2109,9.1242
434
+ 15,1900,2.2128,9.1415
435
+ 15,2000,2.4859,12.0115
436
+ 15,2100,2.1893,8.929
437
+ 15,2200,2.3444,10.4273
438
+ 15,2300,2.3961,10.9804
439
+ 15,2400,2.4591,11.6937
440
+ 15,2500,2.3786,10.7898
441
+ 15,2600,2.2436,9.4268
442
+ 15,2700,2.5708,13.0756
443
+ 15,2800,2.3898,10.9116
444
+ 15,2900,2.4656,11.7705
445
+ 15,3000,2.3119,10.0933
446
+ 15,3100,2.3714,10.7121
447
+ 16,100,2.3317,10.2956
448
+ 16,200,2.3495,10.4806
449
+ 16,300,2.2306,9.3058
450
+ 16,400,2.3325,10.3034
451
+ 16,500,2.1694,8.7528
452
+ 16,600,2.3946,10.9643
453
+ 16,700,2.3674,10.67
454
+ 16,800,2.0632,7.8708
455
+ 16,900,2.294,9.9146
456
+ 16,1000,2.2395,9.3891
457
+ 16,1100,2.4083,11.115
458
+ 16,1200,2.3857,10.8667
459
+ 16,1300,2.3296,10.2736
460
+ 16,1400,2.1133,8.2756
461
+ 16,1500,2.3756,10.7575
462
+ 16,1600,2.3544,10.5314
463
+ 16,1700,2.3411,10.3928
464
+ 16,1800,2.2282,9.2828
465
+ 16,1900,2.3572,10.5609
466
+ 16,2000,2.265,9.6316
467
+ 16,2100,2.4694,11.8158
468
+ 16,2200,2.2784,9.7614
469
+ 16,2300,2.3814,10.8201
470
+ 16,2400,2.3831,10.8388
471
+ 16,2500,2.2298,9.2982
472
+ 16,2600,2.4608,11.7146
473
+ 16,2700,2.4152,11.1916
474
+ 16,2800,2.486,12.0134
475
+ 16,2900,2.162,8.6885
476
+ 16,3000,2.6663,14.3864
477
+ 16,3100,2.514,12.3541
478
+ 17,100,2.1717,8.773
479
+ 17,100,2.3526,10.5128
480
+ 17,200,2.2953,9.9273
481
+ 17,200,2.4604,11.709
482
+ 17,300,2.1929,8.9608
483
+ 17,300,2.576,13.1449
484
+ 17,400,2.1077,8.2291
485
+ 17,400,2.5386,12.6615
486
+ 17,500,2.3065,10.0394
487
+ 17,500,2.4724,11.8508
488
+ 17,600,2.0144,7.4962
489
+ 17,600,3.0153,20.3955
490
+ 17,700,2.4351,11.4164
491
+ 17,700,2.6407,14.0232
492
+ 17,800,2.3415,10.3965
493
+ 17,800,2.5756,13.1391
494
+ 17,900,2.1439,8.5324
495
+ 17,900,2.205,9.0706
496
+ 17,1000,2.4303,11.3623
497
+ 17,1000,2.3957,10.9755
498
+ 17,1100,2.2407,9.3998
499
+ 17,1100,2.2961,9.9352
500
+ 17,1200,2.266,9.6408
501
+ 17,1200,2.2552,9.5371
502
+ 17,1300,2.2952,9.9262
503
+ 17,1300,2.5,12.1825
504
+ 17,1400,2.4064,11.0944
505
+ 17,1400,2.7185,15.1579
506
+ 17,1500,2.5295,12.5467
507
+ 17,1500,2.5459,12.7545
508
+ 17,1600,2.1637,8.703
509
+ 17,1600,2.3916,10.9307
510
+ 17,1700,2.3608,10.5998
511
+ 17,1700,2.5494,12.7994
512
+ 17,1800,2.1197,8.3285
513
+ 17,1800,2.4654,11.7682
514
+ 17,1900,2.2632,9.6134
515
+ 17,1900,2.6649,14.3659
516
+ 17,2000,2.0741,7.9572
517
+ 17,2000,2.2949,9.9233
518
+ 17,2100,2.2237,9.2414
519
+ 17,2100,2.6354,13.9489
520
+ 17,2200,2.1895,8.9311
521
+ 17,2200,2.5667,13.0228
522
+ 17,2300,2.3708,10.7057
523
+ 17,2300,2.4153,11.1936
524
+ 17,2400,2.2712,9.6909
525
+ 17,2400,2.5093,12.2963
526
+ 17,2500,2.3275,10.2521
527
+ 17,2500,2.3697,10.6938
528
+ 17,2600,2.1308,8.4219
529
+ 17,2600,2.5674,13.032
530
+ 17,2700,2.3833,10.8407
531
+ 17,2700,2.4103,11.1374
532
+ 17,2800,2.4461,11.543
533
+ 17,2800,2.4239,11.2903
534
+ 17,2900,2.06,7.8457
535
+ 17,2900,2.6061,13.5467
536
+ 17,3000,2.2095,9.1108
537
+ 17,3000,2.3188,10.1634
538
+ 17,3100,2.0986,8.1547
539
+ 17,3100,2.3833,10.8402
540
+ 18,100,2.1066,8.2205
541
+ 18,100,2.5242,12.4803
542
+ 18,200,2.1505,8.5893
543
+ 18,200,2.5674,13.0321
544
+ 18,300,2.228,9.2809
545
+ 18,300,2.4194,11.2394
546
+ 18,400,2.2427,9.4188
547
+ 18,400,2.8139,16.6747
548
+ 18,500,2.2644,9.6253
549
+ 18,500,2.3331,10.3095
550
+ 18,600,2.2968,9.9419
551
+ 18,600,2.3093,10.0678
552
+ 18,700,2.4407,11.4816
553
+ 18,700,2.3935,10.9519
554
+ 18,800,2.3881,10.8931
555
+ 18,800,2.4676,11.7936
556
+ 18,900,2.2182,9.1906
557
+ 18,900,2.4923,12.089
558
+ 18,1000,2.3312,10.2898
559
+ 18,1000,2.346,10.4438
560
+ 18,1100,2.0509,7.7749
561
+ 18,1100,2.3353,10.3324
562
+ 18,1200,2.349,10.4752
563
+ 18,1200,2.6501,14.1561
564
+ 18,1300,2.2147,9.159
565
+ 18,1300,2.5033,12.2222
566
+ 18,1400,2.3278,10.2557
567
+ 18,1400,2.5552,12.8738
568
+ 18,1500,1.9178,6.8062
569
+ 18,1500,2.1927,8.9596
570
+ 18,1600,2.2685,9.6654
571
+ 18,1600,2.497,12.1465
572
+ 18,1700,2.2014,9.0381
573
+ 18,1700,2.4935,12.1037
574
+ 18,1800,2.0424,7.7092
575
+ 18,1800,2.4783,11.9213
576
+ 18,1900,2.3529,10.5158
577
+ 18,1900,2.3436,10.4189
578
+ 18,2000,2.2688,9.6676
579
+ 18,2000,2.5853,13.2677
580
+ 18,2100,2.1891,8.927
581
+ 18,2100,2.5161,12.3804
582
+ 18,2200,2.1682,8.7424
583
+ 18,2200,2.6254,13.8102
584
+ 18,2300,2.3965,10.9842
585
+ 18,2300,2.4902,12.0631
586
+ 18,2400,2.2135,9.148
587
+ 18,2400,2.4329,11.3913
588
+ 18,2500,2.4143,11.1821
589
+ 18,2500,2.4294,11.352
590
+ 18,2600,2.3422,10.4039
591
+ 18,2600,2.5354,12.621
592
+ 18,2700,2.2772,9.7493
593
+ 18,2700,2.4877,12.0333
594
+ 18,2800,2.3167,10.1422
595
+ 18,2800,2.4673,11.7911
596
+ 18,2900,2.3364,10.3438
597
+ 18,2900,2.3145,10.1199
598
+ 18,3000,2.0664,7.8962
599
+ 18,3000,2.5194,12.4214
600
+ 18,3100,2.2455,9.4448
601
+ 18,3100,2.6512,14.1717
602
+ 19,100,2.3789,10.7934
603
+ 19,200,2.4119,11.1547
604
+ 19,300,2.4709,11.8328
605
+ 19,400,2.3014,9.9881
606
+ 19,500,2.5112,12.3191
607
+ 19,600,2.4034,11.061
608
+ 19,700,2.4421,11.4974
609
+ 19,800,2.4358,11.4253
610
+ 19,900,2.4511,11.6007
611
+ 19,1000,2.3752,10.7536
612
+ 19,1100,2.2818,9.7943
613
+ 19,1200,2.526,12.5039
614
+ 19,1300,2.235,9.3461
615
+ 19,1400,2.5668,13.0242
616
+ 19,1500,2.438,11.4499
617
+ 19,1600,2.1895,8.9306
618
+ 19,1700,2.3286,10.264
619
+ 19,1800,2.3863,10.8737
620
+ 19,1900,2.4867,12.0217
621
+ 19,2000,2.3649,10.643
622
+ 19,2100,2.3546,10.5342
623
+ 19,2200,2.3941,10.9579
624
+ 19,2300,2.1938,8.9695
625
+ 19,2400,2.4041,11.0679
626
+ 19,2500,2.5131,12.3433
627
+ 19,2600,2.2205,9.2123
628
+ 19,2700,2.293,9.9044
629
+ 19,2800,2.5673,13.0307
630
+ 19,2900,2.4287,11.3445
631
+ 19,3000,2.3494,10.4788
632
+ 19,3100,2.6807,14.5951
633
+ 20,100,2.2876,9.851
634
+ 20,200,2.3219,10.1953
635
+ 20,300,2.318,10.1558
636
+ 20,400,2.2775,9.7521
637
+ 20,500,2.3627,10.6199
638
+ 20,600,2.584,13.2502
639
+ 20,700,2.3991,11.013
640
+ 20,800,2.32,10.1755
641
+ 20,900,2.1218,8.3458
642
+ 20,1000,2.3538,10.5252
643
+ 20,1100,2.222,9.2257
644
+ 20,1200,2.2933,9.9077
645
+ 20,1300,2.4057,11.0867
646
+ 20,1400,2.4458,11.5401
647
+ 20,1500,2.283,9.8058
648
+ 20,1600,2.3211,10.1873
649
+ 20,1700,2.4251,11.3038
650
+ 20,1800,2.3395,10.3759
651
+ 20,1900,2.591,13.3425
652
+ 20,2000,2.2993,9.9669
653
+ 20,2100,2.1527,8.6081
654
+ 20,2200,2.3867,10.8779
655
+ 20,2300,2.3479,10.4636
656
+ 20,2400,2.234,9.3375
657
+ 20,2500,2.3098,10.0721
658
+ 20,2600,2.3198,10.1732
659
+ 20,2700,2.3959,10.9776
660
+ 20,2800,2.7605,15.808
661
+ 20,2900,2.4974,12.1514
662
+ 20,3000,2.4413,11.4875
663
+ 20,3100,2.3338,10.3173
664
+ 21,100,2.1393,8.4933
665
+ 21,100,2.4415,11.4905
666
+ 21,200,2.3351,10.3309
667
+ 21,200,2.4132,11.1694
668
+ 21,300,2.0899,8.0842
669
+ 21,300,2.5896,13.3244
670
+ 21,400,2.1491,8.5775
671
+ 21,400,2.4345,11.4096
672
+ 21,500,2.2872,9.8477
673
+ 21,500,2.4611,11.7174
674
+ 21,600,2.4272,11.3273
675
+ 21,600,2.755,15.721
676
+ 21,700,2.1416,8.513
677
+ 21,700,2.3508,10.4938
678
+ 21,800,2.3899,10.9122
679
+ 21,800,2.5791,13.1847
680
+ 21,900,2.3071,10.045
681
+ 21,900,2.2819,9.7949
682
+ 21,1000,2.1995,9.0203
683
+ 21,1000,2.4685,11.8042
684
+ 21,1100,2.3339,10.3183
685
+ 21,1100,2.5896,13.3245
686
+ 21,1200,2.4029,11.0547
687
+ 21,1200,2.2209,9.216
688
+ 21,1300,2.2694,9.674
689
+ 21,1300,2.5391,12.6681
690
+ 21,1400,2.1527,8.6082
691
+ 21,1400,2.56,12.9352
692
+ 21,1500,2.2131,9.1442
693
+ 21,1500,2.4823,11.9683
694
+ 21,1600,2.1473,8.5615
695
+ 21,1600,2.6207,13.745
696
+ 21,1700,2.249,9.4778
697
+ 21,1700,2.2406,9.3989
698
+ 21,1800,2.3484,10.4684
699
+ 21,1800,2.4947,12.1177
700
+ 21,1900,2.369,10.6865
701
+ 21,1900,2.3141,10.1161
702
+ 21,2000,2.1483,8.5702
703
+ 21,2000,2.0813,8.0148
704
+ 21,2100,2.4676,11.7938
705
+ 21,2100,2.5372,12.6438
706
+ 21,2200,2.5696,13.0607
707
+ 21,2200,2.4665,11.7807
708
+ 21,2300,2.3604,10.5955
709
+ 21,2300,2.453,11.6234
710
+ 21,2400,2.4663,11.7783
711
+ 21,2400,2.552,12.8323
712
+ 21,2500,2.4053,11.0819
713
+ 21,2500,2.2327,9.3254
714
+ 21,2600,2.3557,10.5451
715
+ 21,2600,2.3985,11.0071
716
+ 21,2700,2.2723,9.7018
717
+ 21,2700,2.4432,11.5094
718
+ 21,2800,2.2502,9.4895
719
+ 21,2800,2.5233,12.4703
720
+ 21,2900,2.4847,11.9973
721
+ 21,2900,2.409,11.1229
722
+ 21,3000,2.2226,9.2317
723
+ 21,3000,2.3775,10.7784
724
+ 21,3100,2.1203,8.334
725
+ 21,3100,2.3746,10.747
726
+ 22,100,2.0841,8.0371
727
+ 22,100,2.3862,10.8717
728
+ 22,200,2.2103,9.1183
729
+ 22,200,2.5702,13.069
730
+ 22,300,2.2659,9.6397
731
+ 22,300,2.2531,9.5168
732
+ 22,400,2.4005,11.0286
733
+ 22,400,2.4486,11.5717
734
+ 22,500,2.1922,8.9553
735
+ 22,500,2.4738,11.8676
736
+ 22,600,2.3171,10.1463
737
+ 22,600,2.2134,9.1471
738
+ 22,700,2.1564,8.6396
739
+ 22,700,2.5602,12.9386
740
+ 22,800,2.3937,10.9535
741
+ 22,800,2.2362,9.3575
742
+ 22,900,2.3248,10.2245
743
+ 22,900,2.5474,12.7736
744
+ 22,1000,2.0701,7.9253
745
+ 22,1000,2.0833,8.0311
746
+ 22,1100,2.1329,8.4391
747
+ 22,1100,2.2881,9.8567
748
+ 22,1200,2.194,8.9706
749
+ 22,1200,2.4615,11.7227
750
+ 22,1300,2.4257,11.3102
751
+ 22,1300,2.2817,9.7936
752
+ 22,1400,2.2594,9.5774
753
+ 22,1400,2.5625,12.9683
754
+ 22,1500,2.2953,9.9279
755
+ 22,1500,2.1516,8.599
756
+ 22,1600,2.3245,10.2215
757
+ 22,1600,2.4977,12.1545
758
+ 22,1700,2.3146,10.1208
759
+ 22,1700,2.2119,9.1334
760
+ 22,1800,2.3918,10.9335
761
+ 22,1800,2.2168,9.1775
762
+ 22,1900,2.4785,11.9235
763
+ 22,1900,2.4493,11.5805
764
+ 22,2000,2.2433,9.4248
765
+ 22,2000,2.514,12.3543
766
+ 22,2100,2.3151,10.1256
767
+ 22,2100,2.2537,9.5231
768
+ 22,2200,2.1862,8.9009
769
+ 22,2200,2.3459,10.4427
770
+ 22,2300,2.4228,11.2773
771
+ 22,2300,2.2346,9.3426
772
+ 22,2400,2.2062,9.0816
773
+ 22,2400,2.4777,11.9139
774
+ 22,2500,2.1243,8.3674
775
+ 22,2500,2.3603,10.5946
776
+ 22,2600,2.3792,10.7965
777
+ 22,2600,2.3461,10.4452
778
+ 22,2700,2.3259,10.2359
779
+ 22,2700,2.3966,10.9861
780
+ 22,2800,2.4015,11.0399
781
+ 22,2800,2.4276,11.3319
782
+ 22,2900,2.5032,12.2218
783
+ 22,2900,2.3832,10.8394
784
+ 22,3000,2.1186,8.3195
785
+ 22,3000,2.3673,10.6688
786
+ 22,3100,2.3263,10.2401
787
+ 22,3100,2.2368,9.3634
788
+ 23,100,2.2735,9.7134
789
+ 23,100,2.2665,9.6457
790
+ 23,200,2.185,8.8906
791
+ 23,200,2.1517,8.5996
792
+ 23,300,2.2903,9.8783
793
+ 23,300,2.2574,9.5578
794
+ 23,400,2.4636,11.7469
795
+ 23,400,2.5305,12.5602
796
+ 23,500,2.262,9.6028
797
+ 23,500,2.434,11.4045
798
+ 23,600,2.2474,9.4631
799
+ 23,600,2.1875,8.9125
800
+ 23,700,2.1001,8.1667
801
+ 23,700,2.165,8.7143
802
+ 23,800,2.1119,8.2638
803
+ 23,800,2.2312,9.3107
804
+ 23,900,2.2211,9.2171
805
+ 23,900,2.3446,10.4295
806
+ 23,1000,2.242,9.412
807
+ 23,1000,2.4275,11.3308
808
+ 23,1100,2.1189,8.3216
809
+ 23,1100,2.3552,10.5405
810
+ 23,1200,2.1247,8.37
811
+ 23,1200,2.3776,10.7791
812
+ 23,1300,2.1127,8.2704
813
+ 23,1300,2.4015,11.0394
814
+ 23,1400,2.102,8.1822
815
+ 23,1400,2.4753,11.8853
816
+ 23,1500,2.2269,9.2714
817
+ 23,1500,2.4314,11.375
818
+ 23,1600,2.1121,8.2657
819
+ 23,1600,2.2185,9.1938
820
+ 23,1700,1.9466,7.0051
821
+ 23,1700,2.212,9.1336
822
+ 23,1800,2.2108,9.1232
823
+ 23,1800,2.2967,9.9416
824
+ 23,1900,2.2285,9.2856
825
+ 23,1900,2.4373,11.4426
826
+ 23,2000,2.2417,9.4092
827
+ 23,2000,2.3913,10.9281
828
+ 23,2100,2.2783,9.7597
829
+ 23,2100,2.2783,9.7605
830
+ 23,2200,2.1256,8.3781
831
+ 23,2200,2.6455,14.0907
832
+ 23,2300,2.4059,11.0888
833
+ 23,2300,2.3268,10.2454
834
+ 23,2400,2.2476,9.4653
835
+ 23,2400,2.3324,10.3022
836
+ 23,2500,1.9663,7.1443
837
+ 23,2500,2.1346,8.4539
838
+ 23,2600,2.2005,9.0293
839
+ 23,2600,2.3986,11.008
840
+ 23,2700,2.2762,9.7399
841
+ 23,2700,2.3777,10.78
842
+ 23,2800,2.3507,10.493
843
+ 23,2800,2.4017,11.0422
844
+ 23,2900,2.1719,8.7753
845
+ 23,2900,2.6419,14.0403
846
+ 23,3000,2.1339,8.4474
847
+ 23,3000,2.4274,11.3291
848
+ 23,3100,2.3432,10.414
849
+ 23,3100,2.4689,11.8095
850
+ 24,100,2.3766,10.7685
851
+ 24,100,2.1095,8.2443
852
+ 24,200,2.2492,9.4799
853
+ 24,200,2.2281,9.2821
854
+ 24,300,2.0896,8.0816
855
+ 24,300,2.3703,10.7003
856
+ 24,400,2.123,8.356
857
+ 24,400,2.305,10.0242
858
+ 24,500,2.2929,9.9039
859
+ 24,500,2.2863,9.8383
860
+ 24,600,2.2186,9.1948
861
+ 24,600,2.252,9.507
862
+ 24,700,2.3652,10.6457
863
+ 24,700,2.2716,9.6947
864
+ 24,800,2.4291,11.349
865
+ 24,800,2.3995,11.0173
866
+ 24,900,2.0992,8.1598
867
+ 24,900,2.3343,10.3224
868
+ 24,1000,2.2295,9.2953
869
+ 24,1000,2.2976,9.9507
870
+ 24,1100,2.2423,9.4153
871
+ 24,1100,2.3547,10.5347
872
+ 24,1200,2.1982,9.0089
873
+ 24,1200,2.3108,10.0823
874
+ 24,1300,2.3898,10.9117
875
+ 24,1300,2.3038,10.0118
876
+ 24,1400,2.4188,11.2319
877
+ 24,1400,2.4687,11.8066
878
+ 24,1500,2.2394,9.3875
879
+ 24,1500,2.2251,9.2544
880
+ 24,1600,2.3871,10.8814
881
+ 24,1600,2.4617,11.7248
882
+ 24,1700,2.4062,11.0917
883
+ 24,1700,2.2415,9.407
884
+ 24,1800,2.2069,9.0873
885
+ 24,1800,2.2795,9.7719
886
+ 24,1900,2.1929,8.9615
887
+ 24,1900,2.5203,12.4326
888
+ 24,2000,2.2991,9.9652
889
+ 24,2000,2.463,11.7403
890
+ 24,2100,2.2047,9.0674
891
+ 24,2100,2.3497,10.4825
892
+ 24,2200,2.1788,8.836
893
+ 24,2200,2.4901,12.0629
894
+ 24,2300,2.3368,10.3484
895
+ 24,2300,2.1345,8.453
896
+ 24,2400,2.3685,10.6811
897
+ 24,2400,2.3948,10.9664
898
+ 24,2500,2.1582,8.6555
899
+ 24,2500,2.1817,8.8612
900
+ 24,2600,2.2999,9.9734
901
+ 24,2600,2.4069,11.0995
902
+ 24,2700,2.1185,8.3184
903
+ 24,2700,2.4753,11.8856
904
+ 24,2800,2.5695,13.0598
905
+ 24,2800,2.4636,11.7471
906
+ 24,2900,2.256,9.545
907
+ 24,2900,2.681,14.5996
908
+ 24,3000,2.3286,10.2635
909
+ 24,3000,2.2329,9.3272
910
+ 24,3100,2.2094,9.1106
911
+ 24,3100,2.1047,8.2049
912
+ 25,100,2.0074,7.4441
913
+ 25,100,2.0677,7.9063
914
+ 25,200,2.0555,7.8107
915
+ 25,200,2.311,10.0845
916
+ 25,300,2.3032,10.0063
917
+ 25,300,2.0452,7.7308
918
+ 25,400,2.1707,8.7648
919
+ 25,400,2.3708,10.7054
920
+ 25,500,2.1961,8.9898
921
+ 25,500,2.2842,9.8177
922
+ 25,600,2.0564,7.8181
923
+ 25,600,2.465,11.7635
924
+ 25,700,2.0773,7.9832
925
+ 25,700,2.2797,9.7735
926
+ 25,800,2.3222,10.1984
927
+ 25,800,2.4073,11.1045
928
+ 25,900,2.1284,8.4017
929
+ 25,900,1.9411,6.9664
930
+ 25,1000,2.1676,8.737
931
+ 25,1000,2.1742,8.7955
932
+ 25,1100,2.2209,9.2159
933
+ 25,1100,2.2986,9.9598
934
+ 25,1200,2.1686,8.7457
935
+ 25,1200,2.2249,9.2525
936
+ 25,1300,2.313,10.1044
937
+ 25,1300,2.2743,9.7211
938
+ 25,1400,2.2171,9.1806
939
+ 25,1400,2.4074,11.1048
940
+ 25,1500,2.2474,9.4627
941
+ 25,1500,2.3521,10.5077
942
+ 25,1600,2.2844,9.8197
943
+ 25,1600,2.1566,8.642
944
+ 25,1700,2.2792,9.7693
945
+ 25,1700,2.5753,13.135
946
+ 25,1800,2.2396,9.3897
947
+ 25,1800,2.5129,12.3405
948
+ 25,1900,2.0225,7.5576
949
+ 25,1900,2.1021,8.1835
950
+ 25,2000,2.236,9.3554
951
+ 25,2000,2.2199,9.2062
952
+ 25,2100,2.071,7.9326
953
+ 25,2100,2.2128,9.1416
954
+ 25,2200,2.3347,10.3267
955
+ 25,2200,2.4628,11.7379
956
+ 25,2300,2.035,7.6525
957
+ 25,2300,2.2937,9.9117
958
+ 25,2400,2.2684,9.6638
959
+ 25,2400,2.3119,10.0937
960
+ 25,2500,2.2548,9.5332
961
+ 25,2500,2.4301,11.3601
962
+ 25,2600,2.0938,8.1161
963
+ 25,2600,2.3248,10.2245
964
+ 25,2700,2.1231,8.3569
965
+ 25,2700,2.1451,8.5426
966
+ 25,2800,2.1211,8.3405
967
+ 25,2800,2.4403,11.4766
968
+ 25,2900,2.2003,9.0279
969
+ 25,2900,2.5203,12.4329
970
+ 25,3000,2.2365,9.3608
971
+ 25,3000,2.2823,9.7993
972
+ 25,3100,2.1602,8.6729
973
+ 25,3100,2.2913,9.8882
974
+ 26,100,2.1935,8.9668
975
+ 26,100,2.1781,8.8298
976
+ 26,200,1.9728,7.1905
977
+ 26,200,2.1307,8.4208
978
+ 26,300,2.2561,9.5454
979
+ 26,300,2.1756,8.8076
980
+ 26,400,2.0493,7.7628
981
+ 26,400,1.9878,7.2998
982
+ 26,500,2.2319,9.318
983
+ 26,500,2.365,10.6442
984
+ 26,600,2.1954,8.9836
985
+ 26,600,2.5252,12.494
986
+ 26,700,2.1557,8.6338
987
+ 26,700,2.4526,11.6181
988
+ 26,800,2.4767,11.9019
989
+ 26,800,2.1245,8.3691
990
+ 26,900,2.2038,9.0594
991
+ 26,900,2.0997,8.1638
992
+ 26,1000,2.1657,8.7203
993
+ 26,1000,2.3237,10.2132
994
+ 26,1100,2.0512,7.7773
995
+ 26,1100,2.1471,8.5599
996
+ 26,1200,2.2466,9.4556
997
+ 26,1200,2.4894,12.0542
998
+ 26,1300,2.1989,9.0151
999
+ 26,1300,2.2348,9.3447
1000
+ 26,1400,2.258,9.5638
1001
+ 26,1400,2.2782,9.7595
1002
+ 26,1500,2.3685,10.6812
1003
+ 26,1500,2.4081,11.1129
1004
+ 26,1600,2.1787,8.8347
1005
+ 26,1600,2.1137,8.2785
1006
+ 26,1700,2.2104,9.1193
1007
+ 26,1700,2.3987,11.0087
1008
+ 26,1800,2.0694,7.9204
1009
+ 26,1800,2.0616,7.8588
1010
+ 26,1900,2.1163,8.3004
1011
+ 26,1900,2.1317,8.4295
1012
+ 26,2000,2.2775,9.7523
1013
+ 26,2000,2.2971,9.9451
1014
+ 26,2100,2.4668,11.785
1015
+ 26,2100,2.2505,9.4921
1016
+ 26,2200,2.0802,8.0062
1017
+ 26,2200,2.1467,8.5564
1018
+ 26,2300,2.2296,9.2964
1019
+ 26,2300,1.9914,7.326
1020
+ 26,2400,2.0793,7.9986
1021
+ 26,2400,2.0928,8.1075
1022
+ 26,2500,2.1663,8.7259
1023
+ 26,2500,2.4869,12.0239
1024
+ 26,2600,1.9672,7.1505
1025
+ 26,2600,2.0959,8.1325
1026
+ 26,2700,2.3145,10.1195
1027
+ 26,2700,2.1937,8.9682
1028
+ 26,2800,2.1893,8.9292
1029
+ 26,2800,2.1959,8.9884
1030
+ 26,2900,2.0229,7.56
1031
+ 26,2900,2.0231,7.5616
1032
+ 26,3000,2.4255,11.3075
1033
+ 26,3000,2.2614,9.5964
1034
+ 26,3100,2.257,9.5549
1035
+ 26,3100,2.3379,10.3591
1036
+ 27,100,2.0213,7.5483
1037
+ 27,100,2.451,11.6003
1038
+ 27,200,2.0965,8.1376
1039
+ 27,200,2.1434,8.5288
1040
+ 27,300,2.1239,8.3635
1041
+ 27,300,2.2218,9.2243
1042
+ 27,400,2.062,7.8614
1043
+ 27,400,2.3096,10.07
1044
+ 27,500,2.2273,9.2751
1045
+ 27,500,1.9651,7.1359
1046
+ 27,600,1.9186,6.8115
1047
+ 27,600,2.1665,8.7279
1048
+ 27,700,2.1434,8.5282
1049
+ 27,700,1.9937,7.3428
1050
+ 27,800,2.0196,7.5353
1051
+ 27,800,2.3485,10.4698
1052
+ 27,900,1.9703,7.1727
1053
+ 27,900,2.2921,9.8954
1054
+ 27,1000,1.9125,6.77
1055
+ 27,1000,2.1013,8.1766
1056
+ 27,1100,2.1512,8.5948
1057
+ 27,1100,2.0653,7.8879
1058
+ 27,1200,2.1597,8.6683
1059
+ 27,1200,2.2314,9.3128
1060
+ 27,1300,2.1628,8.6955
1061
+ 27,1300,2.1889,8.9255
1062
+ 27,1400,2.1293,8.4093
1063
+ 27,1400,2.2393,9.3871
1064
+ 27,1500,1.9977,7.3723
1065
+ 27,1500,2.0922,8.1025
1066
+ 27,1600,2.1225,8.3524
1067
+ 27,1600,2.0977,8.1471
1068
+ 27,1700,2.0431,7.7143
1069
+ 27,1700,2.1646,8.711
1070
+ 27,1800,2.1295,8.4103
1071
+ 27,1800,2.0869,8.0598
1072
+ 27,1900,2.1814,8.859
1073
+ 27,1900,2.215,9.1616
1074
+ 27,2000,2.1035,8.1949
1075
+ 27,2000,2.3303,10.2812
1076
+ 27,2100,2.2474,9.4627
1077
+ 27,2100,2.0849,8.0437
1078
+ 27,2200,2.174,8.7938
1079
+ 27,2200,2.1256,8.3781
1080
+ 27,2300,2.049,7.7599
1081
+ 27,2300,2.415,11.1895
1082
+ 27,2400,2.0989,8.1575
1083
+ 27,2400,2.2232,9.2372
1084
+ 27,2500,2.1561,8.6375
1085
+ 27,2500,2.0479,7.7517
1086
+ 27,2600,2.0021,7.4043
1087
+ 27,2600,2.2103,9.1183
1088
+ 27,2700,2.1921,8.9542
1089
+ 27,2700,2.0779,7.9875
1090
+ 27,2800,2.0942,8.1186
1091
+ 27,2800,2.226,9.2629
1092
+ 27,2900,2.0986,8.1548
1093
+ 27,2900,1.9721,7.1857
1094
+ 27,3000,2.0972,8.1434
1095
+ 27,3000,2.0447,7.7268
1096
+ 27,3100,2.3328,10.3071
1097
+ 27,3100,2.3461,10.4452
1098
+ 28,100,2.3226,10.2017
1099
+ 28,200,2.0446,7.7264
1100
+ 28,300,2.1842,8.8834
1101
+ 28,400,2.1027,8.1881
1102
+ 28,500,2.065,7.8852
1103
+ 28,600,2.1477,8.5651
1104
+ 28,700,2.2763,9.7404
1105
+ 28,800,2.0723,7.943
1106
+ 28,900,2.2289,9.2896
1107
+ 28,1000,2.0397,7.6886
1108
+ 28,1100,2.0839,8.0357
1109
+ 28,1200,2.1007,8.1722
1110
+ 28,1300,2.0338,7.6428
1111
+ 28,1400,2.2426,9.4181
1112
+ 28,1500,2.0902,8.0865
1113
+ 28,1600,2.053,7.7911
1114
+ 28,1700,2.1393,8.4933
1115
+ 28,1800,2.2889,9.864
1116
+ 28,1900,2.0841,8.037
1117
+ 28,2000,2.1644,8.7093
1118
+ 28,2100,2.3094,10.0683
1119
+ 28,2200,2.1274,8.3931
1120
+ 28,2300,2.1598,8.6696
1121
+ 28,2400,2.0426,7.7105
1122
+ 28,2500,2.0391,7.6833
1123
+ 28,2600,2.1521,8.6025
1124
+ 28,2700,2.0867,8.0583
1125
+ 28,2800,1.9385,6.9486
1126
+ 28,2900,2.166,8.7234
1127
+ 28,3000,2.3245,10.2219
1128
+ 28,3100,2.1882,8.9189
1129
+ 29,100,1.8376,6.2816
1130
+ 29,200,2.0208,7.5444
1131
+ 29,300,2.4057,11.0859
1132
+ 29,400,1.9454,6.9966
1133
+ 29,500,2.0933,8.1118
1134
+ 29,600,2.1162,8.2996
1135
+ 29,700,2.0233,7.5629
1136
+ 29,800,2.1161,8.2987
1137
+ 29,900,2.3258,10.2354
1138
+ 29,1000,2.0303,7.6167
1139
+ 29,1100,2.2297,9.297
1140
+ 29,1200,2.1602,8.6727
1141
+ 29,1300,2.0937,8.1153
1142
+ 29,1400,2.1163,8.3006
1143
+ 29,1500,2.177,8.8202
1144
+ 29,1600,2.0694,7.9199
1145
+ 29,1700,2.2901,9.8764
1146
+ 29,1800,2.0297,7.6121
1147
+ 29,1900,1.9835,7.2681
1148
+ 29,2000,2.1101,8.2492
1149
+ 29,2100,2.0714,7.9361
1150
+ 29,2200,2.3346,10.3258
1151
+ 29,2300,2.125,8.3732
1152
+ 29,2400,2.2709,9.6881
1153
+ 29,2500,2.1652,8.7159
1154
+ 29,2600,2.4139,11.1778
1155
+ 29,2700,2.1402,8.5011
1156
+ 29,2800,2.0135,7.4895
1157
+ 29,2900,2.1379,8.4812
1158
+ 29,3000,2.2424,9.4156
1159
+ 29,3100,2.1572,8.6465
1160
+ 30,100,2.1938,8.9692
1161
+ 30,200,2.1014,8.178
1162
+ 30,300,2.2413,9.4058
1163
+ 30,400,2.027,7.5915
1164
+ 30,500,1.9092,6.7479
1165
+ 30,600,2.1688,8.748
1166
+ 30,700,2.1492,8.5776
1167
+ 30,800,2.1651,8.7158
1168
+ 30,900,2.0124,7.4812
1169
+ 30,1000,2.3341,10.3199
1170
+ 30,1100,2.0736,7.9533
1171
+ 30,1200,2.2919,9.8934
1172
+ 30,1300,1.8976,6.67
1173
+ 30,1400,2.1037,8.1963
1174
+ 30,1500,1.9812,7.2515
1175
+ 30,1600,1.8422,6.3105
1176
+ 30,1700,2.1148,8.2879
1177
+ 30,1800,2.0392,7.6841
1178
+ 30,1900,2.1974,9.0015
1179
+ 30,2000,1.99,7.3157
1180
+ 30,2100,2.1626,8.6936
1181
+ 30,2200,2.0304,7.6174
1182
+ 30,2300,2.1228,8.3543
1183
+ 30,2400,2.1954,8.9837
1184
+ 30,2500,2.1629,8.6964
1185
+ 30,2600,2.4293,11.3511
1186
+ 30,2700,1.8879,6.6057
1187
+ 30,2800,2.2829,9.8055
1188
+ 30,2900,2.2601,9.5845
1189
+ 30,3000,1.9881,7.302
1190
+ 30,3100,2.1743,8.796
text_files/validation_metrics.csv ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Epoch,Validation_Loss,Validation_Perplexity
2
+ 1,3.1313,22.9038
3
+ 2,2.8683,17.6076
4
+ 3,2.7487,15.6226
5
+ 4,2.6835,14.6358
6
+ 5,2.6448,14.08
7
+ 6,2.6184,13.7138
8
+ 7,2.6021,13.4918
9
+ 8,2.5887,13.3131
10
+ 10,2.5347,12.6126
11
+ 11,2.5207,12.4378
12
+ 12,2.5151,12.3681
13
+ 13,2.5155,12.3724
14
+ 14,2.5215,12.4476
15
+ 15,2.5179,12.4026
16
+ 16,2.5298,12.5513
17
+ 17,2.4818,11.9623
18
+ 18,2.4807,11.95
19
+ 19,2.4842,11.9914
20
+ 20,2.4874,12.0298
21
+ 21,2.4595,11.6989
22
+ 22,2.4655,11.7698
23
+ 23,2.4694,11.8158
24
+ 24,2.4789,11.9279
25
+ 25,2.4919,12.0847
26
+ 26,2.4963,12.1371
27
+ 27,2.5049,12.2423
28
+ 28,2.5089,12.2919
29
+ 29,2.521,12.4409
30
+ 30,2.5266,12.5108
text_files/validation_script.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import re

import pandas as pd

# Log files to scan for training/validation output.
LOG_FILES = [
    'training (2).txt',
    'training_log_1_18.txt',
    'training_log_17_27.txt',
    'training_log_21_30.txt'
]

# Matches training-progress lines such as "Epoch [12/30], ..." and captures the epoch.
EPOCH_PATTERN = re.compile(r"Epoch\s\[(\d+)/\d+],")

# Matches validation summary lines, e.g. "Validation Avg Loss: 2.53, Perplexity: 12.56".
VALIDATION_PATTERN = re.compile(
    r"Validation Avg Loss:\s([\d.]+),\sPerplexity:\s([\d.]+)"
)

# Column order for the output CSV; also applied when the parse yields no rows.
CSV_COLUMNS = ['Epoch', 'Validation_Loss', 'Validation_Perplexity']


def parse_validation_logs(log_files):
    """Scan *log_files* and return a list of per-validation metric dicts.

    Each dict has keys 'Epoch', 'Validation_Loss', 'Validation_Perplexity'.
    The epoch is taken from the most recent "Epoch [n/total]," line seen
    before each validation line; validation lines with no preceding epoch
    are skipped with a warning, as are missing files.
    """
    validation_data = []
    current_epoch = None  # Epoch of the most recent training-progress line.

    for file_name in log_files:
        if not os.path.exists(file_name):
            print(f"Warning: File not found - {file_name}. Skipping.")
            continue

        print(f"Processing {file_name} for validation metrics...")
        with open(file_name, 'r', encoding='utf-8') as f:
            for line in f:
                # Track the epoch first so a validation line on the same or a
                # later line is attributed to it.
                epoch_match = EPOCH_PATTERN.search(line)
                if epoch_match:
                    current_epoch = int(epoch_match.group(1))

                validation_match = VALIDATION_PATTERN.search(line)
                if not validation_match:
                    continue
                if current_epoch is None:
                    print(f"Warning: Found validation metrics without a preceding epoch in {file_name}. Skipping this entry.")
                    continue
                validation_data.append({
                    'Epoch': current_epoch,
                    'Validation_Loss': float(validation_match.group(1)),
                    'Validation_Perplexity': float(validation_match.group(2)),
                })

    return validation_data


def build_validation_dataframe(validation_data):
    """Return a DataFrame of unique, epoch-sorted validation metrics.

    Passing explicit columns keeps drop_duplicates/sort_values working even
    when *validation_data* is empty (the original code raised KeyError on an
    empty, column-less DataFrame when no log files were found).
    """
    df = pd.DataFrame(validation_data, columns=CSV_COLUMNS)
    # If an epoch was validated more than once (e.g. overlapping log files),
    # keep the last recorded entry for that epoch.
    df = df.drop_duplicates(subset=['Epoch'], keep='last')
    return df.sort_values(by=['Epoch']).reset_index(drop=True)


def main():
    """Parse all configured logs and write validation_metrics.csv."""
    print("Starting validation metrics parsing...")
    df_validation_sorted = build_validation_dataframe(
        parse_validation_logs(LOG_FILES)
    )

    output_csv_file = 'validation_metrics.csv'
    df_validation_sorted.to_csv(output_csv_file, index=False)

    print(f"\nSuccessfully parsed validation metrics and saved data to {output_csv_file}")
    print("You can now import this CSV file into Power BI to create your visualizations.")
    print("\nFirst few rows of the generated CSV:")
    print(df_validation_sorted.head())


if __name__ == "__main__":
    main()
web_app.py ADDED
@@ -0,0 +1,650 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import json
4
+ import numpy as np
5
+ import cv2 # For image manipulation and plotting segmentation results
6
+ import pickle # To load vocabulary (for captioning internal loading)
7
+ import logging
8
+ from io import BytesIO
9
+ import sys
10
+ # import jsonify # This was the missing import causing issues!
11
+ import face_recognition # For facial recognition tasks
12
+ import torch
13
+ from werkzeug.utils import secure_filename
14
+
15
+
16
+ from flask import Flask, render_template, request, redirect, url_for, session, flash, g, jsonify # Corrected import: added jsonify
17
+ from flask_sqlalchemy import SQLAlchemy
18
+ from werkzeug.security import generate_password_hash, check_password_hash
19
+ from functools import wraps
20
+ from PIL import Image # Used for both image processing and face_recognition
21
+
22
+ # Add the 'src' directory to Python's path so we can import from it.
23
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'src')))
24
+
25
+ # IMPORTANT: Explicitly import the CLASSES directly into the __main__ scope.
26
+ # This ensures their definitions are available to torch.load when it attempts
27
+ # to deserialize pickled objects (like COCOVocabulary or ImageCaptioningModel
28
+ # instances) that might have been saved with a __main__ module reference.
29
+ from src.data_preprocessing import COCOVocabulary
30
+ from src.model import ImageCaptioningModel
31
+
32
+ # Now import the necessary functions and modules from your project
33
+ from src.inference_api import generate_caption_for_image # Your existing captioning function
34
+ from src.utils import get_logger # Your existing logger utility
35
+
36
+ # Import YOLO for segmentation - adapted from your file.py
37
+ try:
38
+ from ultralytics import YOLO
39
+ # Logger initialization moved here to ensure it's after all necessary imports
40
+ logger = get_logger(__name__)
41
+ except ImportError:
42
+ logger = get_logger(__name__)
43
+ logger.error("ultralytics library not found. Please install it: pip install ultralytics")
44
+ YOLO = None # Set to None if import fails
45
+
46
# --- Flask App Setup ---
app = Flask(__name__)

# --- Configuration ---
# Strong secret key for session management (IMPORTANT: Change this in production!)
# NOTE(review): os.urandom(24) generates a new key on every process restart,
# which invalidates all existing sessions; production should load a fixed
# secret from the environment instead.
app.config['SECRET_KEY'] = os.urandom(24)
# SQLite database for users (file "users.db" under the instance/app directory).
app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///users.db'
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

# Define the folder to store uploaded images temporarily within the static directory
UPLOAD_FOLDER = os.path.join('static', 'uploads')
# Extensions accepted by the upload endpoints (checked via allowed_file()).
ALLOWED_EXTENSIONS = {'png', 'jpg', 'jpeg', 'gif'}

app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Ensure the upload folder exists
os.makedirs(os.path.join(app.root_path, UPLOAD_FOLDER), exist_ok=True)
logger.info(f"Upload folder '{UPLOAD_FOLDER}' ensured at {os.path.join(app.root_path, UPLOAD_FOLDER)}")

# --- Database Initialization ---
db = SQLAlchemy(app)

# Suppress some logging for cleaner output, but keep our custom prints
logging.getLogger("werkzeug").setLevel(logging.ERROR)
71
+
72
+ # --- Database Model (from auth_app.py) ---
73
class User(db.Model):
    """Registered application user; backs both password and face-based login."""
    id = db.Column(db.Integer, primary_key=True)
    # Unique login identifier; validated against an email regex in the routes.
    email = db.Column(db.String(120), unique=True, nullable=False)
    # Werkzeug password hash (see generate_password_hash / check_password_hash).
    password_hash = db.Column(db.String(256), nullable=False)
    # Store face encodings as JSON string of a list of floats (numpy arrays are not directly JSON serializable)
    face_encodings_json = db.Column(db.Text, nullable=True)

    def __repr__(self):
        return f'<User {self.email}>'
82
+
83
# Create database tables if they don't exist
with app.app_context():
    db.create_all()
    print("Database tables created/checked.")  # Kept print from your auth_app.py

# --- Global Segmentation Model Loading ---
# Captioning model is assumed to be loaded/handled by generate_caption_for_image internally.
segmentation_model_yolo = None
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
logger.info(f"Using device for models: {device}")

try:
    if YOLO:  # Only try to load if ultralytics import was successful
        segmentation_model_yolo = YOLO('yolov8x-seg.pt')  # YOLOv8x-seg is a large segmentation model
        segmentation_model_yolo.to(device)  # Move model to appropriate device
        logger.info("Segmentation Model (YOLOv8x-seg) loaded successfully.")
    else:
        logger.warning("YOLO library not available, skipping segmentation model loading.")
except Exception as e:
    logger.critical(f"Error loading Segmentation Model (YOLOv8x-seg): {e}", exc_info=True)
    # Leave the model unset; routes must check for None before using it.
    segmentation_model_yolo = None
104
+
105
+ # --- Helper Functions for Facial Recognition (Copied directly from your working auth_app.py) ---
106
def get_face_encoding_from_image(image_data_b64):
    """
    Decodes base64 image data, finds faces, and returns the first face's encoding.

    Args:
        image_data_b64: Base64-encoded image, with or without a
            "data:image/...;base64," data-URL prefix.

    Returns:
        The first detected face's encoding as a plain list of floats
        (JSON-serializable), or None if no face is found or on any error.
    """
    try:
        print(f"Processing image data of length: {len(image_data_b64)}")  # From your auth_app.py

        # Handle both formats: with and without data URL prefix
        if ',' in image_data_b64:
            # Remove data URL prefix (e.g., "data:image/jpeg;base64,")
            image_data_clean = image_data_b64.split(',')[1]
        else:
            image_data_clean = image_data_b64

        # Add padding if needed (base64 strings must be multiples of 4)
        missing_padding = len(image_data_clean) % 4
        if missing_padding:
            image_data_clean += '=' * (4 - missing_padding)

        # Decode the base64 payload; invalid input returns None rather than raising.
        try:
            image_bytes = base64.b64decode(image_data_clean)
        except Exception as decode_error:
            print(f"Base64 decode error: {decode_error}")  # From your auth_app.py
            return None

        print(f"Decoded image bytes length: {len(image_bytes)}")  # From your auth_app.py

        # Open and convert image
        try:
            img = Image.open(BytesIO(image_bytes))
            print(f"Image opened successfully. Format: {img.format}, Size: {img.size}, Mode: {img.mode}")  # From your auth_app.py

            # Convert to RGB if needed
            if img.mode != 'RGB':
                img = img.convert('RGB')
                print(f"Converted image to RGB mode")  # From your auth_app.py

        except Exception as img_error:
            print(f"Image opening/conversion error: {img_error}")  # From your auth_app.py
            return None

        # Convert PIL Image to numpy array (face_recognition expects numpy array)
        img_np = np.array(img)
        print(f"Numpy array shape: {img_np.shape}")  # From your auth_app.py

        # Find face encodings
        try:
            face_locations = face_recognition.face_locations(img_np)
            print(f"Found {len(face_locations)} face location(s)")  # From your auth_app.py

            if len(face_locations) == 0:
                print("No faces detected in the image")  # From your auth_app.py
                return None

            face_encodings = face_recognition.face_encodings(img_np, face_locations)
            print(f"Generated {len(face_encodings)} face encoding(s)")  # From your auth_app.py

            if len(face_encodings) > 0:
                # Only the first detected face is used for registration/login.
                encoding = face_encodings[0]
                print(f"Face encoding shape: {encoding.shape}")  # From your auth_app.py
                return encoding.tolist()  # Convert numpy array to list for JSON serialization
            else:
                print("No face encodings generated despite face locations found")  # From your auth_app.py
                return None

        except Exception as face_error:
            print(f"Face recognition processing error: {face_error}")  # From your auth_app.py
            return None

    except Exception as e:
        print(f"General error processing image for face encoding: {e}")  # From your auth_app.py
        return None
179
+
180
def compare_face_encoding_to_stored(live_encoding, stored_encodings_json):
    """
    Check whether a live face encoding matches any encoding stored for a user.

    Returns True if at least one stored encoding matches, False otherwise
    (including missing/empty inputs and any comparison error).
    """
    # Guard clauses: nothing to compare without both sides present.
    if not live_encoding:
        print("Live encoding is None, cannot compare.")
        return False
    if not stored_encodings_json:
        print("Stored encodings JSON is None, cannot compare.")
        return False

    try:
        # Stored encodings are persisted as a JSON list of float lists.
        known_vectors = json.loads(stored_encodings_json)
        if not known_vectors:
            print("No stored encodings found in JSON")
            return False

        known_encodings = [np.array(vec) for vec in known_vectors]
        print(f"Comparing against {len(known_encodings)} stored encodings")

        # Compare the live encoding against all stored encodings for this user.
        # tolerance=0.6 is the common default: lower means a stricter match.
        matches = face_recognition.compare_faces(
            known_encodings, np.array(live_encoding), tolerance=0.6
        )
        match_found = True in matches
        print(f"Face comparison result: {match_found}. Matches: {matches}")
        return match_found
    except Exception as err:
        print(f"Error comparing face encodings: {err}")
        return False
213
+
214
+ # --- Segmentation Helper Functions (from web_app.py, adapted from file.py) ---
215
def calculate_segmentation_metrics(results, segmentation_model_ref):
    """
    Summarise a YOLO segmentation result into a plain metrics dict.

    Returns a dict with keys 'detected_objects' (list of "name (Conf: x)"
    strings), 'num_objects', 'status', and 'error' (None unless something
    went wrong). Works without Streamlit or any mock ground truth.
    """
    summary = {
        'detected_objects': [],
        'num_objects': 0,
        'status': 'Processed',
        'error': None,
    }

    # Without a loaded model we cannot interpret class ids at all.
    if not segmentation_model_ref:
        summary['error'] = "Segmentation model not loaded."
        summary['status'] = "Error: Segmentation model unavailable."
        return summary

    # No result object, or a result with no masks, means nothing was found.
    if not results or results[0].masks is None or len(results[0].masks) == 0:
        summary['status'] = "No objects detected."
        return summary

    try:
        labels = []
        for box_row in results[0].boxes.data.tolist():
            cls_id = int(box_row[5])
            score = round(box_row[4], 2)
            # Fall back to a generic label if the id is missing from model.names.
            cls_name = segmentation_model_ref.names.get(cls_id, f"Class {cls_id}")
            labels.append(f"{cls_name} (Conf: {score})")

        summary['detected_objects'] = labels
        summary['num_objects'] = len(labels)
    except Exception as exc:
        summary['error'] = f"Metric calculation failed: {str(exc)}"
        summary['status'] = "Error during metric calculation."
        logger.error(f"Metric calculation failed: {exc}", exc_info=True)

    return summary
254
+
255
def perform_segmentation(image_path, model_ref, upload_folder, filename_stem):
    """
    Performs segmentation on an image and returns the URL of the segmented image
    and a dictionary of metrics.

    Args:
        image_path: Path to the uploaded source image on disk.
        model_ref: Loaded YOLO segmentation model, or a falsy value if unavailable.
        upload_folder: Directory where the annotated output image is written.
        filename_stem: Base name (no extension) used for the output file.

    Returns:
        (segmented_image_url, metrics): the static URL of the saved annotated
        image (or None if nothing was segmented / on error) and a metrics dict
        (see calculate_segmentation_metrics) with 'status'/'error' keys.
    """
    segmented_image_url = None
    metrics = {}

    if not model_ref:
        metrics = {'error': "Segmentation model not loaded."}
        return segmented_image_url, metrics

    try:
        img_pil = Image.open(image_path).convert('RGB')
        img_np = np.array(img_pil)  # Convert to NumPy array
        img_cv2 = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)  # YOLO expects BGR

        # Perform inference
        results = model_ref(img_cv2, verbose=False)  # verbose=False suppresses console output

        if results and results[0].masks is not None and len(results[0].masks) > 0:
            # Plot results directly onto the image
            annotated_image = results[0].plot()  # This returns a numpy array (BGR)

            # Convert BGR (OpenCV default) to RGB for PIL and saving
            annotated_image_rgb = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
            img_segmented_pil = Image.fromarray(annotated_image_rgb)

            # Save the segmented image
            segmented_filename = f"segmented_{filename_stem}.png"  # Ensure .png extension for segmented output
            segmented_filepath = os.path.join(upload_folder, segmented_filename)
            img_segmented_pil.save(segmented_filepath)
            segmented_image_url = url_for('static', filename=f'uploads/{segmented_filename}')
            logger.info(f"Segmented image saved to: {segmented_filepath}")

            # Calculate and return metrics
            metrics = calculate_segmentation_metrics(results, model_ref)
            metrics['status'] = "Segmentation successful."
        else:
            metrics['status'] = "No objects detected for segmentation."
            logger.info(f"No objects detected for segmentation in {image_path}.")

    except Exception as e:
        metrics['error'] = str(e)
        metrics['status'] = "Error during segmentation processing."
        logger.critical(f"Error in perform_segmentation for {image_path}: {e}", exc_info=True)

    return segmented_image_url, metrics
303
+
304
+ # --- Helper for file extension check ---
305
def allowed_file(filename):
    """Return True if *filename* has an extension listed in ALLOWED_EXTENSIONS."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
308
+
309
+ # --- Before/After Request Hooks ---
310
@app.before_request
def load_logged_in_user():
    """Attach the logged-in User (or None) to flask.g before each request."""
    user_id = session.get('user_id')
    # g.user is what login_required and the views consult.
    g.user = None if user_id is None else User.query.get(user_id)
317
+
318
+ # --- Authentication Decorator ---
319
def login_required(view):
    """Decorator: redirect anonymous users to the auth page, else run *view*."""
    @wraps(view)
    def _guarded_view(**kwargs):
        # g.user is populated by load_logged_in_user before each request.
        if g.user is not None:
            return view(**kwargs)
        flash("Please log in to access this page.", "info")
        return redirect(url_for('auth_page'))
    return _guarded_view
327
+
328
+
329
+ # --- Routes (Combined from both previous apps, authentication parts use print for logs) ---
330
+
331
+ # Authentication Page Route (Root)
332
@app.route('/')
def auth_page():
    """Serve the login/registration page, or redirect an authenticated user on."""
    if 'user_id' not in session:
        print("Serving auth.html for login/registration.")
        return render_template('auth.html')

    user = User.query.get(session['user_id'])
    if user is None:
        # Stale/invalid session id: drop it and start over at the auth page.
        session.pop('user_id', None)
        print("Invalid user_id in session, redirecting to auth_page.")
        return redirect(url_for('auth_page'))

    print(f"User {user.email} already logged in, redirecting to main_app.")
    return redirect(url_for('main_app'))
347
+
348
@app.route('/register', methods=['POST'])
def register():
    """Handles traditional email/password registration.

    Expects form fields 'email' and 'password'. Returns JSON
    {'success': ..., 'message': ...} with an HTTP status of
    400 (validation failure), 409 (duplicate email), 500 (DB error),
    or 200 on success.
    """
    email = request.form['email'].strip()  # Use .strip() to remove whitespace
    password = request.form['password']
    print(f"Received traditional registration request for: {email}")  # Kept print from your auth_app.py

    if not email or not password:
        print("Error: Email or password missing for traditional registration.")  # From auth_app.py
        return jsonify({'success': False, 'message': 'Email and password are required.'}), 400

    import re  # Make sure re is imported
    email_pattern = r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$'
    if not re.match(email_pattern, email):
        print(f"Error: Invalid email format: {email}")
        return jsonify({'success': False, 'message': 'Please enter a valid email address.'}), 400

    if len(password) < 6:
        print("Error: Password too short.")  # From auth_app.py
        return jsonify({'success': False, 'message': 'Password must be at least 6 characters long.'}), 400

    existing_user = User.query.filter_by(email=email).first()
    if existing_user:
        print(f"Error: Email {email} already registered.")  # From auth_app.py
        return jsonify({'success': False, 'message': 'Email already registered.'}), 409

    hashed_password = generate_password_hash(password)
    new_user = User(email=email, password_hash=hashed_password)

    try:
        db.session.add(new_user)
        db.session.commit()
        print(f"Traditional registration successful for: {email}")  # From auth_app.py
        return jsonify({'success': True, 'message': 'Registration successful. You can now log in.'})
    except Exception as e:
        # Roll back the pending insert so the session remains usable.
        db.session.rollback()
        print(f"Database error during traditional registration: {e}")  # From auth_app.py
        return jsonify({'success': False, 'message': 'Database error during registration.'}), 500
386
+
387
+
388
@app.route('/login', methods=['POST'])
def login():
    """Authenticate a user via email/password and start a session on success."""
    email = request.form['email'].strip()
    password = request.form['password']
    print(f"Received traditional login request for: {email}")

    account = User.query.filter_by(email=email).first()
    credentials_ok = account is not None and check_password_hash(
        account.password_hash, password
    )

    if not credentials_ok:
        print(f"Traditional login failed for: {email}")
        return jsonify({'success': False, 'message': 'Invalid email or password.'}), 401

    session['user_id'] = account.id
    print(f"Traditional login successful for: {email}")
    return jsonify({'success': True, 'message': 'Login successful.'})
404
+
405
@app.route('/face_register', methods=['POST'])
def face_register():
    """
    Accepts a face-based sign-up request.

    The JSON payload must carry an email, a password, and a list of
    base64-encoded face images; every image with a detectable face is
    turned into an encoding and stored on the newly created user.
    """
    try:
        # Reject anything that is not a JSON request up front.
        if not request.is_json:
            print("Error: Request is not JSON for face_register")  # From auth_app.py
            return jsonify({'success': False, 'message': 'Invalid request format. JSON expected.'}), 400

        payload = request.get_json()
        if not payload:
            print("Error: No JSON data received for face_register")  # From auth_app.py
            return jsonify({'success': False, 'message': 'No data received.'}), 400

        email = payload.get('email', '').strip()  # Use .strip()
        password = payload.get('password', '')
        images = payload.get('images', [])

        print(f"Received face registration request for: {email} with {len(images)} images.")  # From auth_app.py

        # All three pieces of data are mandatory.
        if not (email and password and images):
            print("Error: Missing email, password or image data for face registration.")  # From auth_app.py
            return jsonify({'success': False, 'message': 'Email, password, and face images are required.'}), 400

        import re  # Make sure re is imported
        if not re.match(r'^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$', email):
            print(f"Error: Invalid email format: {email}")
            return jsonify({'success': False, 'message': 'Please enter a valid email address.'}), 400

        if len(password) < 6:
            print("Error: Password too short for face registration.")  # From auth_app.py
            return jsonify({'success': False, 'message': 'Password must be at least 6 characters long.'}), 400

        # Refuse duplicate accounts.
        if User.query.filter_by(email=email).first():
            print(f"Error: Email {email} already registered for face registration.")  # From auth_app.py
            return jsonify({'success': False, 'message': 'Email already registered.'}), 409

        # Encode every usable image; unreadable ones are skipped with a log line.
        collected = []
        for position, raw_image in enumerate(images, start=1):
            print(f"Processing image {position}/{len(images)} for face encoding...")  # From auth_app.py

            if not raw_image:
                print(f"Warning: Image {position} is empty, skipping.")  # From auth_app.py
                continue

            face_vector = get_face_encoding_from_image(raw_image)
            if face_vector:
                collected.append(face_vector)
                print(f"Successfully processed image {position}")  # From auth_app.py
            else:
                print(f"Failed to process image {position} - no face detected or processing error.")  # From auth_app.py

        if not collected:
            print("Error: No detectable faces found in any of the provided images for face registration.")  # From auth_app.py
            return jsonify({'success': False, 'message': 'No detectable faces in the provided images. Please try again with clearer images showing your face clearly.'}), 400

        account = User(
            email=email,
            password_hash=generate_password_hash(password),
            face_encodings_json=json.dumps(collected),
        )

        try:
            db.session.add(account)
            db.session.commit()
            print(f"Face registration successful for: {email}. Stored {len(collected)} encodings.")  # From auth_app.py
            return jsonify({'success': True, 'message': f'Face registration successful with {len(collected)} face samples. You can now log in with your face.'})
        except Exception as db_error:
            # Keep the session usable after a failed commit.
            db.session.rollback()
            print(f"Database error during face registration: {db_error}")  # From auth_app.py
            return jsonify({'success': False, 'message': 'Database error during registration. Please try again.'}), 500

    except Exception as e:
        # Route-boundary catch-all: log the traceback, answer with JSON 500.
        print(f"Unexpected error during face registration: {e}")  # From auth_app.py
        import traceback
        traceback.print_exc()
        return jsonify({'success': False, 'message': 'An unexpected error occurred. Please try again.'}), 500
487
@app.route('/face_login', methods=['POST'])
def face_login():
    """
    Authenticates a user from a single live camera frame.

    The encoding extracted from the submitted image is compared against
    the stored encodings of every user that registered face data; the
    first matching user is logged in.
    """
    try:
        if not request.is_json:
            print("Error: Request is not JSON for face_login")  # From auth_app.py
            return jsonify({'success': False, 'message': 'Invalid request format. JSON expected.'}), 400

        payload = request.get_json()
        if not payload:
            print("Error: No JSON data received for face_login")  # From auth_app.py
            return jsonify({'success': False, 'message': 'No data received.'}), 400

        snapshot = payload.get('image')
        print("Received face login request.")  # From auth_app.py

        if not snapshot:
            print("Error: Face image required for login.")  # From auth_app.py
            return jsonify({'success': False, 'message': 'Face image required for login.'}), 400

        live_encoding = get_face_encoding_from_image(snapshot)
        if not live_encoding:
            print("No face detected in the live image for login.")  # From auth_app.py
            return jsonify({'success': False, 'message': 'No face detected. Please position your face clearly in the camera and ensure good lighting.'}), 400

        # Only users that actually stored face data are candidates.
        candidates = User.query.filter(User.face_encodings_json.isnot(None)).all()
        print(f"Attempting to match against {len(candidates)} registered users with face data...")  # From auth_app.py

        for candidate in candidates:
            if not candidate.face_encodings_json:
                continue
            print(f"Comparing live encoding with stored encodings for user: {candidate.email}")  # From auth_app.py
            if compare_face_encoding_to_stored(live_encoding, candidate.face_encodings_json):
                session['user_id'] = candidate.id
                print(f"Face login successful for user: {candidate.email}")  # From auth_app.py
                return jsonify({'success': True, 'message': f'Welcome back, {candidate.email}!'})

        print("Face not recognized against any registered user.")  # From auth_app.py
        return jsonify({'success': False, 'message': 'Face not recognized. Please try again or use email/password login.'}), 401

    except Exception as e:
        # Route-boundary catch-all: log the traceback, answer with JSON 500.
        print(f"Unexpected error during face login: {e}")  # From auth_app.py
        import traceback
        traceback.print_exc()
        return jsonify({'success': False, 'message': 'An error occurred during face login. Please try again.'}), 500
535
@app.route('/logout')
def logout():
    """Ends the current session and redirects to the authentication page."""
    departing_user = session.get('user_id')
    print(f"User {departing_user} logging out.")  # From auth_app.py
    session.pop('user_id', None)
    flash("You have been logged out.", "info")  # Added for consistency with other parts
    return redirect(url_for('auth_page'))
543
# Main application route (protected)
@app.route('/main_app')
@login_required
def main_app():
    """
    Renders the main image-processing page for authenticated users only.
    """
    # Fresh page load: no caption, images, or segmentation metrics yet.
    return render_template(
        'index.html',
        caption=None,
        uploaded_image_url=None,
        segmentation_image_url=None,
        segmentation_metrics={},
    )
556
# Predict route (protected)
@app.route('/predict', methods=['POST'])
@login_required
def predict():
    """
    Handles image upload, performs captioning and segmentation,
    and renders the results.

    Expects a multipart form with a 'file' field. The original image is
    saved under the configured upload folder, captioned, and (when the
    YOLO model is available) segmented; both results are rendered into
    index.html. On validation failure the user is redirected back with a
    flashed error.
    """
    logger.info("Received request to /predict.")

    # Defaults rendered when a stage fails or is skipped.
    generated_caption = "N/A"
    uploaded_image_url = None
    segmentation_image_url = None
    segmentation_metrics = {}

    if 'file' not in request.files:
        flash('No file part in the request.', 'error')
        logger.warning("No file part.")
        return redirect(request.url)

    file = request.files['file']
    if file.filename == '':
        flash('No selected file.', 'error')
        logger.warning("Empty filename.")
        return redirect(request.url)

    if file and allowed_file(file.filename):
        filename = secure_filename(file.filename)
        # Extract filename stem (without extension) for segmented image naming
        filename_stem, file_ext = os.path.splitext(filename)

        # Construct the full path to save the original image
        original_filepath = os.path.join(app.root_path, app.config['UPLOAD_FOLDER'], filename)
        file.save(original_filepath)
        # BUG FIX: the URL must reference the saved file; the previous code
        # contained a literal "(unknown)" placeholder instead of the filename,
        # so the uploaded image never displayed.
        uploaded_image_url = url_for('static', filename=f'uploads/{filename}')
        logger.info(f"Original image saved to: {original_filepath}")

        # --- Perform Image Captioning ---
        try:
            logger.info(f"Starting caption generation for {original_filepath}...")
            generated_caption = generate_caption_for_image(original_filepath)
            logger.info(f"Caption generated: '{generated_caption}'")
            flash("Image caption generated successfully!", 'success')
        except FileNotFoundError:
            flash(f"Error: Captioning model or vocabulary file not found for {original_filepath}. Check server logs.", 'error')
            logger.error(f"Captioning model/vocab not found during inference for {original_filepath}.")
            generated_caption = "Error: Captioning model/vocab not found."
        except RuntimeError as runtime_err:
            # Renamed from 'as re' — that binding shadowed the re module.
            flash(f"Error: Captioning model not initialized or device issue. Check server logs.", 'error')
            logger.critical(f"Captioning model not initialized: {runtime_err}", exc_info=True)
            generated_caption = "Error: Captioning service unavailable."
        except Exception as e:
            flash(f"An unexpected error occurred during caption generation. Check server logs.", 'error')
            logger.critical(f"Error generating caption for {original_filepath}: {e}", exc_info=True)
            generated_caption = "Error: Could not generate caption."

        # --- Perform Image Segmentation ---
        if segmentation_model_yolo:
            logger.info(f"Starting segmentation for {original_filepath} using YOLO...")
            segmentation_image_url, segmentation_metrics = perform_segmentation(
                image_path=original_filepath,
                model_ref=segmentation_model_yolo,
                upload_folder=os.path.join(app.root_path, app.config['UPLOAD_FOLDER']),
                filename_stem=filename_stem
            )
            if segmentation_metrics.get('error'):
                flash(f"Segmentation Error: {segmentation_metrics['error']}", 'error')
            elif segmentation_image_url:
                flash("Image segmentation performed successfully!", 'success')
            else:
                flash(f"Segmentation: {segmentation_metrics.get('status', 'No specific status.')}", 'info')
        else:
            flash("Segmentation model not initialized. Ensure 'ultralytics' is installed and model loaded.", 'error')
            logger.error("Segmentation model (YOLO) is not available.")
            segmentation_metrics['error'] = "Segmentation service unavailable."

        # Render the template with results for both tasks
        return render_template('index.html',
                               caption=generated_caption,
                               uploaded_image_url=uploaded_image_url,
                               segmentation_image_url=segmentation_image_url,
                               segmentation_metrics=segmentation_metrics)
    else:
        flash('Allowed image types are png, jpg, jpeg, gif.', 'error')
        logger.warning(f"Disallowed file type uploaded: {file.filename}")
        return redirect(request.url)
647
+ if __name__ == '__main__':
648
+ logger.info("Starting Flask web application with integrated auth...")
649
+ # This app will run on port 5000, handling both auth and image processing.
650
+ # app.run(debug=True, host='0.0.0.0', port=5000)