Upload 4 files

Browse files

Files changed (4) hide show

6 _ Fine-Tuning (Gemma)/Specific Models/LLM trained Gemma Model/gemini_delivery_model.py +751 -0
6 _ Fine-Tuning (Gemma)/Specific Models/LLM trained Gemma Model/gemini_price_model.py +751 -0
6 _ Fine-Tuning (Gemma)/Specific Models/LLM trained Gemma Model/gemini_product_model.py +741 -0
6 _ Fine-Tuning (Gemma)/Specific Models/LLM trained Gemma Model/gemini_service_model.py +748 -0

6 _ Fine-Tuning (Gemma)/Specific Models/LLM trained Gemma Model/gemini_delivery_model.py ADDED Viewed

	@@ -0,0 +1,751 @@

+import pandas as pd
+import torch
+from torch.utils.data import Dataset, DataLoader
+from torch import nn
+from transformers import AutoTokenizer, GemmaModel
+from peft import LoraConfig, get_peft_model, TaskType
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, hamming_loss, accuracy_score, precision_score, recall_score, f1_score
+import numpy as np
+import random
+import matplotlib.pyplot as plt
+import os
+# For UTF-8 characters in output
+import sys
+sys.stdout.reconfigure(encoding='utf-8')
+# Set random seeds for reproducibility
+seed_value = 42
+random.seed(seed_value)
+np.random.seed(seed_value)
+torch.manual_seed(seed_value)
+if torch.cuda.is_available():
+    torch.cuda.manual_seed_all(seed_value)
+# Parameters
+MODEL_ID = 'google/gemma-3-1b-pt'
+BATCH_SIZE = 8
+EPOCHS = 10
+LR = 5e-5
+# Load data - delivery-specific
+print("Loading training data from delivery_train_dataset.csv...")
+train_df = pd.read_csv('datasets/gemini/delivery_train_dataset.csv')
+print("Loading test data from Test_delivery_dataset.csv...")
+test_df = pd.read_csv('datasets/test_delivery_dataset.csv')
+# Define label columns (Delivery sub-aspects)
+label_cols = [
+    'Condition_DEL',
+    'Correctness_DEL',
+    'Timeliness_DEL',
+    'General_DEL'
+]
+# Prepare training data with 80/20 train/validation split
+train_X_full = train_df['Review'].astype(str).tolist()
+train_Y_full = train_df[label_cols].values.astype(np.float32)
+train_X, val_X, train_Y, val_Y = train_test_split(
+    train_X_full, train_Y_full,
+    test_size=0.2,
+    random_state=42
+)
+# Prepare test data
+test_X = test_df['Review'].astype(str).tolist()
+test_Y = test_df[label_cols].values.astype(np.float32)
+print(f"\nDataset sizes:")
+print(f"Training samples: {len(train_X)}")
+print(f"Validation samples: {len(val_X)}")
+print(f"Test samples: {len(test_X)}")
+print(f"Number of labels: {len(label_cols)}")
+# Compute class weights for imbalanced dataset
+def compute_class_weights(labels, label_names):
+    """
+    Compute class weights for multi-label classification
+    using the inverse of class frequency.
+    Args:
+        labels: numpy array of shape (n_samples, n_labels)
+        label_names: list of label column names
+    Returns:
+        pos_weight: torch tensor of positive class weights
+    """
+    n_samples = labels.shape[0]
+    n_labels = labels.shape[1]
+    pos_weights = []
+    print("\n" + "="*60)
+    print("CLASS IMBALANCE ANALYSIS")
+    print("="*60)
+    for i, label_name in enumerate(label_names):
+        pos_count = np.sum(labels[:, i] == 1)
+        neg_count = np.sum(labels[:, i] == 0)
+        # Calculate positive class weight (ratio of negative to positive)
+        if pos_count > 0:
+            raw_ratio = neg_count / pos_count
+            # Apply square root dampening to avoid extreme weights
+            pos_weight = np.sqrt(raw_ratio)
+        else:
+            pos_weight = 1.0
+        pos_weights.append(pos_weight)
+        print(f"\n{label_name}:")
+        print(f"  Positive samples: {pos_count} ({pos_count/n_samples*100:.2f}%)")
+        print(f"  Negative samples: {neg_count} ({neg_count/n_samples*100:.2f}%)")
+        print(f"  Raw imbalance ratio (neg/pos): {neg_count/pos_count if pos_count > 0 else 1.0:.4f}")
+        print(f"  Dampened weight (sqrt of ratio): {pos_weight:.4f}")
+    print("="*60 + "\n")
+    return torch.FloatTensor(pos_weights)
+def find_optimal_thresholds(model, dataloader, label_cols, device):
+    """
+    Find optimal decision threshold for each class independently
+    by maximizing F1-score on the validation set.
+    Args:
+        model: trained model
+        dataloader: validation data loader
+        label_cols: list of label column names
+        device: torch device
+    Returns:
+        optimal_thresholds: numpy array of optimal thresholds for each class
+    """
+    from sklearn.metrics import f1_score
+    print("\n" + "="*60)
+    print("OPTIMIZING DECISION THRESHOLDS")
+    print("="*60)
+    # Collect all predictions and labels
+    model.eval()
+    all_probs = []
+    all_labels = []
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in dataloader:
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+            logits = model(input_ids, attention_mask)
+            probs = torch.sigmoid(logits).cpu().numpy()
+            all_probs.append(probs)
+            all_labels.append(labels.cpu().numpy())
+    all_probs = np.vstack(all_probs)
+    all_labels = np.vstack(all_labels)
+    # Find optimal threshold for each class
+    optimal_thresholds = []
+    threshold_range = np.arange(0.1, 0.91, 0.05)  # 0.1 to 0.9 in steps of 0.05
+    for i, label_name in enumerate(label_cols):
+        best_threshold = 0.5
+        best_f1 = 0.0
+        for threshold in threshold_range:
+            preds = (all_probs[:, i] > threshold).astype(int)
+            f1 = f1_score(all_labels[:, i], preds, zero_division=0)
+            if f1 > best_f1:
+                best_f1 = f1
+                best_threshold = threshold
+        optimal_thresholds.append(best_threshold)
+        print(f"\n{label_name}:")
+        print(f"  Optimal threshold: {best_threshold:.2f}")
+        print(f"  Best F1-score: {best_f1:.4f}")
+        print(f"  (Default 0.5 threshold F1: {f1_score(all_labels[:, i], (all_probs[:, i] > 0.5).astype(int), zero_division=0):.4f})")
+    print("="*60 + "\n")
+    return np.array(optimal_thresholds)
+def predict_with_thresholds(model, dataloader, thresholds, device):
+    """
+    Make predictions using custom thresholds for each class.
+    Args:
+        model: trained model
+        dataloader: data loader
+        thresholds: numpy array of thresholds for each class
+        device: torch device
+    Returns:
+        predictions: numpy array of predictions
+        labels: numpy array of true labels
+    """
+    model.eval()
+    all_preds = []
+    all_labels = []
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in dataloader:
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+            logits = model(input_ids, attention_mask)
+            probs = torch.sigmoid(logits).cpu().numpy()
+            # Apply custom thresholds for each class
+            preds = np.zeros_like(probs, dtype=int)
+            for i in range(len(thresholds)):
+                preds[:, i] = (probs[:, i] > thresholds[i]).astype(int)
+            all_preds.append(preds)
+            all_labels.append(labels.cpu().numpy())
+    return np.vstack(all_preds), np.vstack(all_labels)
+# Dataset class
+class ReviewDataset(Dataset):
+    def __init__(self, texts, labels):
+        self.texts = texts
+        self.labels = labels
+    def __len__(self):
+        return len(self.texts)
+    def __getitem__(self, idx):
+        encoding = tokenizer(
+            self.texts[idx],
+            padding='max_length',
+            truncation=True,
+            max_length=256,
+            return_tensors='pt'
+        )
+        input_ids = encoding['input_ids'].squeeze()
+        attention_mask = encoding['attention_mask'].squeeze()
+        label = torch.FloatTensor(self.labels[idx])
+        return input_ids, attention_mask, label
+# Initialize tokenizer
+print("\nInitializing tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=True)
+# Create datasets
+train_dataset = ReviewDataset(train_X, train_Y)
+val_dataset = ReviewDataset(val_X, val_Y)
+test_dataset = ReviewDataset(test_X, test_Y)
+# Create data loaders
+train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
+val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
+test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
+# Compute class weights based on training data
+print("Computing class weights for imbalanced dataset...")
+pos_weights = compute_class_weights(train_Y, label_cols)
+# Initialize model with LoRA
+print("Initializing model with LoRA...")
+backbone = GemmaModel.from_pretrained(MODEL_ID, token=True, dtype=torch.bfloat16)
+lora_config = LoraConfig(
+    task_type=TaskType.FEATURE_EXTRACTION,
+    r=8,
+    lora_alpha=16,
+    lora_dropout=0.05,
+    target_modules=["q_proj", "v_proj"]
+)
+backbone = get_peft_model(backbone, lora_config)
+# Classifier model
+class GemmaClassifier(nn.Module):
+    def __init__(self, backbone, num_labels):
+        super().__init__()
+        self.backbone = backbone
+        self.pooler = nn.AdaptiveAvgPool1d(1)
+        self.classifier = nn.Linear(backbone.config.hidden_size, num_labels)
+    def forward(self, input_ids, attention_mask):
+        output = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
+        hidden = output.last_hidden_state
+        pooled = self.pooler(hidden.permute(0, 2, 1)).squeeze(-1)
+        logits = self.classifier(pooled.float())
+        return logits
+# Initialize model, optimizer, and loss function
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(f"Using device: {device}")
+model = GemmaClassifier(backbone, len(label_cols)).to(device)
+optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
+# Use computed pos_weight to handle class imbalance
+criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weights.to(device))
+print(f"\nInitialized BCEWithLogitsLoss with pos_weight: {pos_weights.cpu().numpy()}")
+# Initialize loss tracking
+train_losses = []
+val_losses = []
+train_batch_losses = []  # Per-batch training losses
+val_batch_losses = []    # Per-batch validation losses
+# Early stopping variables
+best_val_loss = float('inf')
+best_epoch = 0
+best_model_state = None
+patience = 5  # Number of epochs to wait for improvement
+patience_counter = 0
+# Training loop
+print("\n" + "="*60)
+print("TRAINING")
+print("="*60)
+for epoch in range(EPOCHS):
+    model.train()
+    total_loss = 0
+    batch_count = 0
+    for input_ids, attention_mask, labels in train_loader:
+        input_ids = input_ids.to(device)
+        attention_mask = attention_mask.to(device)
+        labels = labels.to(device)
+        optimizer.zero_grad()
+        logits = model(input_ids, attention_mask)
+        loss = criterion(logits, labels)
+        loss.backward()
+        optimizer.step()
+        total_loss += loss.item()
+        batch_count += 1
+        train_batch_losses.append(loss.item())  # Store per-batch loss
+        # Print progress every 100 batches
+        if batch_count % 100 == 0:
+            print(f"  Epoch {epoch+1} | Batch {batch_count}/{len(train_loader)} | Current Loss: {loss.item():.4f}")
+    avg_train_loss = total_loss / len(train_loader)
+    train_losses.append(avg_train_loss)
+    print(f"\nEpoch {epoch+1}/{EPOCHS} completed")
+    print(f"Average Training Loss: {avg_train_loss:.4f}")
+    # Validation on validation set
+    model.eval()
+    val_loss = 0
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in val_loader:
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+            labels = labels.to(device)
+            logits = model(input_ids, attention_mask)
+            loss = criterion(logits, labels)
+            val_loss += loss.item()
+            val_batch_losses.append(loss.item())  # Store per-batch validation loss
+    avg_val_loss = val_loss / len(val_loader)
+    val_losses.append(avg_val_loss)
+    print(f"Validation Loss: {avg_val_loss:.4f}")
+    # Early stopping check
+    if avg_val_loss < best_val_loss:
+        best_val_loss = avg_val_loss
+        best_epoch = epoch + 1
+        best_model_state = model.state_dict().copy()
+        patience_counter = 0
+        print(f"✓ New best validation loss: {best_val_loss:.4f} (Epoch {best_epoch})")
+    else:
+        patience_counter += 1
+        print(f"  No improvement for {patience_counter} epoch(s)")
+        if patience_counter >= patience:
+            print(f"\nEarly stopping triggered! Best validation loss: {best_val_loss:.4f} at epoch {best_epoch}")
+            break
+    print("-" * 60)
+# Load best model state
+if best_model_state is not None:
+    print(f"\nLoading best model from epoch {best_epoch} with validation loss: {best_val_loss:.4f}")
+    model.load_state_dict(best_model_state)
+else:
+    print("\nNo best model found, using final model state")
+# Optimize decision thresholds using validation set
+print("Finding optimal decision thresholds for each class...")
+optimal_thresholds = find_optimal_thresholds(model, val_loader, label_cols, device)
+print(f"Optimal thresholds: {optimal_thresholds}")
+# SAVE MODEL AFTER TRAINING
+# SAVE_PATH = "gemma_delivery_specific.pt"
+# torch.save(model.state_dict(), SAVE_PATH)
+# print(f"\nModel saved to: {SAVE_PATH}")
+SAVE_DIR = r"C:\temp\new_models"  # make sure this folder exists
+os.makedirs(SAVE_DIR, exist_ok=True)
+SAVE_PATH = os.path.join(SAVE_DIR, "gemma_delivery_specific.pt")
+torch.save(model.to('cpu').state_dict(), SAVE_PATH)
+model.to(device)  # Move model back to device after saving
+print(f"\nModel saved to: {SAVE_PATH}")
+# Plot training and validation loss
+print("\n" + "="*60)
+print("PLOTTING TRAINING CURVES")
+print("="*60)
+plt.figure(figsize=(10, 6))
+epochs_range = range(1, EPOCHS + 1)
+plt.plot(epochs_range, train_losses, 'b-o', label='Training Loss', linewidth=2, markersize=8)
+plt.plot(epochs_range, val_losses, 'r-s', label='Validation Loss', linewidth=2, markersize=8)
+plt.xlabel('Epoch', fontsize=12)
+plt.ylabel('Loss', fontsize=12)
+plt.title('Training and Validation Loss Over Epochs', fontsize=14, fontweight='bold')
+plt.legend(fontsize=10)
+plt.grid(True, alpha=0.3)
+plt.tight_layout()
+# Save the plot
+plot_path = 'training_loss_plot_delivery.png'
+plt.savefig(plot_path, dpi=300, bbox_inches='tight')
+print(f"Training loss plot saved to: {plot_path}")
+# Display loss values
+print("\nLoss values per epoch:")
+print("-" * 40)
+for i, (train_loss, val_loss) in enumerate(zip(train_losses, val_losses), 1):
+    print(f"Epoch {i}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
+print("-" * 40)
+# Plot detailed per-batch loss curves
+print("\nGenerating detailed per-batch loss plot...")
+# Create figure with two subplots
+fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))
+# Calculate moving average for smoothing (window size = 50 batches)
+def moving_average(data, window_size):
+    if len(data) < window_size:
+        window_size = max(1, len(data) // 2)
+    cumsum = np.cumsum(np.insert(data, 0, 0))
+    return (cumsum[window_size:] - cumsum[:-window_size]) / window_size
+train_ma = moving_average(train_batch_losses, 50)
+val_ma = moving_average(val_batch_losses, 50)
+# Subplot 1: Training loss per batch
+ax1.plot(train_batch_losses, alpha=0.3, color='lightblue', linewidth=0.5, label='Raw Training Loss')
+ax1.plot(range(len(train_ma)), train_ma, color='blue', linewidth=2, label='Smoothed (Moving Avg, window=50)')
+ax1.set_xlabel('Training Batch', fontsize=11)
+ax1.set_ylabel('Loss', fontsize=11)
+ax1.set_title('Training Loss per Batch (Detailed View)', fontsize=13, fontweight='bold')
+ax1.legend(fontsize=9)
+ax1.grid(True, alpha=0.3)
+# Add vertical lines for epoch boundaries
+batches_per_epoch = len(train_loader)
+for epoch_idx in range(1, EPOCHS):
+    ax1.axvline(x=epoch_idx * batches_per_epoch, color='red', linestyle='--', linewidth=1, alpha=0.5)
+# Subplot 2: Validation loss per batch
+ax2.plot(val_batch_losses, alpha=0.3, color='lightcoral', linewidth=0.5, label='Raw Validation Loss')
+ax2.plot(range(len(val_ma)), val_ma, color='red', linewidth=2, label='Smoothed (Moving Avg, window=50)')
+ax2.set_xlabel('Validation Batch', fontsize=11)
+ax2.set_ylabel('Loss', fontsize=11)
+ax2.set_title('Validation Loss per Batch (Detailed View)', fontsize=13, fontweight='bold')
+ax2.legend(fontsize=9)
+ax2.grid(True, alpha=0.3)
+# Add vertical lines for epoch boundaries
+val_batches_per_epoch = len(val_loader)
+for epoch_idx in range(1, EPOCHS):
+    ax2.axvline(x=epoch_idx * val_batches_per_epoch, color='blue', linestyle='--', linewidth=1, alpha=0.5)
+plt.tight_layout()
+# Save the detailed plot
+detailed_plot_path = 'training_loss_per_batch_detailed_delivery.png'
+plt.savefig(detailed_plot_path, dpi=300, bbox_inches='tight')
+print(f"Detailed per-batch loss plot saved to: {detailed_plot_path}")
+# Print batch loss statistics
+print("\nBatch Loss Statistics:")
+print("-" * 60)
+print(f"Training batches: {len(train_batch_losses)}")
+print(f"  Min loss: {min(train_batch_losses):.4f}")
+print(f"  Max loss: {max(train_batch_losses):.4f}")
+print(f"  Mean loss: {np.mean(train_batch_losses):.4f}")
+print(f"  Std dev: {np.std(train_batch_losses):.4f}")
+print(f"\nValidation batches: {len(val_batch_losses)}")
+print(f"  Min loss: {min(val_batch_losses):.4f}")
+print(f"  Max loss: {max(val_batch_losses):.4f}")
+print(f"  Mean loss: {np.mean(val_batch_losses):.4f}")
+print(f"  Std dev: {np.std(val_batch_losses):.4f}")
+print("-" * 60)
+# VALIDATION SET EVALUATION (WITH OPTIMIZED THRESHOLDS)
+# Prints a full console report for the validation split: default-vs-tuned
+# threshold comparison, sklearn classification report, Hamming loss,
+# per-label metrics with confusion counts, exact/partial match rates and
+# the first few individual predictions.
+print("\n" + "="*60)
+print("VALIDATION SET EVALUATION (WITH OPTIMIZED THRESHOLDS)")
+print("="*60)
+val_preds, val_labels_eval = predict_with_thresholds(model, val_loader, optimal_thresholds, device)
+# Also get predictions with default threshold for comparison
+# (a second inference pass over the same loader, every label cut at 0.5)
+model.eval()
+val_preds_default = []
+with torch.no_grad():
+    for input_ids, attention_mask, labels in val_loader:
+        input_ids = input_ids.to(device)
+        attention_mask = attention_mask.to(device)
+        logits = model(input_ids, attention_mask)
+        probs = torch.sigmoid(logits).cpu().numpy()
+        preds = (probs > 0.5).astype(int)
+        val_preds_default.append(preds)
+val_preds_default = np.vstack(val_preds_default)
+print(f"\nPredicted data shape: {val_preds.shape}")
+print(f"Ground truth data shape: {val_labels_eval.shape}")
+# Comparison: Default vs Optimized Thresholds (per-label F1 under each scheme)
+print("\n" + "="*60)
+print("COMPARISON: Default vs Optimized Thresholds")
+print("="*60)
+print("\nDefault Threshold (0.5):")
+for i, label in enumerate(label_cols):
+    f1_default = f1_score(val_labels_eval[:, i], val_preds_default[:, i], zero_division=0)
+    print(f"  {label}: F1 = {f1_default:.4f}")
+print("\nOptimized Thresholds:")
+for i, label in enumerate(label_cols):
+    f1_optimized = f1_score(val_labels_eval[:, i], val_preds[:, i], zero_division=0)
+    print(f"  {label}: F1 = {f1_optimized:.4f} (threshold = {optimal_thresholds[i]:.2f})")
+print("="*60 + "\n")
+# Classification Report (tuned-threshold predictions from here on)
+print('\n' + '='*60)
+print('CLASSIFICATION REPORT (VALIDATION)')
+print('='*60)
+print(classification_report(val_labels_eval, val_preds, target_names=label_cols))
+# Hamming Loss
+val_hamming_loss = hamming_loss(val_labels_eval, val_preds)
+print("="*60)
+print("HAMMING LOSS (Multi-label Error Rate)")
+print("="*60)
+print(f"Hamming Loss: {val_hamming_loss:.4f}")
+print(f"(Fraction of incorrectly predicted labels: {val_hamming_loss:.2%})")
+# Per-aspect metrics: each label column is scored as its own binary problem
+print("\n" + "="*60)
+print("PER-ASPECT METRICS (VALIDATION)")
+print("="*60)
+for i, aspect in enumerate(label_cols):
+    y_true = val_labels_eval[:, i]
+    y_pred = val_preds[:, i]
+    acc = accuracy_score(y_true, y_pred)
+    prec = precision_score(y_true, y_pred, zero_division=0)
+    rec = recall_score(y_true, y_pred, zero_division=0)
+    f1 = f1_score(y_true, y_pred, zero_division=0)
+    print(f"\n=== {aspect.upper()} ===")
+    print(f"Accuracy:  {acc:.4f}")
+    print(f"Precision: {prec:.4f}")
+    print(f"Recall:    {rec:.4f}")
+    print(f"F1 Score:  {f1:.4f}")
+    # Confusion-matrix cells counted directly from the 0/1 vectors
+    tp = np.sum((y_true == 1) & (y_pred == 1))
+    tn = np.sum((y_true == 0) & (y_pred == 0))
+    fp = np.sum((y_true == 0) & (y_pred == 1))
+    fn = np.sum((y_true == 1) & (y_pred == 0))
+    print(f"  TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
+# Exact match accuracy: a sample counts only if every label is right
+val_exact_matches = np.all(val_preds == val_labels_eval, axis=1)
+val_exact_match_acc = np.mean(val_exact_matches)
+print("\n" + "="*60)
+print("EXACT MATCH (ALL ASPECTS)")
+print("="*60)
+print(f"Samples with ALL aspects correct: {np.sum(val_exact_matches)}/{len(val_exact_matches)}")
+print(f"Exact Match Accuracy: {val_exact_match_acc:.4f}")
+# Partial match accuracy (per sample): share of label slots, positive or
+# negative, on which prediction and ground truth agree
+partial_match_scores = []
+for i in range(len(val_labels_eval)):
+    correct_labels = np.sum(val_preds[i] == val_labels_eval[i])
+    partial_match_scores.append(correct_labels / len(label_cols))
+partial_match_scores = np.array(partial_match_scores)
+avg_partial_match = np.mean(partial_match_scores)
+print("\n" + "="*60)
+print("PARTIAL MATCH (PER-SAMPLE LABEL ACCURACY)")
+print("="*60)
+print(f"Average Partial Match: {avg_partial_match:.4f} ({avg_partial_match:.2%})")
+print(f"(Average fraction of labels correctly predicted per sample)")
+# Sample predictions with match/mismatch
+# (val_X[idx] is paired with row idx of the predictions; this relies on the
+# validation loader iterating in dataset order)
+print("\n" + "="*60)
+print("SAMPLE PREDICTIONS VS GROUND TRUTH (VALIDATION)")
+print("="*60)
+num_samples = min(10, len(val_X))
+print(f"\nShowing {num_samples} validation samples:\n")
+for idx in range(num_samples):
+    review = val_X[idx]
+    true_labels = [label_cols[i] for i, v in enumerate(val_labels_eval[idx]) if v == 1]
+    pred_labels = [label_cols[i] for i, v in enumerate(val_preds[idx]) if v == 1]
+    # Calculate partial match for this sample
+    # Count how many true labels were correctly predicted
+    # NOTE: unlike the aggregate partial match above, this ratio looks at
+    # positive labels only; a sample with no true label uses a denominator of 1
+    matching_labels = len(set(true_labels) & set(pred_labels))
+    total_true_labels = len(true_labels) if len(true_labels) > 0 else 1
+    partial_match = matching_labels / total_true_labels
+    # Long reviews are cut to 150 characters for display
+    review_display = review[:150] + "..." if len(review) > 150 else review
+    print(f"Sample {idx + 1}:")
+    print(f"Review: {review_display}")
+    print(f"✓ True Labels:      {true_labels if true_labels else ['None']}")
+    print(f"→ Predicted Labels: {pred_labels if pred_labels else ['None']}")
+    print(f"Match: {'✓ Exact' if set(true_labels) == set(pred_labels) else '✗ Mismatch'}")
+    print(f"Partial Match: {matching_labels}/{total_true_labels} labels correct ({partial_match:.2%})")
+    print("-" * 40)
+# Final Evaluation on Test Set (WITH OPTIMIZED THRESHOLDS)
+# Same console report as for validation, but on the held-out test split and
+# using the thresholds tuned on the validation split.
+print("\n" + "="*60)
+print("FINAL EVALUATION ON TEST SET (WITH OPTIMIZED THRESHOLDS)")
+print("="*60)
+all_preds, all_labels = predict_with_thresholds(model, test_loader, optimal_thresholds, device)
+print(f"\nPredicted data shape: {all_preds.shape}")
+print(f"Ground truth data shape: {all_labels.shape}")
+# Classification Report
+print('\n' + '='*60)
+print('CLASSIFICATION REPORT')
+print('='*60)
+print(classification_report(all_labels, all_preds, target_names=label_cols))
+# Hamming Loss
+hamming_loss_value = hamming_loss(all_labels, all_preds)
+print("="*60)
+print("HAMMING LOSS (Multi-label Error Rate)")
+print("="*60)
+print(f"Hamming Loss: {hamming_loss_value:.4f}")
+print(f"(Fraction of incorrectly predicted labels: {hamming_loss_value:.2%})")
+# Per-aspect metrics: each label column is scored as its own binary problem
+print("\n" + "="*60)
+print("PER-ASPECT METRICS")
+print("="*60)
+for i, aspect in enumerate(label_cols):
+    y_true = all_labels[:, i]
+    y_pred = all_preds[:, i]
+    acc = accuracy_score(y_true, y_pred)
+    prec = precision_score(y_true, y_pred, zero_division=0)
+    rec = recall_score(y_true, y_pred, zero_division=0)
+    f1 = f1_score(y_true, y_pred, zero_division=0)
+    print(f"\n=== {aspect.upper()} ===")
+    print(f"Accuracy:  {acc:.4f}")
+    print(f"Precision: {prec:.4f}")
+    print(f"Recall:    {rec:.4f}")
+    print(f"F1 Score:  {f1:.4f}")
+    # Confusion-matrix cells counted directly from the 0/1 vectors
+    tp = np.sum((y_true == 1) & (y_pred == 1))
+    tn = np.sum((y_true == 0) & (y_pred == 0))
+    fp = np.sum((y_true == 0) & (y_pred == 1))
+    fn = np.sum((y_true == 1) & (y_pred == 0))
+    print(f"  TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
+# Exact match accuracy: a sample counts only if every label is right
+exact_matches = np.all(all_preds == all_labels, axis=1)
+exact_match_acc = np.mean(exact_matches)
+print("\n" + "="*60)
+print("EXACT MATCH (ALL ASPECTS)")
+print("="*60)
+print(f"Samples with ALL aspects correct: {np.sum(exact_matches)}/{len(exact_matches)}")
+print(f"Exact Match Accuracy: {exact_match_acc:.4f}")
+# Partial match accuracy (per sample): share of label slots, positive or
+# negative, on which prediction and ground truth agree
+test_partial_match_scores = []
+for i in range(len(all_labels)):
+    correct_labels = np.sum(all_preds[i] == all_labels[i])
+    test_partial_match_scores.append(correct_labels / len(label_cols))
+test_partial_match_scores = np.array(test_partial_match_scores)
+avg_test_partial_match = np.mean(test_partial_match_scores)
+print("\n" + "="*60)
+print("PARTIAL MATCH (PER-SAMPLE LABEL ACCURACY)")
+print("="*60)
+print(f"Average Partial Match: {avg_test_partial_match:.4f} ({avg_test_partial_match:.2%})")
+print(f"(Average fraction of labels correctly predicted per sample)")
+# Sample predictions
+# (test_X[idx] is paired with row idx of the predictions; this relies on the
+# test loader iterating in dataset order)
+print("\n" + "="*60)
+print("SAMPLE PREDICTIONS VS GROUND TRUTH")
+print("="*60)
+num_samples = min(10, len(test_X))
+print(f"\nShowing {num_samples} test samples:\n")
+for idx in range(num_samples):
+    review = test_X[idx]
+    true_labels = [label_cols[i] for i, v in enumerate(all_labels[idx]) if v == 1]
+    pred_labels = [label_cols[i] for i, v in enumerate(all_preds[idx]) if v == 1]
+    # Calculate partial match for this sample
+    # Count how many true labels were correctly predicted
+    # NOTE: unlike the aggregate partial match above, this ratio looks at
+    # positive labels only; a sample with no true label uses a denominator of 1
+    matching_labels = len(set(true_labels) & set(pred_labels))
+    total_true_labels = len(true_labels) if len(true_labels) > 0 else 1
+    partial_match = matching_labels / total_true_labels
+    # Long reviews are cut to 150 characters for display
+    review_display = review[:150] + "..." if len(review) > 150 else review
+    print(f"Sample {idx + 1}:")
+    print(f"Review: {review_display}")
+    print(f"✓ True Labels:      {true_labels if true_labels else ['None']}")
+    print(f"→ Predicted Labels: {pred_labels if pred_labels else ['None']}")
+    print(f"Match: {'✓ Exact' if set(true_labels) == set(pred_labels) else '✗ Mismatch'}")
+    print(f"Partial Match: {matching_labels}/{total_true_labels} labels correct ({partial_match:.2%})")
+    print("-" * 40)
+# Save model interactively (optional)
+# model_save_path = 'gemma_delivery_classifier.pth'
+# torch.save({
+#     'epoch': EPOCHS,
+#     'model_state_dict': model.state_dict(),
+#     'optimizer_state_dict': optimizer.state_dict(),
+#     'train_loss': avg_train_loss,
+#     'test_loss': avg_test_loss,
+# }, model_save_path)
+# print(f"Model saved to {model_save_path}")
+model_save_path = os.path.join(SAVE_DIR, 'gemma_delivery_classifier.pth')
+torch.save({
+    'epoch': best_epoch if best_model_state is not None else EPOCHS,
+    'model_state_dict': model.state_dict(),
+    'optimizer_state_dict': optimizer.state_dict(),
+    'train_loss': train_losses[best_epoch - 1] if best_model_state is not None else train_losses[-1] if train_losses else 0,
+    'val_loss': best_val_loss if best_model_state is not None else (val_losses[-1] if val_losses else 0),
+    'best_epoch': best_epoch,
+    'best_val_loss': best_val_loss,
+    'optimal_thresholds': optimal_thresholds,
+}, model_save_path)
+print(f"Model saved to {model_save_path}")
+print("\n" + "="*60)
+print("TRAINING COMPLETE")
+print("="*60)

6 _ Fine-Tuning (Gemma)/Specific Models/LLM trained Gemma Model/gemini_price_model.py ADDED Viewed

	@@ -0,0 +1,751 @@

+import pandas as pd
+import torch
+from torch.utils.data import Dataset, DataLoader
+from torch import nn
+from transformers import AutoTokenizer, GemmaModel
+from peft import LoraConfig, get_peft_model, TaskType
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, hamming_loss, accuracy_score, precision_score, recall_score, f1_score
+import numpy as np
+import random
+import matplotlib.pyplot as plt
+import os
+# For UTF-8 characters in output
+import sys
+sys.stdout.reconfigure(encoding='utf-8')
+# Set random seeds for reproducibility
+seed_value = 42
+random.seed(seed_value)
+np.random.seed(seed_value)
+torch.manual_seed(seed_value)
+if torch.cuda.is_available():
+    torch.cuda.manual_seed_all(seed_value)
+# Parameters
+MODEL_ID = 'google/gemma-3-1b-pt'
+BATCH_SIZE = 8
+EPOCHS = 10
+LR = 5e-5
+# Load data - price-specific
+print("Loading training data from price_train_dataset.csv...")
+train_df = pd.read_csv('datasets/gemini/price_train_dataset.csv')
+print("Loading test data from Test_price_dataset.csv...")
+test_df = pd.read_csv('datasets/test_price_dataset.csv')
+# Define label columns (Price sub-aspects)
+label_cols = [
+    'Affordability_PRICE',
+    'Value_for_Money_PRICE',
+    'General_PRICE'
+]
+# Prepare training data with 80/20 train/validation split
+train_X_full = train_df['Review'].astype(str).tolist()
+train_Y_full = train_df[label_cols].values.astype(np.float32)
+train_X, val_X, train_Y, val_Y = train_test_split(
+    train_X_full, train_Y_full,
+    test_size=0.2,
+    random_state=42
+)
+# Prepare test data
+test_X = test_df['Review'].astype(str).tolist()
+test_Y = test_df[label_cols].values.astype(np.float32)
+print(f"\nDataset sizes:")
+print(f"Training samples: {len(train_X)}")
+print(f"Validation samples: {len(val_X)}")
+print(f"Test samples: {len(test_X)}")
+print(f"Number of labels: {len(label_cols)}")
+# Compute class weights for imbalanced dataset
+def compute_class_weights(labels, label_names):
+    """
+    Compute class weights for multi-label classification
+    using the inverse of class frequency.
+    Args:
+        labels: numpy array of shape (n_samples, n_labels)
+        label_names: list of label column names
+    Returns:
+        pos_weight: torch tensor of positive class weights
+    """
+    n_samples = labels.shape[0]
+    n_labels = labels.shape[1]
+    pos_weights = []
+    print("\n" + "="*60)
+    print("CLASS IMBALANCE ANALYSIS")
+    print("="*60)
+    for i, label_name in enumerate(label_names):
+        pos_count = np.sum(labels[:, i] == 1)
+        neg_count = np.sum(labels[:, i] == 0)
+        # Calculate positive class weight (ratio of negative to positive)
+        if pos_count > 0:
+            raw_ratio = neg_count / pos_count
+            # Apply square root dampening to avoid extreme weights
+            pos_weight = np.sqrt(raw_ratio)
+        else:
+            pos_weight = 1.0
+        pos_weights.append(pos_weight)
+        print(f"\n{label_name}:")
+        print(f"  Positive samples: {pos_count} ({pos_count/n_samples*100:.2f}%)")
+        print(f"  Negative samples: {neg_count} ({neg_count/n_samples*100:.2f}%)")
+        print(f"  Raw imbalance ratio (neg/pos): {neg_count/pos_count if pos_count > 0 else 1.0:.4f}")
+        print(f"  Dampened weight (sqrt of ratio): {pos_weight:.4f}")
+    print("="*60 + "\n")
+    return torch.FloatTensor(pos_weights)
+def find_optimal_thresholds(model, dataloader, label_cols, device):
+    """
+    Choose, for each label independently, the decision threshold that
+    maximizes the F1-score on the data served by `dataloader`.
+    Candidates run from 0.10 to 0.90 in steps of 0.05. When several
+    candidates tie for the best F1 the lowest one is kept; when no candidate
+    scores above zero the default threshold 0.5 is used for that label.
+    Args:
+        model: trained model, called as model(input_ids, attention_mask)
+        dataloader: validation loader yielding (input_ids, attention_mask, labels)
+        label_cols: list of label column names
+        device: torch device the input tensors are moved to
+    Returns:
+        numpy array with one threshold per entry of label_cols
+    """
+    from sklearn.metrics import f1_score
+    rule = "=" * 60
+    print("\n" + rule)
+    print("OPTIMIZING DECISION THRESHOLDS")
+    print(rule)
+    # One inference pass: gather sigmoid probabilities and the matching targets
+    model.eval()
+    prob_batches, target_batches = [], []
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in dataloader:
+            logits = model(input_ids.to(device), attention_mask.to(device))
+            prob_batches.append(torch.sigmoid(logits).cpu().numpy())
+            target_batches.append(labels.cpu().numpy())
+    all_probs = np.vstack(prob_batches)
+    all_labels = np.vstack(target_batches)
+    candidates = np.arange(0.1, 0.91, 0.05)  # 0.1 to 0.9 in steps of 0.05
+    chosen = []
+    for col, label_name in enumerate(label_cols):
+        y_true = all_labels[:, col]
+        y_prob = all_probs[:, col]
+        scores = [
+            f1_score(y_true, (y_prob > cutoff).astype(int), zero_division=0)
+            for cutoff in candidates
+        ]
+        # argmax returns the first maximum, i.e. the lowest best-scoring cutoff
+        top = int(np.argmax(scores))
+        if scores[top] > 0.0:
+            best_threshold, best_f1 = candidates[top], scores[top]
+        else:
+            best_threshold, best_f1 = 0.5, 0.0
+        chosen.append(best_threshold)
+        default_f1 = f1_score(y_true, (y_prob > 0.5).astype(int), zero_division=0)
+        print(f"\n{label_name}:")
+        print(f"  Optimal threshold: {best_threshold:.2f}")
+        print(f"  Best F1-score: {best_f1:.4f}")
+        print(f"  (Default 0.5 threshold F1: {default_f1:.4f})")
+    print(rule + "\n")
+    return np.array(chosen)
+def predict_with_thresholds(model, dataloader, thresholds, device):
+    """
+    Run the model over `dataloader` and binarize its sigmoid outputs with a
+    separate cutoff per class.
+    Args:
+        model: trained model, called as model(input_ids, attention_mask)
+        dataloader: loader yielding (input_ids, attention_mask, labels)
+        thresholds: numpy array with one cutoff per class
+        device: torch device the input tensors are moved to
+    Returns:
+        predictions: integer numpy array of 0/1 decisions, batches stacked row-wise
+        labels: numpy array of the true labels, stacked in the same order
+    """
+    model.eval()
+    pred_batches, target_batches = [], []
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in dataloader:
+            logits = model(input_ids.to(device), attention_mask.to(device))
+            probs = torch.sigmoid(logits).cpu().numpy()
+            # A probability strictly above its class cutoff counts as a positive;
+            # columns without a cutoff stay 0
+            decisions = np.zeros_like(probs, dtype=int)
+            for col, cutoff in enumerate(thresholds):
+                decisions[:, col] = probs[:, col] > cutoff
+            pred_batches.append(decisions)
+            target_batches.append(labels.cpu().numpy())
+    return np.vstack(pred_batches), np.vstack(target_batches)
+# Dataset class
+class ReviewDataset(Dataset):
+    """
+    Dataset pairing each review text with its multi-label target row.
+    Texts are tokenized lazily, on every item access, with the module-level
+    `tokenizer`, which therefore has to exist before the first item is read.
+    """
+    def __init__(self, texts, labels):
+        self.texts = texts
+        self.labels = labels
+    def __len__(self):
+        return len(self.texts)
+    def __getitem__(self, idx):
+        # Fixed-length encoding (pad/truncate to 256 tokens) so that items
+        # can be stacked into a batch
+        tokenizer_options = dict(
+            padding='max_length',
+            truncation=True,
+            max_length=256,
+            return_tensors='pt',
+        )
+        encoded = tokenizer(self.texts[idx], **tokenizer_options)
+        # squeeze() drops the size-1 dimensions of the encoded tensors
+        # (presumably the leading batch dimension added by return_tensors='pt')
+        return (
+            encoded['input_ids'].squeeze(),
+            encoded['attention_mask'].squeeze(),
+            torch.FloatTensor(self.labels[idx]),
+        )
+# Initialize tokenizer
+# ReviewDataset.__getitem__ looks up this module-level name at item-access time,
+# so it must be bound before any of the loaders below is iterated.
+print("\nInitializing tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=True)
+# Create datasets
+train_dataset = ReviewDataset(train_X, train_Y)
+val_dataset = ReviewDataset(val_X, val_Y)
+test_dataset = ReviewDataset(test_X, test_Y)
+# Create data loaders
+# Only the training loader shuffles; validation and test keep dataset order.
+train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
+val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
+test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
+# Compute class weights based on training data
+# (the training split only: train_Y, not the validation or test labels)
+print("Computing class weights for imbalanced dataset...")
+pos_weights = compute_class_weights(train_Y, label_cols)
+# Initialize model with LoRA
+# NOTE(review): the checkpoint is loaded through the GemmaModel class. If MODEL_ID
+# names a Gemma 3 checkpoint (as in the sibling scripts), confirm that this class
+# matches the checkpoint architecture - TODO confirm.
+print("Initializing model with LoRA...")
+backbone = GemmaModel.from_pretrained(MODEL_ID, token=True, dtype=torch.bfloat16)
+# Rank-8 adapters (alpha 16, dropout 0.05) on the q_proj and v_proj modules
+lora_config = LoraConfig(
+    task_type=TaskType.FEATURE_EXTRACTION,
+    r=8,
+    lora_alpha=16,
+    lora_dropout=0.05,
+    target_modules=["q_proj", "v_proj"]
+)
+# Wrap the backbone so the LoRA adapters are attached to it
+backbone = get_peft_model(backbone, lora_config)
+# Classifier model
+class GemmaClassifier(nn.Module):
+    """
+    Multi-label classification head on top of a transformer backbone.
+    The backbone's last hidden states are mean-pooled over the real
+    (non-padding) tokens only and passed through a single linear layer that
+    emits one logit per label.
+    Args:
+        backbone: module called as backbone(input_ids=..., attention_mask=...)
+            whose output exposes `last_hidden_state`, and whose
+            `config.hidden_size` gives the feature width
+        num_labels: number of output logits
+    """
+    def __init__(self, backbone, num_labels):
+        super().__init__()
+        self.backbone = backbone
+        # Parameter-free and no longer used by forward(); kept so that code
+        # reading `model.pooler` keeps working
+        self.pooler = nn.AdaptiveAvgPool1d(1)
+        self.classifier = nn.Linear(backbone.config.hidden_size, num_labels)
+    def forward(self, input_ids, attention_mask):
+        """Return logits of shape (batch, num_labels)."""
+        output = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
+        # Pool in float32 so the sum over tokens does not lose precision
+        hidden = output.last_hidden_state.float()
+        # Inputs are padded to a fixed length, so a plain mean over all
+        # positions would mix padding states into the sentence vector.
+        # Weight each position by its attention-mask entry instead.
+        mask = attention_mask.unsqueeze(-1).to(hidden.dtype)
+        # clamp guards against division by zero for a fully masked row
+        token_counts = mask.sum(dim=1).clamp(min=1.0)
+        pooled = (hidden * mask).sum(dim=1) / token_counts
+        logits = self.classifier(pooled)
+        return logits
+# Pick the compute device, then build the classifier, optimizer and loss on it
+device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
+print(f"Using device: {device}")
+model = GemmaClassifier(backbone, len(label_cols))
+model = model.to(device)
+optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
+# The per-label positive weights counter the class imbalance inside the BCE loss
+criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weights.to(device))
+print(f"\nInitialized BCEWithLogitsLoss with pos_weight: {pos_weights.cpu().numpy()}")
+# Loss history: one entry per epoch, and one entry per batch
+train_losses, val_losses = [], []
+train_batch_losses, val_batch_losses = [], []
+# Early-stopping bookkeeping
+best_val_loss, best_epoch, best_model_state = float('inf'), 0, None
+# Training stops after `patience` consecutive epochs without improvement
+patience, patience_counter = 5, 0
+# Training loop
+# Each epoch: one optimization pass over train_loader, one no-grad pass over
+# val_loader, then an early-stopping check on the mean validation loss.
+print("\n" + "="*60)
+print("TRAINING")
+print("="*60)
+for epoch in range(EPOCHS):
+    model.train()
+    total_loss = 0
+    batch_count = 0
+    for input_ids, attention_mask, labels in train_loader:
+        input_ids = input_ids.to(device)
+        attention_mask = attention_mask.to(device)
+        labels = labels.to(device)
+        optimizer.zero_grad()
+        logits = model(input_ids, attention_mask)
+        loss = criterion(logits, labels)
+        loss.backward()
+        optimizer.step()
+        # Read the scalar once per batch instead of calling .item() repeatedly
+        batch_loss = loss.item()
+        total_loss += batch_loss
+        batch_count += 1
+        train_batch_losses.append(batch_loss)  # Store per-batch loss
+        # Print progress every 100 batches
+        if batch_count % 100 == 0:
+            print(f"  Epoch {epoch+1} | Batch {batch_count}/{len(train_loader)} | Current Loss: {batch_loss:.4f}")
+    avg_train_loss = total_loss / len(train_loader)
+    train_losses.append(avg_train_loss)
+    print(f"\nEpoch {epoch+1}/{EPOCHS} completed")
+    print(f"Average Training Loss: {avg_train_loss:.4f}")
+    # Validation on validation set
+    model.eval()
+    val_loss = 0
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in val_loader:
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+            labels = labels.to(device)
+            logits = model(input_ids, attention_mask)
+            loss = criterion(logits, labels)
+            batch_loss = loss.item()
+            val_loss += batch_loss
+            val_batch_losses.append(batch_loss)  # Store per-batch validation loss
+    avg_val_loss = val_loss / len(val_loader)
+    val_losses.append(avg_val_loss)
+    print(f"Validation Loss: {avg_val_loss:.4f}")
+    # Early stopping check
+    if avg_val_loss < best_val_loss:
+        best_val_loss = avg_val_loss
+        best_epoch = epoch + 1
+        # state_dict() hands out references to the live parameter tensors and a
+        # dict-level .copy() does not duplicate them, so later optimizer steps
+        # would silently overwrite the "best" weights. Copy every tensor (onto
+        # the CPU, to keep accelerator memory free) to get a real snapshot.
+        best_model_state = {
+            name: tensor.detach().to('cpu', copy=True)
+            for name, tensor in model.state_dict().items()
+        }
+        patience_counter = 0
+        print(f"✓ New best validation loss: {best_val_loss:.4f} (Epoch {best_epoch})")
+    else:
+        patience_counter += 1
+        print(f"  No improvement for {patience_counter} epoch(s)")
+        if patience_counter >= patience:
+            print(f"\nEarly stopping triggered! Best validation loss: {best_val_loss:.4f} at epoch {best_epoch}")
+            break
+    print("-" * 60)
+# Load best model state
+# best_model_state stays None only if no epoch ever improved on the initial
+# best_val_loss of infinity; in that case the current weights are kept.
+if best_model_state is not None:
+    print(f"\nLoading best model from epoch {best_epoch} with validation loss: {best_val_loss:.4f}")
+    model.load_state_dict(best_model_state)
+else:
+    print("\nNo best model found, using final model state")
+# Optimize decision thresholds using validation set
+# (done after restoring, so the thresholds are tuned for the weights that get saved)
+print("Finding optimal decision thresholds for each class...")
+optimal_thresholds = find_optimal_thresholds(model, val_loader, label_cols, device)
+print(f"Optimal thresholds: {optimal_thresholds}")
+# Save the weights only (state_dict); a fuller checkpoint is written at the end of the script
+SAVE_DIR = r"C:\temp\new_models"  # Windows-specific absolute path; created below if missing
+os.makedirs(SAVE_DIR, exist_ok=True)
+SAVE_PATH = os.path.join(SAVE_DIR, "gemma_price_specific.pt")
+# model.to('cpu') moves the module itself, so the saved tensors live on the CPU
+torch.save(model.to('cpu').state_dict(), SAVE_PATH)
+model.to(device)  # Move model back to device after saving
+print(f"\nModel saved to: {SAVE_PATH}")
+# Plot training and validation loss
+print("\n" + "="*60)
+print("PLOTTING TRAINING CURVES")
+print("="*60)
+plt.figure(figsize=(10, 6))
+# Early stopping can end training before EPOCHS epochs were run. Size the
+# x-axis from the epochs actually recorded; a fixed range(1, EPOCHS + 1) would
+# then be longer than the loss lists and plt.plot() would reject the mismatch.
+epochs_range = range(1, len(train_losses) + 1)
+plt.plot(epochs_range, train_losses, 'b-o', label='Training Loss', linewidth=2, markersize=8)
+plt.plot(epochs_range, val_losses, 'r-s', label='Validation Loss', linewidth=2, markersize=8)
+plt.xlabel('Epoch', fontsize=12)
+plt.ylabel('Loss', fontsize=12)
+plt.title('Training and Validation Loss Over Epochs', fontsize=14, fontweight='bold')
+plt.legend(fontsize=10)
+plt.grid(True, alpha=0.3)
+plt.tight_layout()
+# Save the plot
+plot_path = 'training_loss_plot_price.png'
+plt.savefig(plot_path, dpi=300, bbox_inches='tight')
+print(f"Training loss plot saved to: {plot_path}")
+# Display loss values
+print("\nLoss values per epoch:")
+print("-" * 40)
+for i, (train_loss, val_loss) in enumerate(zip(train_losses, val_losses), 1):
+    print(f"Epoch {i}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
+print("-" * 40)
+# Plot detailed per-batch loss curves
+print("\nGenerating detailed per-batch loss plot...")
+# Create figure with two subplots
+fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))
+# Calculate moving average for smoothing (window size = 50 batches)
+def moving_average(data, window_size):
+    """
+    Return the simple moving average of `data` over `window_size` items.
+    When `data` is shorter than the window, the window shrinks to half the
+    data length (at least 1). The result has len(data) - window + 1 entries.
+    """
+    if len(data) < window_size:
+        window_size = max(1, len(data) // 2)
+    # Prefix sums with a leading 0 turn every window sum into one subtraction
+    cumsum = np.cumsum(np.insert(data, 0, 0))
+    return (cumsum[window_size:] - cumsum[:-window_size]) / window_size
+train_ma = moving_average(train_batch_losses, 50)
+val_ma = moving_average(val_batch_losses, 50)
+# Number of epochs that actually ran; lower than EPOCHS after early stopping
+completed_epochs = len(train_losses)
+# Subplot 1: Training loss per batch
+ax1.plot(train_batch_losses, alpha=0.3, color='lightblue', linewidth=0.5, label='Raw Training Loss')
+ax1.plot(range(len(train_ma)), train_ma, color='blue', linewidth=2, label='Smoothed (Moving Avg, window=50)')
+ax1.set_xlabel('Training Batch', fontsize=11)
+ax1.set_ylabel('Loss', fontsize=11)
+ax1.set_title('Training Loss per Batch (Detailed View)', fontsize=13, fontweight='bold')
+ax1.legend(fontsize=9)
+ax1.grid(True, alpha=0.3)
+# Add vertical lines for epoch boundaries
+# Only between epochs that were run: markers for skipped epochs would sit
+# beyond the last batch and stretch the x-axis past the data.
+batches_per_epoch = len(train_loader)
+for epoch_idx in range(1, completed_epochs):
+    ax1.axvline(x=epoch_idx * batches_per_epoch, color='red', linestyle='--', linewidth=1, alpha=0.5)
+# Subplot 2: Validation loss per batch
+ax2.plot(val_batch_losses, alpha=0.3, color='lightcoral', linewidth=0.5, label='Raw Validation Loss')
+ax2.plot(range(len(val_ma)), val_ma, color='red', linewidth=2, label='Smoothed (Moving Avg, window=50)')
+ax2.set_xlabel('Validation Batch', fontsize=11)
+ax2.set_ylabel('Loss', fontsize=11)
+ax2.set_title('Validation Loss per Batch (Detailed View)', fontsize=13, fontweight='bold')
+ax2.legend(fontsize=9)
+ax2.grid(True, alpha=0.3)
+# Add vertical lines for epoch boundaries
+val_batches_per_epoch = len(val_loader)
+for epoch_idx in range(1, completed_epochs):
+    ax2.axvline(x=epoch_idx * val_batches_per_epoch, color='blue', linestyle='--', linewidth=1, alpha=0.5)
+plt.tight_layout()
+# Save the detailed plot
+detailed_plot_path = 'training_loss_per_batch_detailed_price.png'
+plt.savefig(detailed_plot_path, dpi=300, bbox_inches='tight')
+print(f"Detailed per-batch loss plot saved to: {detailed_plot_path}")
+# Print batch loss statistics
+print("\nBatch Loss Statistics:")
+print("-" * 60)
+print(f"Training batches: {len(train_batch_losses)}")
+print(f"  Min loss: {min(train_batch_losses):.4f}")
+print(f"  Max loss: {max(train_batch_losses):.4f}")
+print(f"  Mean loss: {np.mean(train_batch_losses):.4f}")
+print(f"  Std dev: {np.std(train_batch_losses):.4f}")
+print(f"\nValidation batches: {len(val_batch_losses)}")
+print(f"  Min loss: {min(val_batch_losses):.4f}")
+print(f"  Max loss: {max(val_batch_losses):.4f}")
+print(f"  Mean loss: {np.mean(val_batch_losses):.4f}")
+print(f"  Std dev: {np.std(val_batch_losses):.4f}")
+print("-" * 60)
+# VALIDATION SET EVALUATION (WITH OPTIMIZED THRESHOLDS)
+print("\n" + "="*60)
+print("VALIDATION SET EVALUATION (WITH OPTIMIZED THRESHOLDS)")
+print("="*60)
+val_preds, val_labels_eval = predict_with_thresholds(model, val_loader, optimal_thresholds, device)
+# Baseline for comparison: the same helper with every class cut at the default 0.5
+default_cutoffs = np.full(len(label_cols), 0.5)
+val_preds_default, _ = predict_with_thresholds(model, val_loader, default_cutoffs, device)
+print(f"\nPredicted data shape: {val_preds.shape}")
+print(f"Ground truth data shape: {val_labels_eval.shape}")
+# Per-label F1 with the default cutoff, then with the tuned cutoffs
+print("\n" + "="*60)
+print("COMPARISON: Default vs Optimized Thresholds")
+print("="*60)
+print("\nDefault Threshold (0.5):")
+for label, y_true, y_pred in zip(label_cols, val_labels_eval.T, val_preds_default.T):
+    f1_default = f1_score(y_true, y_pred, zero_division=0)
+    print(f"  {label}: F1 = {f1_default:.4f}")
+print("\nOptimized Thresholds:")
+for label, cutoff, y_true, y_pred in zip(label_cols, optimal_thresholds, val_labels_eval.T, val_preds.T):
+    f1_optimized = f1_score(y_true, y_pred, zero_division=0)
+    print(f"  {label}: F1 = {f1_optimized:.4f} (threshold = {cutoff:.2f})")
+print("="*60 + "\n")
+# Classification report over all labels
+print('\n' + '='*60)
+print('CLASSIFICATION REPORT (VALIDATION)')
+print('='*60)
+print(classification_report(val_labels_eval, val_preds, target_names=label_cols))
+# Hamming loss: share of individual label decisions that are wrong
+val_hamming_loss = hamming_loss(val_labels_eval, val_preds)
+print("="*60)
+print("HAMMING LOSS (Multi-label Error Rate)")
+print("="*60)
+print(f"Hamming Loss: {val_hamming_loss:.4f}")
+print(f"(Fraction of incorrectly predicted labels: {val_hamming_loss:.2%})")
+# Binary metrics and confusion counts, one label column at a time
+print("\n" + "="*60)
+print("PER-ASPECT METRICS (VALIDATION)")
+print("="*60)
+for aspect, y_true, y_pred in zip(label_cols, val_labels_eval.T, val_preds.T):
+    acc = accuracy_score(y_true, y_pred)
+    prec = precision_score(y_true, y_pred, zero_division=0)
+    rec = recall_score(y_true, y_pred, zero_division=0)
+    f1 = f1_score(y_true, y_pred, zero_division=0)
+    print(f"\n=== {aspect.upper()} ===")
+    print(f"Accuracy:  {acc:.4f}")
+    print(f"Precision: {prec:.4f}")
+    print(f"Recall:    {rec:.4f}")
+    print(f"F1 Score:  {f1:.4f}")
+    tp = np.sum((y_true == 1) & (y_pred == 1))
+    tn = np.sum((y_true == 0) & (y_pred == 0))
+    fp = np.sum((y_true == 0) & (y_pred == 1))
+    fn = np.sum((y_true == 1) & (y_pred == 0))
+    print(f"  TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
+# Element-wise agreement between predictions and targets drives both summaries below
+val_label_hits = (val_preds == val_labels_eval)
+# Exact match: a sample counts only if every one of its labels is right
+val_exact_matches = val_label_hits.all(axis=1)
+val_exact_match_acc = np.mean(val_exact_matches)
+print("\n" + "="*60)
+print("EXACT MATCH (ALL ASPECTS)")
+print("="*60)
+print(f"Samples with ALL aspects correct: {np.sum(val_exact_matches)}/{len(val_exact_matches)}")
+print(f"Exact Match Accuracy: {val_exact_match_acc:.4f}")
+# Partial match: fraction of correct labels per sample, then averaged
+partial_match_scores = val_label_hits.mean(axis=1)
+avg_partial_match = np.mean(partial_match_scores)
+print("\n" + "="*60)
+print("PARTIAL MATCH (PER-SAMPLE LABEL ACCURACY)")
+print("="*60)
+print(f"Average Partial Match: {avg_partial_match:.4f} ({avg_partial_match:.2%})")
+print(f"(Average fraction of labels correctly predicted per sample)")
+# Sample predictions with match/mismatch
+# val_loader does not shuffle, so row idx of the prediction arrays lines up
+# with val_X[idx].
+print("\n" + "="*60)
+print("SAMPLE PREDICTIONS VS GROUND TRUTH (VALIDATION)")
+print("="*60)
+num_samples = min(10, len(val_X))
+print(f"\nShowing {num_samples} validation samples:\n")
+for idx in range(num_samples):
+    review = val_X[idx]
+    true_labels = [name for name, flag in zip(label_cols, val_labels_eval[idx]) if flag == 1]
+    pred_labels = [name for name, flag in zip(label_cols, val_preds[idx]) if flag == 1]
+    # Partial match here is recall-like: the share of the true labels that
+    # were also predicted.
+    matching_labels = len(set(true_labels) & set(pred_labels))
+    total_true_labels = len(true_labels)
+    if total_true_labels > 0:
+        partial_match = matching_labels / total_true_labels
+    else:
+        # No true labels: the sample is fully right only when nothing was
+        # predicted. Reporting "0/1 (0.00%)" here contradicted the
+        # "Exact" verdict printed just above it.
+        partial_match = 0.0 if pred_labels else 1.0
+    review_display = review[:150] + "..." if len(review) > 150 else review
+    print(f"Sample {idx + 1}:")
+    print(f"Review: {review_display}")
+    print(f"✓ True Labels:      {true_labels if true_labels else ['None']}")
+    print(f"→ Predicted Labels: {pred_labels if pred_labels else ['None']}")
+    print(f"Match: {'✓ Exact' if set(true_labels) == set(pred_labels) else '✗ Mismatch'}")
+    print(f"Partial Match: {matching_labels}/{total_true_labels} labels correct ({partial_match:.2%})")
+    print("-" * 40)
+# Final evaluation on the held-out test set, using the tuned thresholds
+print("\n" + "="*60)
+print("FINAL EVALUATION ON TEST SET (WITH OPTIMIZED THRESHOLDS)")
+print("="*60)
+all_preds, all_labels = predict_with_thresholds(model, test_loader, optimal_thresholds, device)
+print(f"\nPredicted data shape: {all_preds.shape}")
+print(f"Ground truth data shape: {all_labels.shape}")
+# Classification report over all labels
+print('\n' + '='*60)
+print('CLASSIFICATION REPORT')
+print('='*60)
+print(classification_report(all_labels, all_preds, target_names=label_cols))
+# Hamming loss: share of individual label decisions that are wrong
+hamming_loss_value = hamming_loss(all_labels, all_preds)
+print("="*60)
+print("HAMMING LOSS (Multi-label Error Rate)")
+print("="*60)
+print(f"Hamming Loss: {hamming_loss_value:.4f}")
+print(f"(Fraction of incorrectly predicted labels: {hamming_loss_value:.2%})")
+# Binary metrics and confusion counts, one label column at a time
+print("\n" + "="*60)
+print("PER-ASPECT METRICS")
+print("="*60)
+for aspect, y_true, y_pred in zip(label_cols, all_labels.T, all_preds.T):
+    acc = accuracy_score(y_true, y_pred)
+    prec = precision_score(y_true, y_pred, zero_division=0)
+    rec = recall_score(y_true, y_pred, zero_division=0)
+    f1 = f1_score(y_true, y_pred, zero_division=0)
+    print(f"\n=== {aspect.upper()} ===")
+    print(f"Accuracy:  {acc:.4f}")
+    print(f"Precision: {prec:.4f}")
+    print(f"Recall:    {rec:.4f}")
+    print(f"F1 Score:  {f1:.4f}")
+    tp = np.sum((y_true == 1) & (y_pred == 1))
+    tn = np.sum((y_true == 0) & (y_pred == 0))
+    fp = np.sum((y_true == 0) & (y_pred == 1))
+    fn = np.sum((y_true == 1) & (y_pred == 0))
+    print(f"  TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
+# Element-wise agreement between predictions and targets drives both summaries below
+test_label_hits = (all_preds == all_labels)
+# Exact match: a sample counts only if every one of its labels is right
+exact_matches = test_label_hits.all(axis=1)
+exact_match_acc = np.mean(exact_matches)
+print("\n" + "="*60)
+print("EXACT MATCH (ALL ASPECTS)")
+print("="*60)
+print(f"Samples with ALL aspects correct: {np.sum(exact_matches)}/{len(exact_matches)}")
+print(f"Exact Match Accuracy: {exact_match_acc:.4f}")
+# Partial match: fraction of correct labels per sample, then averaged
+test_partial_match_scores = test_label_hits.mean(axis=1)
+avg_test_partial_match = np.mean(test_partial_match_scores)
+print("\n" + "="*60)
+print("PARTIAL MATCH (PER-SAMPLE LABEL ACCURACY)")
+print("="*60)
+print(f"Average Partial Match: {avg_test_partial_match:.4f} ({avg_test_partial_match:.2%})")
+print(f"(Average fraction of labels correctly predicted per sample)")
+# Sample predictions
+# test_loader does not shuffle, so row idx of the prediction arrays lines up
+# with test_X[idx].
+print("\n" + "="*60)
+print("SAMPLE PREDICTIONS VS GROUND TRUTH")
+print("="*60)
+num_samples = min(10, len(test_X))
+print(f"\nShowing {num_samples} test samples:\n")
+for idx in range(num_samples):
+    review = test_X[idx]
+    true_labels = [name for name, flag in zip(label_cols, all_labels[idx]) if flag == 1]
+    pred_labels = [name for name, flag in zip(label_cols, all_preds[idx]) if flag == 1]
+    # Partial match here is recall-like: the share of the true labels that
+    # were also predicted.
+    matching_labels = len(set(true_labels) & set(pred_labels))
+    total_true_labels = len(true_labels)
+    if total_true_labels > 0:
+        partial_match = matching_labels / total_true_labels
+    else:
+        # No true labels: the sample is fully right only when nothing was
+        # predicted. Reporting "0/1 (0.00%)" here contradicted the
+        # "Exact" verdict printed just above it.
+        partial_match = 0.0 if pred_labels else 1.0
+    review_display = review[:150] + "..." if len(review) > 150 else review
+    print(f"Sample {idx + 1}:")
+    print(f"Review: {review_display}")
+    print(f"✓ True Labels:      {true_labels if true_labels else ['None']}")
+    print(f"→ Predicted Labels: {pred_labels if pred_labels else ['None']}")
+    print(f"Match: {'✓ Exact' if set(true_labels) == set(pred_labels) else '✗ Mismatch'}")
+    print(f"Partial Match: {matching_labels}/{total_true_labels} labels correct ({partial_match:.2%})")
+    print("-" * 40)
+# Save a full training checkpoint into SAVE_DIR: model weights, optimizer state,
+# loss bookkeeping and the tuned decision thresholds.
+model_save_path = os.path.join(SAVE_DIR, 'gemma_price_classifier.pth')
+torch.save({
+    # 'epoch', 'train_loss' and 'val_loss' describe the best epoch when one was
+    # recorded; otherwise they fall back to the last epoch (or 0 if no loss was logged)
+    'epoch': best_epoch if best_model_state is not None else EPOCHS,
+    'model_state_dict': model.state_dict(),
+    'optimizer_state_dict': optimizer.state_dict(),
+    'train_loss': train_losses[best_epoch - 1] if best_model_state is not None else train_losses[-1] if train_losses else 0,
+    'val_loss': best_val_loss if best_model_state is not None else (val_losses[-1] if val_losses else 0),
+    'best_epoch': best_epoch,
+    'best_val_loss': best_val_loss,
+    # stored as returned by find_optimal_thresholds (a numpy array)
+    'optimal_thresholds': optimal_thresholds,
+}, model_save_path)
+print(f"Model saved to {model_save_path}")
+print("\n" + "="*60)
+print("TRAINING COMPLETE")
+print("="*60)

6 _ Fine-Tuning (Gemma)/Specific Models/LLM trained Gemma Model/gemini_product_model.py ADDED Viewed

	@@ -0,0 +1,741 @@

+import pandas as pd
+import torch
+from torch.utils.data import Dataset, DataLoader
+from torch import nn
+from transformers import AutoTokenizer, GemmaModel
+from peft import LoraConfig, get_peft_model, TaskType
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, hamming_loss, accuracy_score, precision_score, recall_score, f1_score
+import numpy as np
+import random
+import matplotlib.pyplot as plt
+# For UTF-8 characters in output
+import sys
+sys.stdout.reconfigure(encoding='utf-8')
+# Set random seeds for reproducibility
+seed_value = 42
+random.seed(seed_value)
+np.random.seed(seed_value)
+torch.manual_seed(seed_value)
+if torch.cuda.is_available():
+    torch.cuda.manual_seed_all(seed_value)
+# Parameters
+MODEL_ID = 'google/gemma-3-1b-pt'
+BATCH_SIZE = 8
+EPOCHS = 10
+LR = 5e-5
+# Load data - product-specific
+# Each path is named once so the progress message always shows the file that
+# is really read (the messages used to name different files).
+TRAIN_CSV = 'datasets/gemini/product_train_dataset.csv'
+TEST_CSV = 'datasets/test_product_dataset.csv'
+print(f"Loading training data from {TRAIN_CSV}...")
+train_df = pd.read_csv(TRAIN_CSV)
+print(f"Loading test data from {TEST_CSV}...")
+test_df = pd.read_csv(TEST_CSV)
+# Define label columns (Product sub-aspects)
+label_cols = [
+    'Color_PRO',
+    'Condition_PRO',
+    'Correctness_PRO',
+    'Durability_PRO',
+    'Effectiveness_PRO',
+    'Functionality_PRO',
+    'Material_PRO',
+    'Sensory_PRO',
+    'Size_PRO',
+    'General_PRO'
+]
+# Prepare training data with 80/20 train/validation split
+train_X_full = train_df['Review'].astype(str).tolist()
+train_Y_full = train_df[label_cols].values.astype(np.float32)
+# The split reuses seed_value so a single constant controls reproducibility
+train_X, val_X, train_Y, val_Y = train_test_split(
+    train_X_full, train_Y_full,
+    test_size=0.2,
+    random_state=seed_value
+)
+# Prepare test data
+test_X = test_df['Review'].astype(str).tolist()
+test_Y = test_df[label_cols].values.astype(np.float32)
+print(f"\nDataset sizes:")
+print(f"Training samples: {len(train_X)}")
+print(f"Validation samples: {len(val_X)}")
+print(f"Test samples: {len(test_X)}")
+print(f"Number of labels: {len(label_cols)}")
+# Compute class weights for imbalanced dataset
+def compute_class_weights(labels, label_names):
+    """
+    Compute per-label positive-class weights for multi-label classification.
+    The weight of a label is sqrt(negatives / positives): the negative-to-
+    positive ratio, dampened by a square root to avoid extreme values. A label
+    with no positives or no negatives gets the neutral weight 1.0; without
+    that guard an all-positive label would get weight 0 and its positives
+    would contribute nothing to the loss.
+    A per-label summary is printed as a side effect.
+    Args:
+        labels: numpy array of shape (n_samples, n_labels) holding 0/1 values
+        label_names: list of label column names
+    Returns:
+        pos_weight: torch float tensor with one weight per label
+    """
+    n_samples = labels.shape[0]
+    pos_weights = []
+    print("\n" + "="*60)
+    print("CLASS IMBALANCE ANALYSIS")
+    print("="*60)
+    for i, label_name in enumerate(label_names):
+        pos_count = np.sum(labels[:, i] == 1)
+        neg_count = np.sum(labels[:, i] == 0)
+        # Calculate positive class weight (ratio of negative to positive)
+        if pos_count > 0 and neg_count > 0:
+            # Apply square root dampening to avoid extreme weights
+            pos_weight = np.sqrt(neg_count / pos_count)
+        else:
+            pos_weight = 1.0
+        pos_weights.append(pos_weight)
+        # Percentages are reported as 0 for an empty dataset instead of dividing by zero
+        pos_pct = pos_count / n_samples * 100 if n_samples else 0.0
+        neg_pct = neg_count / n_samples * 100 if n_samples else 0.0
+        print(f"\n{label_name}:")
+        print(f"  Positive samples: {pos_count} ({pos_pct:.2f}%)")
+        print(f"  Negative samples: {neg_count} ({neg_pct:.2f}%)")
+        print(f"  Raw imbalance ratio (neg/pos): {neg_count/pos_count if pos_count > 0 else 1.0:.4f}")
+        print(f"  Dampened weight (sqrt of ratio): {pos_weight:.4f}")
+    print("="*60 + "\n")
+    return torch.FloatTensor(pos_weights)
+def find_optimal_thresholds(model, dataloader, label_cols, device):
+    """
+    Choose, for each label independently, the decision threshold that
+    maximizes the F1-score on the data served by `dataloader`.
+    Candidates run from 0.10 to 0.90 in steps of 0.05. When several
+    candidates tie for the best F1 the lowest one is kept; when no candidate
+    scores above zero the default threshold 0.5 is used for that label.
+    Args:
+        model: trained model, called as model(input_ids, attention_mask)
+        dataloader: validation loader yielding (input_ids, attention_mask, labels)
+        label_cols: list of label column names
+        device: torch device the input tensors are moved to
+    Returns:
+        numpy array with one threshold per entry of label_cols
+    """
+    from sklearn.metrics import f1_score
+    rule = "=" * 60
+    print("\n" + rule)
+    print("OPTIMIZING DECISION THRESHOLDS")
+    print(rule)
+    # One inference pass: gather sigmoid probabilities and the matching targets
+    model.eval()
+    prob_batches, target_batches = [], []
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in dataloader:
+            logits = model(input_ids.to(device), attention_mask.to(device))
+            prob_batches.append(torch.sigmoid(logits).cpu().numpy())
+            target_batches.append(labels.cpu().numpy())
+    all_probs = np.vstack(prob_batches)
+    all_labels = np.vstack(target_batches)
+    candidates = np.arange(0.1, 0.91, 0.05)  # 0.1 to 0.9 in steps of 0.05
+    chosen = []
+    for col, label_name in enumerate(label_cols):
+        y_true = all_labels[:, col]
+        y_prob = all_probs[:, col]
+        scores = [
+            f1_score(y_true, (y_prob > cutoff).astype(int), zero_division=0)
+            for cutoff in candidates
+        ]
+        # argmax returns the first maximum, i.e. the lowest best-scoring cutoff
+        top = int(np.argmax(scores))
+        if scores[top] > 0.0:
+            best_threshold, best_f1 = candidates[top], scores[top]
+        else:
+            best_threshold, best_f1 = 0.5, 0.0
+        chosen.append(best_threshold)
+        default_f1 = f1_score(y_true, (y_prob > 0.5).astype(int), zero_division=0)
+        print(f"\n{label_name}:")
+        print(f"  Optimal threshold: {best_threshold:.2f}")
+        print(f"  Best F1-score: {best_f1:.4f}")
+        print(f"  (Default 0.5 threshold F1: {default_f1:.4f})")
+    print(rule + "\n")
+    return np.array(chosen)
+def predict_with_thresholds(model, dataloader, thresholds, device):
+    """
+    Run the model over `dataloader` and binarize its sigmoid outputs with a
+    separate cutoff per class.
+    Args:
+        model: trained model, called as model(input_ids, attention_mask)
+        dataloader: loader yielding (input_ids, attention_mask, labels)
+        thresholds: numpy array with one cutoff per class
+        device: torch device the input tensors are moved to
+    Returns:
+        predictions: integer numpy array of 0/1 decisions, batches stacked row-wise
+        labels: numpy array of the true labels, stacked in the same order
+    """
+    model.eval()
+    pred_batches, target_batches = [], []
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in dataloader:
+            logits = model(input_ids.to(device), attention_mask.to(device))
+            probs = torch.sigmoid(logits).cpu().numpy()
+            # A probability strictly above its class cutoff counts as a positive;
+            # columns without a cutoff stay 0
+            decisions = np.zeros_like(probs, dtype=int)
+            for col, cutoff in enumerate(thresholds):
+                decisions[:, col] = probs[:, col] > cutoff
+            pred_batches.append(decisions)
+            target_batches.append(labels.cpu().numpy())
+    return np.vstack(pred_batches), np.vstack(target_batches)
+# Dataset class
+class ReviewDataset(Dataset):
+    """
+    Dataset pairing each review text with its multi-label target row.
+    Texts are tokenized lazily, on every item access, with the module-level
+    `tokenizer`, which therefore has to exist before the first item is read.
+    """
+    def __init__(self, texts, labels):
+        self.texts = texts
+        self.labels = labels
+    def __len__(self):
+        return len(self.texts)
+    def __getitem__(self, idx):
+        # Fixed-length encoding (pad/truncate to 256 tokens) so that items
+        # can be stacked into a batch
+        tokenizer_options = dict(
+            padding='max_length',
+            truncation=True,
+            max_length=256,
+            return_tensors='pt',
+        )
+        encoded = tokenizer(self.texts[idx], **tokenizer_options)
+        # squeeze() drops the size-1 dimensions of the encoded tensors
+        # (presumably the leading batch dimension added by return_tensors='pt')
+        return (
+            encoded['input_ids'].squeeze(),
+            encoded['attention_mask'].squeeze(),
+            torch.FloatTensor(self.labels[idx]),
+        )
+# Initialize tokenizer
+# ReviewDataset.__getitem__ looks up this module-level name at item-access time,
+# so it must be bound before any of the loaders below is iterated.
+print("\nInitializing tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=True)
+# Create datasets
+train_dataset = ReviewDataset(train_X, train_Y)
+val_dataset = ReviewDataset(val_X, val_Y)
+test_dataset = ReviewDataset(test_X, test_Y)
+# Create data loaders
+# Only the training loader shuffles; validation and test keep dataset order.
+train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
+val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
+test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
+# Compute class weights based on training data
+# (the training split only: train_Y, not the validation or test labels)
+print("Computing class weights for imbalanced dataset...")
+pos_weights = compute_class_weights(train_Y, label_cols)
+# Initialize model with LoRA
+# NOTE(review): MODEL_ID is 'google/gemma-3-1b-pt', yet the checkpoint is loaded
+# through the GemmaModel class. Confirm that this class matches the Gemma 3
+# checkpoint architecture - TODO confirm.
+print("Initializing model with LoRA...")
+backbone = GemmaModel.from_pretrained(MODEL_ID, token=True, dtype=torch.bfloat16)
+# Rank-8 adapters (alpha 16, dropout 0.05) on the q_proj and v_proj modules
+lora_config = LoraConfig(
+    task_type=TaskType.FEATURE_EXTRACTION,
+    r=8,
+    lora_alpha=16,
+    lora_dropout=0.05,
+    target_modules=["q_proj", "v_proj"]
+)
+# Wrap the backbone so the LoRA adapters are attached to it
+backbone = get_peft_model(backbone, lora_config)
+# Classifier model
+class GemmaClassifier(nn.Module):
+    """
+    Multi-label classification head on top of a transformer backbone.
+    The backbone's last hidden states are mean-pooled over the real
+    (non-padding) tokens only and passed through a single linear layer that
+    emits one logit per label.
+    Args:
+        backbone: module called as backbone(input_ids=..., attention_mask=...)
+            whose output exposes `last_hidden_state`, and whose
+            `config.hidden_size` gives the feature width
+        num_labels: number of output logits
+    """
+    def __init__(self, backbone, num_labels):
+        super().__init__()
+        self.backbone = backbone
+        # Parameter-free and no longer used by forward(); kept so that code
+        # reading `model.pooler` keeps working
+        self.pooler = nn.AdaptiveAvgPool1d(1)
+        self.classifier = nn.Linear(backbone.config.hidden_size, num_labels)
+    def forward(self, input_ids, attention_mask):
+        """Return logits of shape (batch, num_labels)."""
+        output = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
+        # Pool in float32 so the sum over tokens does not lose precision
+        hidden = output.last_hidden_state.float()
+        # Inputs are padded to a fixed length, so a plain mean over all
+        # positions would mix padding states into the sentence vector.
+        # Weight each position by its attention-mask entry instead.
+        mask = attention_mask.unsqueeze(-1).to(hidden.dtype)
+        # clamp guards against division by zero for a fully masked row
+        token_counts = mask.sum(dim=1).clamp(min=1.0)
+        pooled = (hidden * mask).sum(dim=1) / token_counts
+        logits = self.classifier(pooled)
+        return logits
+# Initialize model, optimizer, and loss function
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(f"Using device: {device}")
+model = GemmaClassifier(backbone, len(label_cols)).to(device)
+# The optimizer is handed every parameter of the wrapped model.
+optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
+# Use computed pos_weight to handle class imbalance
+criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weights.to(device))
+print(f"\nInitialized BCEWithLogitsLoss with pos_weight: {pos_weights.cpu().numpy()}")
+# Initialize loss tracking
+# train_losses / val_losses receive one mean value per epoch.
+train_losses = []
+val_losses = []
+train_batch_losses = []  # Per-batch training losses
+val_batch_losses = []    # Per-batch validation losses
+# Early stopping variables
+best_val_loss = float('inf')
+best_epoch = 0  # 1-based epoch of the best validation loss; 0 means "none yet"
+best_model_state = None
+patience = 5  # Number of epochs to wait for improvement
+patience_counter = 0
+# Training loop
+print("\n" + "="*60)
+print("TRAINING")
+print("="*60)
+for epoch in range(EPOCHS):
+    model.train()
+    total_loss = 0
+    batch_count = 0
+    for input_ids, attention_mask, labels in train_loader:
+        input_ids = input_ids.to(device)
+        attention_mask = attention_mask.to(device)
+        labels = labels.to(device)
+        optimizer.zero_grad()
+        logits = model(input_ids, attention_mask)
+        loss = criterion(logits, labels)
+        loss.backward()
+        optimizer.step()
+        total_loss += loss.item()
+        batch_count += 1
+        train_batch_losses.append(loss.item())  # Store per-batch loss
+        # Print progress every 100 batches
+        if batch_count % 100 == 0:
+            print(f"  Epoch {epoch+1} | Batch {batch_count}/{len(train_loader)} | Current Loss: {loss.item():.4f}")
+    avg_train_loss = total_loss / len(train_loader)
+    train_losses.append(avg_train_loss)
+    print(f"\nEpoch {epoch+1}/{EPOCHS} completed")
+    print(f"Average Training Loss: {avg_train_loss:.4f}")
+    # Validation on validation set
+    model.eval()
+    val_loss = 0
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in val_loader:
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+            labels = labels.to(device)
+            logits = model(input_ids, attention_mask)
+            loss = criterion(logits, labels)
+            val_loss += loss.item()
+            val_batch_losses.append(loss.item())  # Store per-batch validation loss
+    avg_val_loss = val_loss / len(val_loader)
+    val_losses.append(avg_val_loss)
+    print(f"Validation Loss: {avg_val_loss:.4f}")
+    # Early stopping check
+    if avg_val_loss < best_val_loss:
+        best_val_loss = avg_val_loss
+        best_epoch = epoch + 1
+        best_model_state = model.state_dict().copy()
+        patience_counter = 0
+        print(f"✓ New best validation loss: {best_val_loss:.4f} (Epoch {best_epoch})")
+    else:
+        patience_counter += 1
+        print(f"  No improvement for {patience_counter} epoch(s)")
+        if patience_counter >= patience:
+            print(f"\nEarly stopping triggered! Best validation loss: {best_val_loss:.4f} at epoch {best_epoch}")
+            break
+    print("-" * 60)
+# Load best model state
+if best_model_state is not None:
+    print(f"\nLoading best model from epoch {best_epoch} with validation loss: {best_val_loss:.4f}")
+    model.load_state_dict(best_model_state)
+else:
+    print("\nNo best model found, using final model state")
+# Optimize decision thresholds using validation set
+print("Finding optimal decision thresholds for each class...")
+optimal_thresholds = find_optimal_thresholds(model, val_loader, label_cols, device)
+print(f"Optimal thresholds: {optimal_thresholds}")
+# SAVE MODEL AFTER TRAINING
+SAVE_PATH = "gemma_product_specific.pt"
+torch.save(model.state_dict(), SAVE_PATH)
+print(f"\nModel saved to: {SAVE_PATH}")
+# Plot training and validation loss
+print("\n" + "="*60)
+print("PLOTTING TRAINING CURVES")
+print("="*60)
+plt.figure(figsize=(10, 6))
+epochs_range = range(1, EPOCHS + 1)
+plt.plot(epochs_range, train_losses, 'b-o', label='Training Loss', linewidth=2, markersize=8)
+plt.plot(epochs_range, val_losses, 'r-s', label='Validation Loss', linewidth=2, markersize=8)
+plt.xlabel('Epoch', fontsize=12)
+plt.ylabel('Loss', fontsize=12)
+plt.title('Training and Validation Loss Over Epochs', fontsize=14, fontweight='bold')
+plt.legend(fontsize=10)
+plt.grid(True, alpha=0.3)
+plt.tight_layout()
+# Save the plot
+plot_path = 'training_loss_plot.png'
+plt.savefig(plot_path, dpi=300, bbox_inches='tight')
+print(f"Training loss plot saved to: {plot_path}")
+# Display loss values
+print("\nLoss values per epoch:")
+print("-" * 40)
+for i, (train_loss, val_loss) in enumerate(zip(train_losses, val_losses), 1):
+    print(f"Epoch {i}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
+print("-" * 40)
+# Plot detailed per-batch loss curves
+print("\nGenerating detailed per-batch loss plot...")
+# Create figure with two subplots
+fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))
+# Calculate moving average for smoothing (window size = 50 batches)
+def moving_average(data, window_size):
+    if len(data) < window_size:
+        window_size = max(1, len(data) // 2)
+    cumsum = np.cumsum(np.insert(data, 0, 0))
+    return (cumsum[window_size:] - cumsum[:-window_size]) / window_size
+train_ma = moving_average(train_batch_losses, 50)
+val_ma = moving_average(val_batch_losses, 50)
+# Subplot 1: Training loss per batch
+ax1.plot(train_batch_losses, alpha=0.3, color='lightblue', linewidth=0.5, label='Raw Training Loss')
+ax1.plot(range(len(train_ma)), train_ma, color='blue', linewidth=2, label='Smoothed (Moving Avg, window=50)')
+ax1.set_xlabel('Training Batch', fontsize=11)
+ax1.set_ylabel('Loss', fontsize=11)
+ax1.set_title('Training Loss per Batch (Detailed View)', fontsize=13, fontweight='bold')
+ax1.legend(fontsize=9)
+ax1.grid(True, alpha=0.3)
+# Add vertical lines for epoch boundaries
+batches_per_epoch = len(train_loader)
+for epoch_idx in range(1, EPOCHS):
+    ax1.axvline(x=epoch_idx * batches_per_epoch, color='red', linestyle='--', linewidth=1, alpha=0.5)
+# Subplot 2: Validation loss per batch
+ax2.plot(val_batch_losses, alpha=0.3, color='lightcoral', linewidth=0.5, label='Raw Validation Loss')
+ax2.plot(range(len(val_ma)), val_ma, color='red', linewidth=2, label='Smoothed (Moving Avg, window=50)')
+ax2.set_xlabel('Validation Batch', fontsize=11)
+ax2.set_ylabel('Loss', fontsize=11)
+ax2.set_title('Validation Loss per Batch (Detailed View)', fontsize=13, fontweight='bold')
+ax2.legend(fontsize=9)
+ax2.grid(True, alpha=0.3)
+# Add vertical lines for epoch boundaries
+val_batches_per_epoch = len(val_loader)
+for epoch_idx in range(1, EPOCHS):
+    ax2.axvline(x=epoch_idx * val_batches_per_epoch, color='blue', linestyle='--', linewidth=1, alpha=0.5)
+plt.tight_layout()
+# Save the detailed plot
+detailed_plot_path = 'training_loss_per_batch_detailed.png'
+plt.savefig(detailed_plot_path, dpi=300, bbox_inches='tight')
+print(f"Detailed per-batch loss plot saved to: {detailed_plot_path}")
+# Print batch loss statistics
+print("\nBatch Loss Statistics:")
+print("-" * 60)
+print(f"Training batches: {len(train_batch_losses)}")
+print(f"  Min loss: {min(train_batch_losses):.4f}")
+print(f"  Max loss: {max(train_batch_losses):.4f}")
+print(f"  Mean loss: {np.mean(train_batch_losses):.4f}")
+print(f"  Std dev: {np.std(train_batch_losses):.4f}")
+print(f"\nValidation batches: {len(val_batch_losses)}")
+print(f"  Min loss: {min(val_batch_losses):.4f}")
+print(f"  Max loss: {max(val_batch_losses):.4f}")
+print(f"  Mean loss: {np.mean(val_batch_losses):.4f}")
+print(f"  Std dev: {np.std(val_batch_losses):.4f}")
+print("-" * 60)
+# VALIDATION SET EVALUATION (WITH OPTIMIZED THRESHOLDS)
+print("\n" + "="*60)
+print("VALIDATION SET EVALUATION (WITH OPTIMIZED THRESHOLDS)")
+print("="*60)
+# Binary predictions using the per-label thresholds, plus the matching labels.
+val_preds, val_labels_eval = predict_with_thresholds(model, val_loader, optimal_thresholds, device)
+# Also get predictions with default threshold for comparison
+# This is a second forward pass over val_loader with a fixed 0.5 cut-off.
+model.eval()
+val_preds_default = []
+with torch.no_grad():
+    for input_ids, attention_mask, labels in val_loader:
+        input_ids = input_ids.to(device)
+        attention_mask = attention_mask.to(device)
+        logits = model(input_ids, attention_mask)
+        probs = torch.sigmoid(logits).cpu().numpy()
+        preds = (probs > 0.5).astype(int)
+        val_preds_default.append(preds)
+# Stack the per-batch arrays into one (samples, labels) array.
+val_preds_default = np.vstack(val_preds_default)
+print(f"\nPredicted data shape: {val_preds.shape}")
+print(f"Ground truth data shape: {val_labels_eval.shape}")
+# Comparison: Default vs Optimized Thresholds
+print("\n" + "="*60)
+print("COMPARISON: Default vs Optimized Thresholds")
+print("="*60)
+print("\nDefault Threshold (0.5):")
+for i, label in enumerate(label_cols):
+    f1_default = f1_score(val_labels_eval[:, i], val_preds_default[:, i], zero_division=0)
+    print(f"  {label}: F1 = {f1_default:.4f}")
+print("\nOptimized Thresholds:")
+for i, label in enumerate(label_cols):
+    f1_optimized = f1_score(val_labels_eval[:, i], val_preds[:, i], zero_division=0)
+    print(f"  {label}: F1 = {f1_optimized:.4f} (threshold = {optimal_thresholds[i]:.2f})")
+print("="*60 + "\n")
+# Classification Report
+# NOTE(review): the thresholds were tuned on this same validation split, so
+# the "optimized" figures here are optimistic by construction.
+print('\n' + '='*60)
+print('CLASSIFICATION REPORT (VALIDATION)')
+print('='*60)
+print(classification_report(val_labels_eval, val_preds, target_names=label_cols))
+# Hamming Loss
+val_hamming_loss = hamming_loss(val_labels_eval, val_preds)
+print("="*60)
+print("HAMMING LOSS (Multi-label Error Rate)")
+print("="*60)
+print(f"Hamming Loss: {val_hamming_loss:.4f}")
+print(f"(Fraction of incorrectly predicted labels: {val_hamming_loss:.2%})")
+# Per-aspect metrics
+print("\n" + "="*60)
+print("PER-ASPECT METRICS (VALIDATION)")
+print("="*60)
+for i, aspect in enumerate(label_cols):
+    y_true = val_labels_eval[:, i]
+    y_pred = val_preds[:, i]
+    acc = accuracy_score(y_true, y_pred)
+    prec = precision_score(y_true, y_pred, zero_division=0)
+    rec = recall_score(y_true, y_pred, zero_division=0)
+    f1 = f1_score(y_true, y_pred, zero_division=0)
+    print(f"\n=== {aspect.upper()} ===")
+    print(f"Accuracy:  {acc:.4f}")
+    print(f"Precision: {prec:.4f}")
+    print(f"Recall:    {rec:.4f}")
+    print(f"F1 Score:  {f1:.4f}")
+    tp = np.sum((y_true == 1) & (y_pred == 1))
+    tn = np.sum((y_true == 0) & (y_pred == 0))
+    fp = np.sum((y_true == 0) & (y_pred == 1))
+    fn = np.sum((y_true == 1) & (y_pred == 0))
+    print(f"  TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
+# Exact match accuracy
+val_exact_matches = np.all(val_preds == val_labels_eval, axis=1)
+val_exact_match_acc = np.mean(val_exact_matches)
+print("\n" + "="*60)
+print("EXACT MATCH (ALL ASPECTS)")
+print("="*60)
+print(f"Samples with ALL aspects correct: {np.sum(val_exact_matches)}/{len(val_exact_matches)}")
+print(f"Exact Match Accuracy: {val_exact_match_acc:.4f}")
+# Partial match accuracy (per sample)
+partial_match_scores = []
+for i in range(len(val_labels_eval)):
+    correct_labels = np.sum(val_preds[i] == val_labels_eval[i])
+    partial_match_scores.append(correct_labels / len(label_cols))
+partial_match_scores = np.array(partial_match_scores)
+avg_partial_match = np.mean(partial_match_scores)
+print("\n" + "="*60)
+print("PARTIAL MATCH (PER-SAMPLE LABEL ACCURACY)")
+print("="*60)
+print(f"Average Partial Match: {avg_partial_match:.4f} ({avg_partial_match:.2%})")
+print(f"(Average fraction of labels correctly predicted per sample)")
+# Sample predictions with match/mismatch
+print("\n" + "="*60)
+print("SAMPLE PREDICTIONS VS GROUND TRUTH (VALIDATION)")
+print("="*60)
+# val_loader does not shuffle, so row idx of the prediction arrays lines up
+# with val_X[idx].
+num_samples = min(10, len(val_X))
+print(f"\nShowing {num_samples} validation samples:\n")
+for idx in range(num_samples):
+    review = val_X[idx]
+    # Translate the 0/1 rows back into label names.
+    true_labels = [label_cols[i] for i, v in enumerate(val_labels_eval[idx]) if v == 1]
+    pred_labels = [label_cols[i] for i, v in enumerate(val_preds[idx]) if v == 1]
+    # Calculate partial match for this sample
+    # Count how many true labels were correctly predicted
+    matching_labels = len(set(true_labels) & set(pred_labels))
+    # The denominator falls back to 1 when there are no true labels, so such a
+    # sample prints "0/1 ... (0.00%)" even when the prediction is also empty.
+    total_true_labels = len(true_labels) if len(true_labels) > 0 else 1
+    partial_match = matching_labels / total_true_labels
+    # Long reviews are cut to 150 characters for display.
+    review_display = review[:150] + "..." if len(review) > 150 else review
+    print(f"Sample {idx + 1}:")
+    print(f"Review: {review_display}")
+    print(f"✓ True Labels:      {true_labels if true_labels else ['None']}")
+    print(f"→ Predicted Labels: {pred_labels if pred_labels else ['None']}")
+    print(f"Match: {'✓ Exact' if set(true_labels) == set(pred_labels) else '✗ Mismatch'}")
+    print(f"Partial Match: {matching_labels}/{total_true_labels} labels correct ({partial_match:.2%})")
+    print("-" * 40)
+# Final Evaluation on Test Set (WITH OPTIMIZED THRESHOLDS)
+print("\n" + "="*60)
+print("FINAL EVALUATION ON TEST SET (WITH OPTIMIZED THRESHOLDS)")
+print("="*60)
+# The thresholds come from the validation split and are applied unchanged here.
+all_preds, all_labels = predict_with_thresholds(model, test_loader, optimal_thresholds, device)
+print(f"\nPredicted data shape: {all_preds.shape}")
+print(f"Ground truth data shape: {all_labels.shape}")
+# Classification Report
+print('\n' + '='*60)
+print('CLASSIFICATION REPORT')
+print('='*60)
+print(classification_report(all_labels, all_preds, target_names=label_cols))
+# Hamming Loss
+hamming_loss_value = hamming_loss(all_labels, all_preds)
+print("="*60)
+print("HAMMING LOSS (Multi-label Error Rate)")
+print("="*60)
+print(f"Hamming Loss: {hamming_loss_value:.4f}")
+print(f"(Fraction of incorrectly predicted labels: {hamming_loss_value:.2%})")
+# Per-aspect metrics
+print("\n" + "="*60)
+print("PER-ASPECT METRICS")
+print("="*60)
+for i, aspect in enumerate(label_cols):
+    y_true = all_labels[:, i]
+    y_pred = all_preds[:, i]
+    acc = accuracy_score(y_true, y_pred)
+    prec = precision_score(y_true, y_pred, zero_division=0)
+    rec = recall_score(y_true, y_pred, zero_division=0)
+    f1 = f1_score(y_true, y_pred, zero_division=0)
+    print(f"\n=== {aspect.upper()} ===")
+    print(f"Accuracy:  {acc:.4f}")
+    print(f"Precision: {prec:.4f}")
+    print(f"Recall:    {rec:.4f}")
+    print(f"F1 Score:  {f1:.4f}")
+    tp = np.sum((y_true == 1) & (y_pred == 1))
+    tn = np.sum((y_true == 0) & (y_pred == 0))
+    fp = np.sum((y_true == 0) & (y_pred == 1))
+    fn = np.sum((y_true == 1) & (y_pred == 0))
+    print(f"  TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
+# Exact match accuracy
+exact_matches = np.all(all_preds == all_labels, axis=1)
+exact_match_acc = np.mean(exact_matches)
+print("\n" + "="*60)
+print("EXACT MATCH (ALL ASPECTS)")
+print("="*60)
+print(f"Samples with ALL aspects correct: {np.sum(exact_matches)}/{len(exact_matches)}")
+print(f"Exact Match Accuracy: {exact_match_acc:.4f}")
+# Partial match accuracy (per sample)
+test_partial_match_scores = []
+for i in range(len(all_labels)):
+    correct_labels = np.sum(all_preds[i] == all_labels[i])
+    test_partial_match_scores.append(correct_labels / len(label_cols))
+test_partial_match_scores = np.array(test_partial_match_scores)
+avg_test_partial_match = np.mean(test_partial_match_scores)
+print("\n" + "="*60)
+print("PARTIAL MATCH (PER-SAMPLE LABEL ACCURACY)")
+print("="*60)
+print(f"Average Partial Match: {avg_test_partial_match:.4f} ({avg_test_partial_match:.2%})")
+print(f"(Average fraction of labels correctly predicted per sample)")
+# Sample predictions
+print("\n" + "="*60)
+print("SAMPLE PREDICTIONS VS GROUND TRUTH")
+print("="*60)
+# test_loader does not shuffle, so row idx of the prediction arrays lines up
+# with test_X[idx].
+num_samples = min(10, len(test_X))
+print(f"\nShowing {num_samples} test samples:\n")
+for idx in range(num_samples):
+    review = test_X[idx]
+    # Translate the 0/1 rows back into label names.
+    true_labels = [label_cols[i] for i, v in enumerate(all_labels[idx]) if v == 1]
+    pred_labels = [label_cols[i] for i, v in enumerate(all_preds[idx]) if v == 1]
+    # Calculate partial match for this sample
+    # Count how many true labels were correctly predicted
+    matching_labels = len(set(true_labels) & set(pred_labels))
+    # The denominator falls back to 1 when there are no true labels, so such a
+    # sample prints "0/1 ... (0.00%)" even when the prediction is also empty.
+    total_true_labels = len(true_labels) if len(true_labels) > 0 else 1
+    partial_match = matching_labels / total_true_labels
+    # Long reviews are cut to 150 characters for display.
+    review_display = review[:150] + "..." if len(review) > 150 else review
+    print(f"Sample {idx + 1}:")
+    print(f"Review: {review_display}")
+    print(f"✓ True Labels:      {true_labels if true_labels else ['None']}")
+    print(f"→ Predicted Labels: {pred_labels if pred_labels else ['None']}")
+    print(f"Match: {'✓ Exact' if set(true_labels) == set(pred_labels) else '✗ Mismatch'}")
+    print(f"Partial Match: {matching_labels}/{total_true_labels} labels correct ({partial_match:.2%})")
+    print("-" * 40)
+# Save model interactively (optional)
+model_save_path = 'gemma_product_classifier.pth'
+torch.save({
+    'epoch': best_epoch if best_model_state is not None else EPOCHS,
+    'model_state_dict': model.state_dict(),
+    'optimizer_state_dict': optimizer.state_dict(),
+    'train_loss': train_losses[best_epoch - 1] if best_model_state is not None else train_losses[-1] if train_losses else 0,
+    'val_loss': best_val_loss if best_model_state is not None else (val_losses[-1] if val_losses else 0),
+    'best_epoch': best_epoch,
+    'best_val_loss': best_val_loss,
+    'optimal_thresholds': optimal_thresholds,
+}, model_save_path)
+print(f"Model saved to {model_save_path}")
+print("\n" + "="*60)
+print("TRAINING COMPLETE")
+print("="*60)

6 _ Fine-Tuning (Gemma)/Specific Models/LLM trained Gemma Model/gemini_service_model.py ADDED Viewed

	@@ -0,0 +1,748 @@

+import pandas as pd
+import torch
+from torch.utils.data import Dataset, DataLoader
+from torch import nn
+from transformers import AutoTokenizer, GemmaModel
+from peft import LoraConfig, get_peft_model, TaskType
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report, hamming_loss, accuracy_score, precision_score, recall_score, f1_score
+import numpy as np
+import random
+import matplotlib.pyplot as plt
+import os
+# For UTF-8 characters in output
+import sys
+# Progress messages below contain non-ASCII symbols (for example the check
+# mark in the training log), so force UTF-8 on stdout.
+sys.stdout.reconfigure(encoding='utf-8')
+# Set random seeds for reproducibility
+# Seeds Python's random, NumPy and PyTorch (CPU, plus all CUDA devices if present).
+seed_value = 42
+random.seed(seed_value)
+np.random.seed(seed_value)
+torch.manual_seed(seed_value)
+if torch.cuda.is_available():
+    torch.cuda.manual_seed_all(seed_value)
+# Parameters
+MODEL_ID = 'google/gemma-3-1b-pt'  # Hugging Face checkpoint id for tokenizer and backbone
+BATCH_SIZE = 8
+EPOCHS = 10  # upper bound; early stopping in the training loop may end sooner
+LR = 5e-5  # learning rate passed to AdamW
+# Load data - service-specific
+# Paths are relative to the working directory the script is started from.
+print("Loading training data from service_train_dataset.csv...")
+train_df = pd.read_csv('datasets/gemini/service_train_dataset.csv')
+print("Loading test data from test_service_dataset.csv...")
+test_df = pd.read_csv('datasets/test_service_dataset.csv')
+# Define label columns (Service sub-aspects)
+# Each name must exist as a column in both CSV files.
+label_cols = [
+    'Handling_SER',
+    'Responsiveness_SER',
+    'Trustworthiness_SER',
+    'General_SER'
+]
+# Prepare training data with 80/20 train/validation split
+# Reviews become plain strings; labels become a float32 (samples, labels) matrix.
+train_X_full = train_df['Review'].astype(str).tolist()
+train_Y_full = train_df[label_cols].values.astype(np.float32)
+# Fixed random_state keeps the split identical between runs; no stratification.
+train_X, val_X, train_Y, val_Y = train_test_split(
+    train_X_full, train_Y_full,
+    test_size=0.2,
+    random_state=42
+)
+# Prepare test data
+test_X = test_df['Review'].astype(str).tolist()
+test_Y = test_df[label_cols].values.astype(np.float32)
+print(f"\nDataset sizes:")
+print(f"Training samples: {len(train_X)}")
+print(f"Validation samples: {len(val_X)}")
+print(f"Test samples: {len(test_X)}")
+print(f"Number of labels: {len(label_cols)}")
+# Compute class weights for imbalanced dataset
+def compute_class_weights(labels, label_names):
+    """
+    Compute class weights for multi-label classification
+    using the inverse of class frequency.
+    Args:
+        labels: numpy array of shape (n_samples, n_labels)
+        label_names: list of label column names
+    Returns:
+        pos_weight: torch tensor of positive class weights
+    """
+    n_samples = labels.shape[0]
+    n_labels = labels.shape[1]
+    pos_weights = []
+    print("\n" + "="*60)
+    print("CLASS IMBALANCE ANALYSIS")
+    print("="*60)
+    for i, label_name in enumerate(label_names):
+        pos_count = np.sum(labels[:, i] == 1)
+        neg_count = np.sum(labels[:, i] == 0)
+        # Calculate positive class weight (ratio of negative to positive)
+        if pos_count > 0:
+            raw_ratio = neg_count / pos_count
+            # Apply square root dampening to avoid extreme weights
+            pos_weight = np.sqrt(raw_ratio)
+        else:
+            pos_weight = 1.0
+        pos_weights.append(pos_weight)
+        print(f"\n{label_name}:")
+        print(f"  Positive samples: {pos_count} ({pos_count/n_samples*100:.2f}%)")
+        print(f"  Negative samples: {neg_count} ({neg_count/n_samples*100:.2f}%)")
+        print(f"  Raw imbalance ratio (neg/pos): {neg_count/pos_count if pos_count > 0 else 1.0:.4f}")
+        print(f"  Dampened weight (sqrt of ratio): {pos_weight:.4f}")
+    print("="*60 + "\n")
+    return torch.FloatTensor(pos_weights)
+def find_optimal_thresholds(model, dataloader, label_cols, device):
+    """
+    Find optimal decision threshold for each class independently
+    by maximizing F1-score on the validation set.
+    Args:
+        model: trained model
+        dataloader: validation data loader
+        label_cols: list of label column names
+        device: torch device
+    Returns:
+        optimal_thresholds: numpy array of optimal thresholds for each class
+    """
+    from sklearn.metrics import f1_score
+    print("\n" + "="*60)
+    print("OPTIMIZING DECISION THRESHOLDS")
+    print("="*60)
+    # Collect all predictions and labels
+    model.eval()
+    all_probs = []
+    all_labels = []
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in dataloader:
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+            logits = model(input_ids, attention_mask)
+            probs = torch.sigmoid(logits).cpu().numpy()
+            all_probs.append(probs)
+            all_labels.append(labels.cpu().numpy())
+    all_probs = np.vstack(all_probs)
+    all_labels = np.vstack(all_labels)
+    # Find optimal threshold for each class
+    optimal_thresholds = []
+    threshold_range = np.arange(0.1, 0.91, 0.05)  # 0.1 to 0.9 in steps of 0.05
+    for i, label_name in enumerate(label_cols):
+        best_threshold = 0.5
+        best_f1 = 0.0
+        for threshold in threshold_range:
+            preds = (all_probs[:, i] > threshold).astype(int)
+            f1 = f1_score(all_labels[:, i], preds, zero_division=0)
+            if f1 > best_f1:
+                best_f1 = f1
+                best_threshold = threshold
+        optimal_thresholds.append(best_threshold)
+        print(f"\n{label_name}:")
+        print(f"  Optimal threshold: {best_threshold:.2f}")
+        print(f"  Best F1-score: {best_f1:.4f}")
+        print(f"  (Default 0.5 threshold F1: {f1_score(all_labels[:, i], (all_probs[:, i] > 0.5).astype(int), zero_division=0):.4f})")
+    print("="*60 + "\n")
+    return np.array(optimal_thresholds)
+def predict_with_thresholds(model, dataloader, thresholds, device):
+    """
+    Make predictions using custom thresholds for each class.
+    Args:
+        model: trained model
+        dataloader: data loader
+        thresholds: numpy array of thresholds for each class
+        device: torch device
+    Returns:
+        predictions: numpy array of predictions
+        labels: numpy array of true labels
+    """
+    model.eval()
+    all_preds = []
+    all_labels = []
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in dataloader:
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+            logits = model(input_ids, attention_mask)
+            probs = torch.sigmoid(logits).cpu().numpy()
+            # Apply custom thresholds for each class
+            preds = np.zeros_like(probs, dtype=int)
+            for i in range(len(thresholds)):
+                preds[:, i] = (probs[:, i] > thresholds[i]).astype(int)
+            all_preds.append(preds)
+            all_labels.append(labels.cpu().numpy())
+    return np.vstack(all_preds), np.vstack(all_labels)
+# Dataset class
+class ReviewDataset(Dataset):
+    def __init__(self, texts, labels):
+        self.texts = texts
+        self.labels = labels
+    def __len__(self):
+        return len(self.texts)
+    def __getitem__(self, idx):
+        encoding = tokenizer(
+            self.texts[idx],
+            padding='max_length',
+            truncation=True,
+            max_length=256,
+            return_tensors='pt'
+        )
+        input_ids = encoding['input_ids'].squeeze()
+        attention_mask = encoding['attention_mask'].squeeze()
+        label = torch.FloatTensor(self.labels[idx])
+        return input_ids, attention_mask, label
+# Initialize tokenizer
+# token=True asks the Hugging Face loader to use the locally stored access
+# token (presumably because the Gemma checkpoint is gated — confirm).
+print("\nInitializing tokenizer...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=True)
+# Create datasets
+# One ReviewDataset per split; tokenization happens lazily in __getitem__.
+train_dataset = ReviewDataset(train_X, train_Y)
+val_dataset = ReviewDataset(val_X, val_Y)
+test_dataset = ReviewDataset(test_X, test_Y)
+# Create data loaders
+# Only the training loader shuffles; validation and test keep dataset order so
+# predictions can be matched back to val_X / test_X by index.
+train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
+val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
+test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)
+# Compute class weights based on training data
+print("Computing class weights for imbalanced dataset...")
+pos_weights = compute_class_weights(train_Y, label_cols)
+# Initialize model with LoRA
+# NOTE(review): MODEL_ID names a Gemma 3 checkpoint while the loader class is
+# GemmaModel — confirm the checkpoint's weights map onto this class.
+print("Initializing model with LoRA...")
+backbone = GemmaModel.from_pretrained(MODEL_ID, token=True, dtype=torch.bfloat16)
+# Rank-8 LoRA adapters on the attention query and value projections only.
+lora_config = LoraConfig(
+    task_type=TaskType.FEATURE_EXTRACTION,
+    r=8,
+    lora_alpha=16,
+    lora_dropout=0.05,
+    target_modules=["q_proj", "v_proj"]
+)
+# Rebinds `backbone` to the PEFT-wrapped model.
+backbone = get_peft_model(backbone, lora_config)
+# Classifier model
+class GemmaClassifier(nn.Module):
+    def __init__(self, backbone, num_labels):
+        super().__init__()
+        self.backbone = backbone
+        self.pooler = nn.AdaptiveAvgPool1d(1)
+        self.classifier = nn.Linear(backbone.config.hidden_size, num_labels)
+    def forward(self, input_ids, attention_mask):
+        output = self.backbone(input_ids=input_ids, attention_mask=attention_mask)
+        hidden = output.last_hidden_state
+        pooled = self.pooler(hidden.permute(0, 2, 1)).squeeze(-1)
+        logits = self.classifier(pooled.float())
+        return logits
+# Initialize model, optimizer, and loss function
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+print(f"Using device: {device}")
+model = GemmaClassifier(backbone, len(label_cols)).to(device)
+# The optimizer is handed every parameter of the wrapped model.
+optimizer = torch.optim.AdamW(model.parameters(), lr=LR)
+# Use computed pos_weight to handle class imbalance
+criterion = nn.BCEWithLogitsLoss(pos_weight=pos_weights.to(device))
+print(f"\nInitialized BCEWithLogitsLoss with pos_weight: {pos_weights.cpu().numpy()}")
+# Initialize loss tracking
+# train_losses / val_losses receive one mean value per epoch.
+train_losses = []
+val_losses = []
+train_batch_losses = []  # Per-batch training losses
+val_batch_losses = []    # Per-batch validation losses
+# Early stopping variables
+best_val_loss = float('inf')
+best_epoch = 0  # 1-based epoch of the best validation loss; 0 means "none yet"
+best_model_state = None
+patience = 5  # Number of epochs to wait for improvement
+patience_counter = 0
+# Training loop
+print("\n" + "="*60)
+print("TRAINING")
+print("="*60)
+for epoch in range(EPOCHS):
+    model.train()
+    total_loss = 0
+    batch_count = 0
+    for input_ids, attention_mask, labels in train_loader:
+        input_ids = input_ids.to(device)
+        attention_mask = attention_mask.to(device)
+        labels = labels.to(device)
+        optimizer.zero_grad()
+        logits = model(input_ids, attention_mask)
+        loss = criterion(logits, labels)
+        loss.backward()
+        optimizer.step()
+        total_loss += loss.item()
+        batch_count += 1
+        train_batch_losses.append(loss.item())  # Store per-batch loss
+        # Print progress every 100 batches
+        if batch_count % 100 == 0:
+            print(f"  Epoch {epoch+1} | Batch {batch_count}/{len(train_loader)} | Current Loss: {loss.item():.4f}")
+    avg_train_loss = total_loss / len(train_loader)
+    train_losses.append(avg_train_loss)
+    print(f"\nEpoch {epoch+1}/{EPOCHS} completed")
+    print(f"Average Training Loss: {avg_train_loss:.4f}")
+    # Validation on validation set
+    model.eval()
+    val_loss = 0
+    with torch.no_grad():
+        for input_ids, attention_mask, labels in val_loader:
+            input_ids = input_ids.to(device)
+            attention_mask = attention_mask.to(device)
+            labels = labels.to(device)
+            logits = model(input_ids, attention_mask)
+            loss = criterion(logits, labels)
+            val_loss += loss.item()
+            val_batch_losses.append(loss.item())  # Store per-batch validation loss
+    avg_val_loss = val_loss / len(val_loader)
+    val_losses.append(avg_val_loss)
+    print(f"Validation Loss: {avg_val_loss:.4f}")
+    # Early stopping check
+    if avg_val_loss < best_val_loss:
+        best_val_loss = avg_val_loss
+        best_epoch = epoch + 1
+        best_model_state = model.state_dict().copy()
+        patience_counter = 0
+        print(f"✓ New best validation loss: {best_val_loss:.4f} (Epoch {best_epoch})")
+    else:
+        patience_counter += 1
+        print(f"  No improvement for {patience_counter} epoch(s)")
+        if patience_counter >= patience:
+            print(f"\nEarly stopping triggered! Best validation loss: {best_val_loss:.4f} at epoch {best_epoch}")
+            break
+    print("-" * 60)
+# Load best model state
+# Restore the weights recorded for the epoch with the lowest validation loss,
+# if any epoch produced one.
+if best_model_state is not None:
+    print(f"\nLoading best model from epoch {best_epoch} with validation loss: {best_val_loss:.4f}")
+    model.load_state_dict(best_model_state)
+else:
+    print("\nNo best model found, using final model state")
+# Optimize decision thresholds using validation set
+print("Finding optimal decision thresholds for each class...")
+optimal_thresholds = find_optimal_thresholds(model, val_loader, label_cols, device)
+print(f"Optimal thresholds: {optimal_thresholds}")
+# SAVE MODEL AFTER TRAINING
+SAVE_DIR = r"C:\temp\new_models"  # hard-coded Windows path; created below if missing
+os.makedirs(SAVE_DIR, exist_ok=True)
+SAVE_PATH = os.path.join(SAVE_DIR, "gemma_service_specific.pt")
+# The model is moved to CPU first so the saved tensors are CPU tensors.
+torch.save(model.to('cpu').state_dict(), SAVE_PATH)
+model.to(device)  # Move model back to device after saving
+print(f"\nModel saved to: {SAVE_PATH}")
+# Plot training and validation loss
+print("\n" + "="*60)
+print("PLOTTING TRAINING CURVES")
+print("="*60)
+plt.figure(figsize=(10, 6))
+epochs_range = range(1, EPOCHS + 1)
+plt.plot(epochs_range, train_losses, 'b-o', label='Training Loss', linewidth=2, markersize=8)
+plt.plot(epochs_range, val_losses, 'r-s', label='Validation Loss', linewidth=2, markersize=8)
+plt.xlabel('Epoch', fontsize=12)
+plt.ylabel('Loss', fontsize=12)
+plt.title('Training and Validation Loss Over Epochs', fontsize=14, fontweight='bold')
+plt.legend(fontsize=10)
+plt.grid(True, alpha=0.3)
+plt.tight_layout()
+# Save the plot
+plot_path = 'training_loss_plot_service.png'
+plt.savefig(plot_path, dpi=300, bbox_inches='tight')
+print(f"Training loss plot saved to: {plot_path}")
+# Display loss values
+print("\nLoss values per epoch:")
+print("-" * 40)
+for i, (train_loss, val_loss) in enumerate(zip(train_losses, val_losses), 1):
+    print(f"Epoch {i}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}")
+print("-" * 40)
+# Plot detailed per-batch loss curves
+print("\nGenerating detailed per-batch loss plot...")
+# Create figure with two subplots
+fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 10))
+# Calculate moving average for smoothing (window size = 50 batches)
+def moving_average(data, window_size):
+    if len(data) < window_size:
+        window_size = max(1, len(data) // 2)
+    cumsum = np.cumsum(np.insert(data, 0, 0))
+    return (cumsum[window_size:] - cumsum[:-window_size]) / window_size
+train_ma = moving_average(train_batch_losses, 50)
+val_ma = moving_average(val_batch_losses, 50)
+# Subplot 1: Training loss per batch
+ax1.plot(train_batch_losses, alpha=0.3, color='lightblue', linewidth=0.5, label='Raw Training Loss')
+ax1.plot(range(len(train_ma)), train_ma, color='blue', linewidth=2, label='Smoothed (Moving Avg, window=50)')
+ax1.set_xlabel('Training Batch', fontsize=11)
+ax1.set_ylabel('Loss', fontsize=11)
+ax1.set_title('Training Loss per Batch (Detailed View)', fontsize=13, fontweight='bold')
+ax1.legend(fontsize=9)
+ax1.grid(True, alpha=0.3)
+# Add vertical lines for epoch boundaries
+batches_per_epoch = len(train_loader)
+for epoch_idx in range(1, EPOCHS):
+    ax1.axvline(x=epoch_idx * batches_per_epoch, color='red', linestyle='--', linewidth=1, alpha=0.5)
+# Subplot 2: Validation loss per batch
+ax2.plot(val_batch_losses, alpha=0.3, color='lightcoral', linewidth=0.5, label='Raw Validation Loss')
+ax2.plot(range(len(val_ma)), val_ma, color='red', linewidth=2, label='Smoothed (Moving Avg, window=50)')
+ax2.set_xlabel('Validation Batch', fontsize=11)
+ax2.set_ylabel('Loss', fontsize=11)
+ax2.set_title('Validation Loss per Batch (Detailed View)', fontsize=13, fontweight='bold')
+ax2.legend(fontsize=9)
+ax2.grid(True, alpha=0.3)
+# Add vertical lines for epoch boundaries
+val_batches_per_epoch = len(val_loader)
+for epoch_idx in range(1, EPOCHS):
+    ax2.axvline(x=epoch_idx * val_batches_per_epoch, color='blue', linestyle='--', linewidth=1, alpha=0.5)
+plt.tight_layout()
+# Save the detailed plot
+detailed_plot_path = 'training_loss_per_batch_detailed_service.png'
+plt.savefig(detailed_plot_path, dpi=300, bbox_inches='tight')
+print(f"Detailed per-batch loss plot saved to: {detailed_plot_path}")
+# Print batch loss statistics
+print("\nBatch Loss Statistics:")
+print("-" * 60)
+print(f"Training batches: {len(train_batch_losses)}")
+print(f"  Min loss: {min(train_batch_losses):.4f}")
+print(f"  Max loss: {max(train_batch_losses):.4f}")
+print(f"  Mean loss: {np.mean(train_batch_losses):.4f}")
+print(f"  Std dev: {np.std(train_batch_losses):.4f}")
+print(f"\nValidation batches: {len(val_batch_losses)}")
+print(f"  Min loss: {min(val_batch_losses):.4f}")
+print(f"  Max loss: {max(val_batch_losses):.4f}")
+print(f"  Mean loss: {np.mean(val_batch_losses):.4f}")
+print(f"  Std dev: {np.std(val_batch_losses):.4f}")
+print("-" * 60)
+# VALIDATION SET EVALUATION (WITH OPTIMIZED THRESHOLDS)
+print("\n" + "="*60)
+print("VALIDATION SET EVALUATION (WITH OPTIMIZED THRESHOLDS)")
+print("="*60)
+val_preds, val_labels_eval = predict_with_thresholds(model, val_loader, optimal_thresholds, device)
+# Also get predictions with default threshold for comparison
+model.eval()
+val_preds_default = []
+with torch.no_grad():
+    for input_ids, attention_mask, labels in val_loader:
+        input_ids = input_ids.to(device)
+        attention_mask = attention_mask.to(device)
+        logits = model(input_ids, attention_mask)
+        probs = torch.sigmoid(logits).cpu().numpy()
+        preds = (probs > 0.5).astype(int)
+        val_preds_default.append(preds)
+val_preds_default = np.vstack(val_preds_default)
+print(f"\nPredicted data shape: {val_preds.shape}")
+print(f"Ground truth data shape: {val_labels_eval.shape}")
+# Comparison: Default vs Optimized Thresholds
+print("\n" + "="*60)
+print("COMPARISON: Default vs Optimized Thresholds")
+print("="*60)
+print("\nDefault Threshold (0.5):")
+for i, label in enumerate(label_cols):
+    f1_default = f1_score(val_labels_eval[:, i], val_preds_default[:, i], zero_division=0)
+    print(f"  {label}: F1 = {f1_default:.4f}")
+print("\nOptimized Thresholds:")
+for i, label in enumerate(label_cols):
+    f1_optimized = f1_score(val_labels_eval[:, i], val_preds[:, i], zero_division=0)
+    print(f"  {label}: F1 = {f1_optimized:.4f} (threshold = {optimal_thresholds[i]:.2f})")
+print("="*60 + "\n")
+# Classification Report
+print('\n' + '='*60)
+print('CLASSIFICATION REPORT (VALIDATION)')
+print('='*60)
+print(classification_report(val_labels_eval, val_preds, target_names=label_cols))
+# Hamming Loss
+val_hamming_loss = hamming_loss(val_labels_eval, val_preds)
+print("="*60)
+print("HAMMING LOSS (Multi-label Error Rate)")
+print("="*60)
+print(f"Hamming Loss: {val_hamming_loss:.4f}")
+print(f"(Fraction of incorrectly predicted labels: {val_hamming_loss:.2%})")
+# Per-aspect metrics
+print("\n" + "="*60)
+print("PER-ASPECT METRICS (VALIDATION)")
+print("="*60)
+for i, aspect in enumerate(label_cols):
+    y_true = val_labels_eval[:, i]
+    y_pred = val_preds[:, i]
+    acc = accuracy_score(y_true, y_pred)
+    prec = precision_score(y_true, y_pred, zero_division=0)
+    rec = recall_score(y_true, y_pred, zero_division=0)
+    f1 = f1_score(y_true, y_pred, zero_division=0)
+    print(f"\n=== {aspect.upper()} ===")
+    print(f"Accuracy:  {acc:.4f}")
+    print(f"Precision: {prec:.4f}")
+    print(f"Recall:    {rec:.4f}")
+    print(f"F1 Score:  {f1:.4f}")
+    tp = np.sum((y_true == 1) & (y_pred == 1))
+    tn = np.sum((y_true == 0) & (y_pred == 0))
+    fp = np.sum((y_true == 0) & (y_pred == 1))
+    fn = np.sum((y_true == 1) & (y_pred == 0))
+    print(f"  TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
+# Exact match accuracy
+val_exact_matches = np.all(val_preds == val_labels_eval, axis=1)
+val_exact_match_acc = np.mean(val_exact_matches)
+print("\n" + "="*60)
+print("EXACT MATCH (ALL ASPECTS)")
+print("="*60)
+print(f"Samples with ALL aspects correct: {np.sum(val_exact_matches)}/{len(val_exact_matches)}")
+print(f"Exact Match Accuracy: {val_exact_match_acc:.4f}")
+# Partial match accuracy (per sample)
+partial_match_scores = []
+for i in range(len(val_labels_eval)):
+    correct_labels = np.sum(val_preds[i] == val_labels_eval[i])
+    partial_match_scores.append(correct_labels / len(label_cols))
+partial_match_scores = np.array(partial_match_scores)
+avg_partial_match = np.mean(partial_match_scores)
+print("\n" + "="*60)
+print("PARTIAL MATCH (PER-SAMPLE LABEL ACCURACY)")
+print("="*60)
+print(f"Average Partial Match: {avg_partial_match:.4f} ({avg_partial_match:.2%})")
+print(f"(Average fraction of labels correctly predicted per sample)")
+# Sample predictions with match/mismatch
+print("\n" + "="*60)
+print("SAMPLE PREDICTIONS VS GROUND TRUTH (VALIDATION)")
+print("="*60)
+num_samples = min(10, len(val_X))
+print(f"\nShowing {num_samples} validation samples:\n")
+for idx in range(num_samples):
+    review = val_X[idx]
+    true_labels = [label_cols[i] for i, v in enumerate(val_labels_eval[idx]) if v == 1]
+    pred_labels = [label_cols[i] for i, v in enumerate(val_preds[idx]) if v == 1]
+    # Calculate partial match for this sample
+    # Count how many true labels were correctly predicted
+    matching_labels = len(set(true_labels) & set(pred_labels))
+    total_true_labels = len(true_labels) if len(true_labels) > 0 else 1
+    partial_match = matching_labels / total_true_labels
+    review_display = review[:150] + "..." if len(review) > 150 else review
+    print(f"Sample {idx + 1}:")
+    print(f"Review: {review_display}")
+    print(f"✓ True Labels:      {true_labels if true_labels else ['None']}")
+    print(f"→ Predicted Labels: {pred_labels if pred_labels else ['None']}")
+    print(f"Match: {'✓ Exact' if set(true_labels) == set(pred_labels) else '✗ Mismatch'}")
+    print(f"Partial Match: {matching_labels}/{total_true_labels} labels correct ({partial_match:.2%})")
+    print("-" * 40)
+# Final Evaluation on Test Set (WITH OPTIMIZED THRESHOLDS)
+print("\n" + "="*60)
+print("FINAL EVALUATION ON TEST SET (WITH OPTIMIZED THRESHOLDS)")
+print("="*60)
+all_preds, all_labels = predict_with_thresholds(model, test_loader, optimal_thresholds, device)
+print(f"\nPredicted data shape: {all_preds.shape}")
+print(f"Ground truth data shape: {all_labels.shape}")
+# Classification Report
+print('\n' + '='*60)
+print('CLASSIFICATION REPORT')
+print('='*60)
+print(classification_report(all_labels, all_preds, target_names=label_cols))
+# Hamming Loss
+hamming_loss_value = hamming_loss(all_labels, all_preds)
+print("="*60)
+print("HAMMING LOSS (Multi-label Error Rate)")
+print("="*60)
+print(f"Hamming Loss: {hamming_loss_value:.4f}")
+print(f"(Fraction of incorrectly predicted labels: {hamming_loss_value:.2%})")
+# Per-aspect metrics
+print("\n" + "="*60)
+print("PER-ASPECT METRICS")
+print("="*60)
+for i, aspect in enumerate(label_cols):
+    y_true = all_labels[:, i]
+    y_pred = all_preds[:, i]
+    acc = accuracy_score(y_true, y_pred)
+    prec = precision_score(y_true, y_pred, zero_division=0)
+    rec = recall_score(y_true, y_pred, zero_division=0)
+    f1 = f1_score(y_true, y_pred, zero_division=0)
+    print(f"\n=== {aspect.upper()} ===")
+    print(f"Accuracy:  {acc:.4f}")
+    print(f"Precision: {prec:.4f}")
+    print(f"Recall:    {rec:.4f}")
+    print(f"F1 Score:  {f1:.4f}")
+    tp = np.sum((y_true == 1) & (y_pred == 1))
+    tn = np.sum((y_true == 0) & (y_pred == 0))
+    fp = np.sum((y_true == 0) & (y_pred == 1))
+    fn = np.sum((y_true == 1) & (y_pred == 0))
+    print(f"  TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}")
+# Exact match accuracy
+exact_matches = np.all(all_preds == all_labels, axis=1)
+exact_match_acc = np.mean(exact_matches)
+print("\n" + "="*60)
+print("EXACT MATCH (ALL ASPECTS)")
+print("="*60)
+print(f"Samples with ALL aspects correct: {np.sum(exact_matches)}/{len(exact_matches)}")
+print(f"Exact Match Accuracy: {exact_match_acc:.4f}")
+# Partial match accuracy (per sample)
+test_partial_match_scores = []
+for i in range(len(all_labels)):
+    correct_labels = np.sum(all_preds[i] == all_labels[i])
+    test_partial_match_scores.append(correct_labels / len(label_cols))
+test_partial_match_scores = np.array(test_partial_match_scores)
+avg_test_partial_match = np.mean(test_partial_match_scores)
+print("\n" + "="*60)
+print("PARTIAL MATCH (PER-SAMPLE LABEL ACCURACY)")
+print("="*60)
+print(f"Average Partial Match: {avg_test_partial_match:.4f} ({avg_test_partial_match:.2%})")
+print(f"(Average fraction of labels correctly predicted per sample)")
+# Sample predictions
+print("\n" + "="*60)
+print("SAMPLE PREDICTIONS VS GROUND TRUTH")
+print("="*60)
+num_samples = min(10, len(test_X))
+print(f"\nShowing {num_samples} test samples:\n")
+for idx in range(num_samples):
+    review = test_X[idx]
+    true_labels = [label_cols[i] for i, v in enumerate(all_labels[idx]) if v == 1]
+    pred_labels = [label_cols[i] for i, v in enumerate(all_preds[idx]) if v == 1]
+    # Calculate partial match for this sample
+    # Count how many true labels were correctly predicted
+    matching_labels = len(set(true_labels) & set(pred_labels))
+    total_true_labels = len(true_labels) if len(true_labels) > 0 else 1
+    partial_match = matching_labels / total_true_labels
+    review_display = review[:150] + "..." if len(review) > 150 else review
+    print(f"Sample {idx + 1}:")
+    print(f"Review: {review_display}")
+    print(f"✓ True Labels:      {true_labels if true_labels else ['None']}")
+    print(f"→ Predicted Labels: {pred_labels if pred_labels else ['None']}")
+    print(f"Match: {'✓ Exact' if set(true_labels) == set(pred_labels) else '✗ Mismatch'}")
+    print(f"Partial Match: {matching_labels}/{total_true_labels} labels correct ({partial_match:.2%})")
+    print("-" * 40)
+# Save model interactively (optional)
+# model_save_path = 'gemma_service_classifier.pth'
+# torch.save({
+#     'epoch': EPOCHS,
+#     'model_state_dict': model.state_dict(),
+#     'optimizer_state_dict': optimizer.state_dict(),
+#     'train_loss': avg_train_loss,
+#     'test_loss': avg_test_loss,
+# }, model_save_path)
+# print(f"Model saved to {model_save_path}")
+model_save_path = os.path.join(SAVE_DIR, 'gemma_service_classifier.pth')
+torch.save({
+    'epoch': best_epoch if best_model_state is not None else EPOCHS,
+    'model_state_dict': model.state_dict(),
+    'optimizer_state_dict': optimizer.state_dict(),
+    'train_loss': train_losses[best_epoch - 1] if best_model_state is not None else train_losses[-1] if train_losses else 0,
+    'val_loss': best_val_loss if best_model_state is not None else (val_losses[-1] if val_losses else 0),
+    'best_epoch': best_epoch,
+    'best_val_loss': best_val_loss,
+    'optimal_thresholds': optimal_thresholds,
+}, model_save_path)
+print(f"Model saved to {model_save_path}")
+print("\n" + "="*60)
+print("TRAINING COMPLETE")
+print("="*60)