crossroderick
/

aramt5

@@ -125,9 +125,9 @@ The model training process follows a curriculum learning format and is comprised
 | 1     | 20000  | 15            | No                 | Expose the base T5 model to Syriac morphology
 | 2     | 40000  | 30            | Yes                | Introduce short sentences to AramT5
 | 3     | 60000  | 50            | Yes                | Introduce medium sentences to AramT5
-| 4     | 80000  | 70            | Yes                | Introduce longer sentences to AramT5
-| 5     | 100000 | 100           | Yes                | Reinforce longer sentences to AramT5
-| 6     | 120000 | 150           | Yes                | Introduce the full practical corpus to AramT5
 To do a stage 1-based training run, just run the script directly from your IDE or use the following command:

 | 1     | 20000  | 15            | No                 | Expose the base T5 model to Syriac morphology
 | 2     | 40000  | 30            | Yes                | Introduce short sentences to AramT5
 | 3     | 60000  | 50            | Yes                | Introduce medium sentences to AramT5
+| 4     | 120000 | 70            | Yes                | Introduce longer sentences to AramT5
+| 5     | 150000 | 100           | Yes                | Reinforce longer sentences in AramT5
+| 6     | 180000 | 150           | Yes                | Introduce the full practical corpus to AramT5
 To do a stage 1-based training run, just run the script directly from your IDE or use the following command:

src/train_t5.py CHANGED Viewed

@@ -190,37 +190,37 @@ STAGE_CONFIGS = {
     },
     4: {
         "description": "Extension: longer phrases",
-        "num_samples": 80_000,
         "max_src_length": 70,
         "short_mix_ratio": 0.18,  # 18% short examples from previous stages (boosted for retention)
         "short_threshold": 50,  # ≤50 chars (Stage 1+2+3)
         "new_range_ratio": 0.45,  # 45% from new range (51-70 chars)
         "new_range_min": 51,
         "num_epochs": 20,
-        "learning_rate": 6e-5,  # Lower LR to prevent forgetting
     },
     5: {
         "description": "Extension: longer sentences",
-        "num_samples": 100_000,
         "max_src_length": 100,
         "short_mix_ratio": 0.18,  # 18% short examples from previous stages (boosted for retention)
         "short_threshold": 70,  # ≤70 chars (Stage 1+2+3+4)
         "new_range_ratio": 0.45,  # 45% from new range (71-100 chars)
         "new_range_min": 71,
         "num_epochs": 20,
-        "learning_rate": 4e-5,  # Lower LR to prevent forgetting
         "repetition_penalty": 1.2,
     },
     6: {
         "description": "Full practical corpus: sentences and short paragraphs",
-        "num_samples": 120_000,
         "max_src_length": 150,
         "short_mix_ratio": 0.20,  # 20% short examples from previous stages (highest retention)
         "short_threshold": 100,  # ≤100 chars (Stage 1+2+3+4+5)
         "new_range_ratio": 0.40,  # 40% from new range (101-150 chars)
         "new_range_min": 101,
         "num_epochs": 15,
-        "learning_rate": 3e-5,  # Lower LR to prevent forgetting
         "repetition_penalty": 1.2,
     },
 }

     },
     4: {
         "description": "Extension: longer phrases",
+        "num_samples": 120_000,  # Increased to better learn multi-word patterns
         "max_src_length": 70,
         "short_mix_ratio": 0.18,  # 18% short examples from previous stages (boosted for retention)
         "short_threshold": 50,  # ≤50 chars (Stage 1+2+3)
         "new_range_ratio": 0.45,  # 45% from new range (51-70 chars)
         "new_range_min": 51,
         "num_epochs": 20,
+        "learning_rate": 8e-5,  # Higher LR to unlearn early-stopping bias from imbalanced data
     },
     5: {
         "description": "Extension: longer sentences",
+        "num_samples": 150_000,  # Increased to better learn multi-word patterns
         "max_src_length": 100,
         "short_mix_ratio": 0.18,  # 18% short examples from previous stages (boosted for retention)
         "short_threshold": 70,  # ≤70 chars (Stage 1+2+3+4)
         "new_range_ratio": 0.45,  # 45% from new range (71-100 chars)
         "new_range_min": 71,
         "num_epochs": 20,
+        "learning_rate": 5e-5,  # Slightly higher to reinforce multi-word patterns
         "repetition_penalty": 1.2,
     },
     6: {
         "description": "Full practical corpus: sentences and short paragraphs",
+        "num_samples": 180_000,  # Increased to better learn multi-word patterns
         "max_src_length": 150,
         "short_mix_ratio": 0.20,  # 20% short examples from previous stages (highest retention)
         "short_threshold": 100,  # ≤100 chars (Stage 1+2+3+4+5)
         "new_range_ratio": 0.40,  # 40% from new range (101-150 chars)
         "new_range_min": 101,
         "num_epochs": 15,
+        "learning_rate": 4e-5,  # Fine-tuning polish
         "repetition_penalty": 1.2,
     },
 }