Add Google Colab compatibility and developer documentation

- Fixed argparse compatibility for Google Colab/Jupyter environments
- Added train_notebook() function for easy notebook usage
- Created comprehensive developer documentation (DEVELOPERS.md)
- DEVELOPERS.md includes setup instructions, an API reference, and usage examples
- DEVELOPERS.md documents all 14 UTS2017_Bank dataset categories
- DEVELOPERS.md provides a troubleshooting guide for common issues

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (2) hide show

DEVELOPERS.md +0 -0
train.py +35 -1

DEVELOPERS.md ADDED Viewed

Binary file (6.35 kB). View file

train.py CHANGED Viewed

@@ -354,8 +354,37 @@ def train_all_configurations():
     return results
 def main():
     """Main function with argument parsing"""
     parser = argparse.ArgumentParser(
         description="Train Vietnamese text classification model on UTS2017_Bank dataset"
     )
@@ -393,7 +422,12 @@ def main():
         help="Train and compare multiple configurations",
     )
-    args = parser.parse_args()
     if args.compare:
         print("Training and comparing multiple configurations...")

     return results
+def train_notebook(model_name="logistic", max_features=20000, ngram_min=1, ngram_max=2,
+                   split_ratio=0.2, n_samples=None, compare=False):
+    """
+    Convenience function for training in Jupyter/Colab notebooks without argparse.
+
+    All options are plain keyword arguments; this function itself does not
+    parse any command-line arguments.
+
+    Args:
+        model_name: Forwarded unchanged to train_model(); defaults to "logistic".
+        max_features: Forwarded unchanged to train_model().
+        ngram_min: First element of the ngram_range tuple passed to train_model().
+        ngram_max: Second element of the ngram_range tuple passed to train_model().
+        split_ratio: Forwarded unchanged to train_model() (presumably the
+            held-out fraction of the data -- TODO confirm against train_model).
+        n_samples: Forwarded unchanged to train_model(); defaults to None
+            (presumably meaning "no sample limit" -- TODO confirm).
+        compare: When truthy, train_all_configurations() is run instead and
+            every other argument is ignored.
+
+    Returns:
+        The return value of train_all_configurations() when compare is truthy,
+        otherwise the return value of train_model().
+
+    Example usage:
+        from train import train_notebook
+        train_notebook(model_name="logistic", max_features=20000)
+    """
+    # Comparison mode takes no per-model settings, so the other arguments are unused here.
+    if compare:
+        print("Training and comparing multiple configurations...")
+        return train_all_configurations()
+    else:
+        print(f"Training {model_name} model on UTS2017_Bank dataset...")
+        print(f"Configuration: max_features={max_features}, ngram=({ngram_min}, {ngram_max})")
+        # The two n-gram bounds are packed into the single ngram_range tuple train_model() takes.
+        return train_model(
+            model_name=model_name,
+            max_features=max_features,
+            ngram_range=(ngram_min, ngram_max),
+            split_ratio=split_ratio,
+            n_samples=n_samples,
+        )
 def main():
     """Main function with argument parsing"""
+    # Detect if running in Jupyter/Colab
+    import sys
+    in_notebook = hasattr(sys, 'ps1') or 'ipykernel' in sys.modules or 'google.colab' in sys.modules
     parser = argparse.ArgumentParser(
         description="Train Vietnamese text classification model on UTS2017_Bank dataset"
     )
         help="Train and compare multiple configurations",
     )
+    # Use parse_known_args to ignore Jupyter/Colab kernel arguments
+    args, unknown = parser.parse_known_args()
+    # If running in notebook and there are unknown args, inform user
+    if in_notebook and unknown:
+        print(f"Note: Running in Jupyter/Colab environment. Ignoring kernel arguments: {unknown}")
     if args.compare:
         print("Training and comparing multiple configurations...")