boltuix committed on
Commit
29d83cf
·
verified ·
1 Parent(s): 46e8c82

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +76 -72
README.md CHANGED
@@ -312,78 +312,82 @@ To adapt BERT-Lite for custom IoT tasks (e.g., specific smart home commands):
312
  1. **Prepare Dataset**: Collect labeled data (e.g., commands with intents or masked sentences).
313
  2. **Fine-Tune with Hugging Face**:
314
  ```python
315
- #!pip uninstall -y transformers torch datasets
316
- #!pip install transformers==4.44.2 torch==2.4.1 datasets==3.0.1
317
-
318
- import torch
319
- from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
320
- from datasets import Dataset
321
- import pandas as pd
322
-
323
- # 1. Prepare the sample IoT dataset
324
- data = {
325
- "text": [
326
- "Turn on the fan",
327
- "Switch off the light",
328
- "Invalid command",
329
- "Activate the air conditioner",
330
- "Turn off the heater",
331
- "Gibberish input"
332
- ],
333
- "label": [1, 1, 0, 1, 1, 0] # 1 for valid IoT commands, 0 for invalid
334
- }
335
- df = pd.DataFrame(data)
336
- dataset = Dataset.from_pandas(df)
337
-
338
- # 2. Load tokenizer and model
339
- model_name = "boltuix/bert-lite"
340
- tokenizer = BertTokenizer.from_pretrained(model_name)
341
- model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
342
-
343
- # 3. Tokenize the dataset
344
- def tokenize_function(examples):
345
- return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64) # Short max_length for IoT commands
346
-
347
- tokenized_dataset = dataset.map(tokenize_function, batched=True)
348
-
349
- # 4. Set format for PyTorch
350
- tokenized_dataset.set_format("torch", columns=["input_ids", "attention_mask", "label"])
351
-
352
- # 5. Define training arguments
353
- training_args = TrainingArguments(
354
- output_dir="./bert_lite_results",
355
- num_train_epochs=5, # Increased epochs for small dataset
356
- per_device_train_batch_size=2,
357
- logging_dir="./bert_lite_logs",
358
- logging_steps=10,
359
- save_steps=100,
360
- evaluation_strategy="no",
361
- learning_rate=5e-5, # Adjusted for BERT-Lite
362
- )
363
-
364
- # 6. Initialize Trainer
365
- trainer = Trainer(
366
- model=model,
367
- args=training_args,
368
- train_dataset=tokenized_dataset,
369
- )
370
-
371
- # 7. Fine-tune the model
372
- trainer.train()
373
-
374
- # 8. Save the fine-tuned model
375
- model.save_pretrained("./fine_tuned_bert_lite")
376
- tokenizer.save_pretrained("./fine_tuned_bert_lite")
377
-
378
- # 9. Example inference
379
- text = "Turn on the light"
380
- inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
381
- model.eval()
382
- with torch.no_grad():
383
- outputs = model(**inputs)
384
- logits = outputs.logits
385
- predicted_class = torch.argmax(logits, dim=1).item()
386
- print(f"Predicted class for '{text}': {'Valid IoT Command' if predicted_class == 1 else 'Invalid Command'}")
 
 
 
 
387
  ```
388
  3. **Deploy**: Export the fine-tuned model to ONNX or TensorFlow Lite for edge devices.
389
 
 
312
  1. **Prepare Dataset**: Collect labeled data (e.g., commands with intents or masked sentences).
313
  2. **Fine-Tune with Hugging Face**:
314
  ```python
315
+ # !pip install transformers datasets torch --upgrade
316
+
317
+ import torch
318
+ from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
319
+ from datasets import Dataset
320
+ import pandas as pd
321
+
322
+ # 1. Prepare the sample IoT dataset
323
+ data = {
324
+ "text": [
325
+ "Turn on the fan",
326
+ "Switch off the light",
327
+ "Invalid command",
328
+ "Activate the air conditioner",
329
+ "Turn off the heater",
330
+ "Gibberish input"
331
+ ],
332
+ "label": [1, 1, 0, 1, 1, 0] # 1 = Valid command, 0 = Invalid
333
+ }
334
+ df = pd.DataFrame(data)
335
+ dataset = Dataset.from_pandas(df)
336
+
337
+ # 2. Load tokenizer and model
338
+ model_name = "boltuix/bert-lite" # Replace with any small/quantized BERT
339
+ tokenizer = BertTokenizer.from_pretrained(model_name)
340
+ model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)
341
+
342
+ # 3. Tokenize the dataset
343
+ def tokenize_function(examples):
344
+ return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)
345
+
346
+ tokenized_dataset = dataset.map(tokenize_function, batched=True)
347
+
348
+ # 4. Manually convert columns to tensors (NumPy 2.0 safe)
349
+ tokenized_dataset = tokenized_dataset.map(lambda x: {
350
+ "input_ids": torch.tensor(x["input_ids"]),
351
+ "attention_mask": torch.tensor(x["attention_mask"]),
352
+ "label": torch.tensor(x["label"])
353
+ })
354
+
355
+ # 5. Define training arguments
356
+ training_args = TrainingArguments(
357
+ output_dir="./bert_lite_results",
358
+ num_train_epochs=5,
359
+ per_device_train_batch_size=2,
360
+ logging_dir="./bert_lite_logs",
361
+ logging_steps=10,
362
+ save_steps=100,
363
+ eval_strategy="no",
364
+ learning_rate=5e-5,
365
+ )
366
+
367
+ # 6. Initialize Trainer
368
+ trainer = Trainer(
369
+ model=model,
370
+ args=training_args,
371
+ train_dataset=tokenized_dataset,
372
+ )
373
+
374
+ # 7. Fine-tune the model
375
+ trainer.train()
376
+
377
+ # 8. Save the fine-tuned model
378
+ model.save_pretrained("./fine_tuned_bert_lite")
379
+ tokenizer.save_pretrained("./fine_tuned_bert_lite")
380
+
381
+ # 9. Inference example
382
+ text = "Turn on the light"
383
+ inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=64)
384
+ model.eval()
385
+ with torch.no_grad():
386
+ outputs = model(**inputs)
387
+ logits = outputs.logits
388
+ predicted_class = torch.argmax(logits, dim=1).item()
389
+
390
+ print(f"Predicted class for '{text}': {'✅ Valid IoT Command' if predicted_class == 1 else '❌ Invalid Command'}")
391
  ```
392
  3. **Deploy**: Export the fine-tuned model to ONNX or TensorFlow Lite for edge devices.
393