Upload Indian Address NER model (checkpoint-20793)

Browse files

Files changed (15) hide show

README.md +405 -0
config.json +77 -0
entity_mappings.json +52 -0
model.safetensors +3 -0
model_card_metadata.json +42 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scaler.pt +3 -0
scheduler.pt +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
trainer_state.json +163 -0
training_args.bin +3 -0
vocab.txt +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,405 @@

+# 🏠 TinyBERT Indian Address NER Model
+This model is a fine-tuned **TinyBERT** for **Named Entity Recognition (NER)** on Indian addresses. It can extract and classify various address components from Indian address text with high accuracy, leveraging TinyBERT's efficient and lightweight architecture.
+## 🎯 Model Description
+A 6-layer TinyBERT (`huawei-noah/TinyBERT_General_6L_768D`) fine-tuned for token-level Named Entity Recognition (NER) on Indian addresses.
+### Key Capabilities
+- **Address Component Extraction**: Identify and classify various parts of Indian addresses
+- **Multi-format Support**: Handle various Indian address formats and styles
+- **Lightweight Architecture**: Built on TinyBERT's efficient transformer design
+- **High Accuracy**: Fine-tuned on augmented Indian address dataset
+- **Fast Inference**: Optimized TinyBERT for quick entity extraction
+- **Robust Recognition**: Handles partial, incomplete, or informal addresses
+- **Efficient Processing**: TinyBERT's compact design for better performance
+- **Mobile-Friendly**: Smaller model size suitable for edge deployment
+- **Resource Efficient**: Lower memory and computational requirements
+## 📊 Model Architecture
+- **Base Model**: huawei-noah/TinyBERT_General_6L_768D (TinyBERT)
+- **Model Type**: Token Classification (NER)
+- **Vocabulary Size**: 30,522 tokens
+- **Hidden Size**: 768
+- **Number of Layers**: 6
+- **Attention Heads**: 12
+- **Max Sequence Length**: 512 tokens (position-embedding limit; the model was fine-tuned with a 128-token context)
+- **Number of Labels**: 23
+- **Model Size**: ~266MB (`model.safetensors`, float32 weights)
+- **Checkpoint**: 20793
+## 🚀 Usage Examples
+```python
+import torch
+from transformers import AutoTokenizer, AutoModelForTokenClassification
+import warnings
+warnings.filterwarnings("ignore")
+class IndianAddressNER:
+    def __init__(self):
+        model_name = "shiprocket-ai/open-tinybert-indian-address-ner"
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+        self.model = AutoModelForTokenClassification.from_pretrained(model_name)
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.model.to(self.device)
+        self.model.eval()
+        # Entity mappings
+        self.id2entity = {
+        "0": "O",
+        "1": "B-building_name",
+        "2": "I-building_name",
+        "3": "B-city",
+        "4": "I-city",
+        "5": "B-country",
+        "6": "I-country",
+        "7": "B-floor",
+        "8": "I-floor",
+        "9": "B-house_details",
+        "10": "I-house_details",
+        "11": "B-locality",
+        "12": "I-locality",
+        "13": "B-pincode",
+        "14": "I-pincode",
+        "15": "B-road",
+        "16": "I-road",
+        "17": "B-state",
+        "18": "I-state",
+        "19": "B-sub_locality",
+        "20": "I-sub_locality",
+        "21": "B-landmarks",
+        "22": "I-landmarks"
+}
+    def predict(self, address):
+        """Extract entities from an Indian address - FIXED VERSION"""
+        if not address.strip():
+            return {}
+        # Tokenize with offset mapping for better text reconstruction
+        inputs = self.tokenizer(
+            address,
+            return_tensors="pt",
+            truncation=True,
+            padding=True,
+            max_length=128,
+            return_offsets_mapping=True
+        )
+        # Extract offset mapping before moving to device
+        offset_mapping = inputs.pop("offset_mapping")[0]
+        inputs = {k: v.to(self.device) for k, v in inputs.items()}
+        # Predict
+        with torch.no_grad():
+            outputs = self.model(**inputs)
+            predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
+            predicted_ids = torch.argmax(predictions, dim=-1)
+            confidence_scores = torch.max(predictions, dim=-1)[0]
+        # Extract entities using offset mapping
+        entities = self.extract_entities_with_offsets(
+            address,
+            predicted_ids[0],
+            confidence_scores[0],
+            offset_mapping
+        )
+        return entities
+    def extract_entities_with_offsets(self, original_text, predicted_ids, confidences, offset_mapping):
+        """Extract entities using offset mapping for accurate text reconstruction"""
+        entities = {}
+        current_entity = None
+        for i, (pred_id, conf) in enumerate(zip(predicted_ids, confidences)):
+            if i >= len(offset_mapping):
+                break
+            start, end = offset_mapping[i]
+            # Skip special tokens (they have (0,0) mapping)
+            if start == end == 0:
+                continue
+            label = self.id2entity.get(str(pred_id.item()), "O")
+            if label.startswith("B-"):
+                # Save previous entity
+                if current_entity:
+                    entity_type = current_entity["type"]
+                    if entity_type not in entities:
+                        entities[entity_type] = []
+                    entities[entity_type].append({
+                        "text": current_entity["text"],
+                        "confidence": current_entity["confidence"]
+                    })
+                # Start new entity
+                entity_type = label[2:]  # Remove "B-"
+                current_entity = {
+                    "type": entity_type,
+                    "text": original_text[start:end],
+                    "confidence": conf.item(),
+                    "start": start,
+                    "end": end
+                }
+            elif label.startswith("I-") and current_entity:
+                # Continue current entity
+                entity_type = label[2:]  # Remove "I-"
+                if entity_type == current_entity["type"]:
+                    # Extend the entity to include this token
+                    current_entity["text"] = original_text[current_entity["start"]:end]
+                    current_entity["confidence"] = (current_entity["confidence"] + conf.item()) / 2
+                    current_entity["end"] = end
+            elif label == "O" and current_entity:
+                # End current entity
+                entity_type = current_entity["type"]
+                if entity_type not in entities:
+                    entities[entity_type] = []
+                entities[entity_type].append({
+                    "text": current_entity["text"],
+                    "confidence": current_entity["confidence"]
+                })
+                current_entity = None
+        # Add final entity if exists
+        if current_entity:
+            entity_type = current_entity["type"]
+            if entity_type not in entities:
+                entities[entity_type] = []
+            entities[entity_type].append({
+                "text": current_entity["text"],
+                "confidence": current_entity["confidence"]
+            })
+        return entities
+# Usage example
+ner = IndianAddressNER()
+# Test addresses
+test_addresses = [
+    "Shop No 123, Sunshine Apartments, Andheri West, Mumbai, 400058",
+    "DLF Cyber City, Sector 25, Gurgaon, Haryana",
+    "Flat 201, MG Road, Bangalore, Karnataka, 560001",
+    "Phoenix Mall, Kurla West, Mumbai"
+]
+print("🏠 INDIAN ADDRESS NER EXAMPLES")
+print("=" * 50)
+for address in test_addresses:
+    print(f"\n📍 Address: {address}")
+    entities = ner.predict(address)
+    if entities:
+        for entity_type, entity_list in sorted(entities.items()):
+            print(f"🏷️ {entity_type.replace('_', ' ').title()}:")
+            for entity in entity_list:
+                confidence = entity['confidence']
+                text = entity['text']
+                confidence_icon = "🟢" if confidence > 0.8 else "🟡" if confidence > 0.6 else "🔴"
+                print(f"   {confidence_icon} {text} (confidence: {confidence:.3f})")
+    else:
+        print("❌ No entities found")
+    print("-" * 40)
+```
+## 🏷️ Supported Entity Types
+The model can identify and extract the following address components:
+- **Building Name**: building_name
+- **City**: city
+- **Country**: country
+- **Floor**: floor
+- **House Details**: house_details
+- **Landmarks**: landmarks
+- **Locality**: locality
+- **Pincode**: pincode
+- **Road**: road
+- **State**: state
+- **Sub Locality**: sub_locality
+## 📈 Performance Highlights
+- **Indian Address Optimized**: Specialized for Indian address patterns and formats
+- **TinyBERT Advantage**: Efficient and lightweight transformer architecture
+- **High Precision**: Accurate entity boundary detection
+- **Multi-component Recognition**: Identifies multiple entities in complex addresses
+- **Confidence Scoring**: Provides confidence scores for each extracted entity
+- **Fast Inference**: Optimized for real-time applications
+- **Robust Handling**: Works with partial or informal address formats
+- **Compact Architecture**: TinyBERT's efficient design for deployment
+- **Resource Friendly**: Lower computational requirements
+## 🔧 Training Details
+- **Dataset**: 300% augmented Indian address dataset
+- **Training Strategy**: Fine-tuned from pre-trained TinyBERT
+- **Specialization**: Indian address entity extraction
+- **Context Length**: 128 tokens
+- **Version**: v1.0
+- **Framework**: PyTorch + Transformers
+- **BIO Tagging**: Uses Begin-Inside-Outside tagging scheme
+- **Base Model Advantage**: TinyBERT's efficient architecture and compact size
+## 💡 Use Cases
+### 1. **Address Parsing & Standardization**
+- Parse unstructured address text into components
+- Standardize address formats for databases
+- Extract specific components for validation
+### 2. **Form Auto-completion**
+- Auto-fill address forms by extracting components
+- Validate address field completeness
+- Suggest corrections for incomplete addresses
+### 3. **Data Processing & Migration**
+- Clean legacy address databases
+- Extract structured data from unstructured text
+- Migrate addresses between different systems
+### 4. **Logistics & Delivery**
+- Extract delivery-relevant components
+- Validate address completeness for shipping
+- Improve address accuracy for last-mile delivery
+### 5. **Geocoding Preprocessing**
+- Prepare addresses for geocoding APIs
+- Extract location components for mapping
+- Improve geocoding accuracy with clean components
+### 6. **Mobile & Edge Deployment**
+- Deploy on mobile devices with limited resources
+- Run inference on edge computing devices
+- Integrate into lightweight applications
+## ⚡ Performance Tips
+1. **Input Length**: Keep addresses under 128 tokens for optimal performance
+2. **Batch Processing**: Process multiple addresses in batches for efficiency
+3. **GPU Usage**: Use GPU for faster inference on large datasets
+4. **Confidence Filtering**: Filter results by confidence score for higher precision
+5. **Text Preprocessing**: Clean input text for better recognition
+6. **TinyBERT Advantage**: Model benefits from efficient architecture optimizations
+7. **Edge Deployment**: Suitable for mobile and edge computing scenarios
+## ⚠️ Limitations
+- **Language Support**: Primarily optimized for English Indian addresses
+- **Regional Variations**: May struggle with highly regional or colloquial formats
+- **New Localities**: Performance may vary on very recent developments
+- **Complex Formatting**: May have difficulty with highly unstructured text
+- **Context Dependency**: Works best with clear address context
+## 📋 Entity Mapping
+The model uses BIO (Begin-Inside-Outside) tagging scheme:
+```json
+{
+  "entity2id": {
+    "O": 0,
+    "B-building_name": 1,
+    "I-building_name": 2,
+    "B-city": 3,
+    "I-city": 4,
+    "B-country": 5,
+    "I-country": 6,
+    "B-floor": 7,
+    "I-floor": 8,
+    "B-house_details": 9,
+    "I-house_details": 10,
+    "B-locality": 11,
+    "I-locality": 12,
+    "B-pincode": 13,
+    "I-pincode": 14,
+    "B-road": 15,
+    "I-road": 16,
+    "B-state": 17,
+    "I-state": 18,
+    "B-sub_locality": 19,
+    "I-sub_locality": 20,
+    "B-landmarks": 21,
+    "I-landmarks": 22
+  },
+  "id2entity": {
+    "0": "O",
+    "1": "B-building_name",
+    "2": "I-building_name",
+    "3": "B-city",
+    "4": "I-city",
+    "5": "B-country",
+    "6": "I-country",
+    "7": "B-floor",
+    "8": "I-floor",
+    "9": "B-house_details",
+    "10": "I-house_details",
+    "11": "B-locality",
+    "12": "I-locality",
+    "13": "B-pincode",
+    "14": "I-pincode",
+    "15": "B-road",
+    "16": "I-road",
+    "17": "B-state",
+    "18": "I-state",
+    "19": "B-sub_locality",
+    "20": "I-sub_locality",
+    "21": "B-landmarks",
+    "22": "I-landmarks"
+  }
+}
+```
+## 📋 Model Files
+- `config.json`: Model configuration and hyperparameters
+- `model.safetensors`: Model weights
+- `tokenizer.json`: Tokenizer configuration
+- `tokenizer_config.json`: Tokenizer settings
+- `vocab.txt`: Vocabulary file
+- `entity_mappings.json`: Entity type mappings
+## 🔄 Model Updates
+- **Version**: v1.0 (Checkpoint 20793)
+- **Last Updated**: 2025-06-19
+- **Training Completion**: 3 epochs (20,793 steps) on the augmented Indian address dataset; best evaluation F1 ≈ 0.947
+- **Base Model**: TinyBERT for efficient transformer architecture
+## 📚 Citation
+If you use this model in your research or applications, please cite:
+```bibtex
+@misc{open-tinybert-indian-address-ner,
+  title={TinyBERT Indian Address NER Model},
+  year={2025},
+  publisher={Hugging Face},
+  url={https://huggingface.co/shiprocket-ai/open-tinybert-indian-address-ner}
+}
+```
+## 📞 Support & Contact
+For questions, issues, or feature requests:
+- Open an issue in this repository
+- Contact: shiprocket-ai team
+- Documentation: See usage examples above
+## 📜 License
+This model is released under the Apache 2.0 License. See LICENSE file for details.
+---
+*Specialized for Indian address entity recognition - Built with ❤️ by shiprocket-ai team using TinyBERT*

config.json ADDED Viewed

	@@ -0,0 +1,77 @@

+{
+  "architectures": [
+    "BertForTokenClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "cell": {},
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "O",
+    "1": "B-building_name",
+    "2": "I-building_name",
+    "3": "B-city",
+    "4": "I-city",
+    "5": "B-country",
+    "6": "I-country",
+    "7": "B-floor",
+    "8": "I-floor",
+    "9": "B-house_details",
+    "10": "I-house_details",
+    "11": "B-locality",
+    "12": "I-locality",
+    "13": "B-pincode",
+    "14": "I-pincode",
+    "15": "B-road",
+    "16": "I-road",
+    "17": "B-state",
+    "18": "I-state",
+    "19": "B-sub_locality",
+    "20": "I-sub_locality",
+    "21": "B-landmarks",
+    "22": "I-landmarks"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "B-building_name": 1,
+    "B-city": 3,
+    "B-country": 5,
+    "B-floor": 7,
+    "B-house_details": 9,
+    "B-landmarks": 21,
+    "B-locality": 11,
+    "B-pincode": 13,
+    "B-road": 15,
+    "B-state": 17,
+    "B-sub_locality": 19,
+    "I-building_name": 2,
+    "I-city": 4,
+    "I-country": 6,
+    "I-floor": 8,
+    "I-house_details": 10,
+    "I-landmarks": 22,
+    "I-locality": 12,
+    "I-pincode": 14,
+    "I-road": 16,
+    "I-state": 18,
+    "I-sub_locality": 20,
+    "O": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "pre_trained": "",
+  "structure": [],
+  "torch_dtype": "float32",
+  "transformers_version": "4.52.4",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

entity_mappings.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "entity2id": {
+    "O": 0,
+    "B-building_name": 1,
+    "I-building_name": 2,
+    "B-city": 3,
+    "I-city": 4,
+    "B-country": 5,
+    "I-country": 6,
+    "B-floor": 7,
+    "I-floor": 8,
+    "B-house_details": 9,
+    "I-house_details": 10,
+    "B-locality": 11,
+    "I-locality": 12,
+    "B-pincode": 13,
+    "I-pincode": 14,
+    "B-road": 15,
+    "I-road": 16,
+    "B-state": 17,
+    "I-state": 18,
+    "B-sub_locality": 19,
+    "I-sub_locality": 20,
+    "B-landmarks": 21,
+    "I-landmarks": 22
+  },
+  "id2entity": {
+    "0": "O",
+    "1": "B-building_name",
+    "2": "I-building_name",
+    "3": "B-city",
+    "4": "I-city",
+    "5": "B-country",
+    "6": "I-country",
+    "7": "B-floor",
+    "8": "I-floor",
+    "9": "B-house_details",
+    "10": "I-house_details",
+    "11": "B-locality",
+    "12": "I-locality",
+    "13": "B-pincode",
+    "14": "I-pincode",
+    "15": "B-road",
+    "16": "I-road",
+    "17": "B-state",
+    "18": "I-state",
+    "19": "B-sub_locality",
+    "20": "I-sub_locality",
+    "21": "B-landmarks",
+    "22": "I-landmarks"
+  }
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:822e8199f7caba4f6fa7ba38f0e006fc035c4a014acd1ec87d6c79f2ab185b4e
+size 265540428

model_card_metadata.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "language": [
+    "en"
+  ],
+  "tags": [
+    "token-classification",
+    "ner",
+    "indian-addresses",
+    "address-parsing",
+    "tinybert",
+    "entity-extraction",
+    "address-components",
+    "indian-postal",
+    "location-extraction",
+    "lightweight-model"
+  ],
+  "datasets": [
+    "custom-indian-addresses"
+  ],
+  "metrics": [
+    "precision",
+    "recall",
+    "f1"
+  ],
+  "model_type": "bert",
+  "base_model": "huawei-noah/TinyBERT_General_6L_768D",
+  "pipeline_tag": "token-classification",
+  "widget": [
+    {
+      "text": "Shop No 123, Sunshine Apartments, Andheri West, Mumbai, 400058",
+      "example_title": "Complete Address"
+    },
+    {
+      "text": "DLF Cyber City, Sector 25, Gurgaon, Haryana",
+      "example_title": "Commercial Address"
+    },
+    {
+      "text": "Flat 201, MG Road, Bangalore, Karnataka, 560001",
+      "example_title": "Residential Address"
+    }
+  ]
+}

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d10333f7f6f93e1967e6631f8b5e26dda56023e33c53336750a8d5c7020f684
+size 531143627

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a30f1e62dce4b06fe3b4ad6f17ae7ba4b40aa140dd38d5e09c0e9ea9e316f46
+size 14709

scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0156ea329dd483385c86313a0b0d09c7050936b4ce8a9ed2d386c43d99df3669
+size 1383

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ff3b4cc5efe2ab5b80c2cc4cbcc46ba5f5e5c4afbf9f7ef95071a6e348ef51d
+size 1465

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 1000000000000000019884624838656,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,163 @@

+{
+  "best_global_step": 20793,
+  "best_metric": 0.9469405010418878,
+  "best_model_checkpoint": "./ner_output_tinybert6L/combined_300percent_TinyBERT_General_6L_768D_20250619_074923/checkpoints/checkpoint-20793",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 20793,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.24989179050642044,
+      "grad_norm": 0.6586679816246033,
+      "learning_rate": 4.5837541480305874e-05,
+      "loss": 0.3426,
+      "step": 1732
+    },
+    {
+      "epoch": 0.49978358101284087,
+      "grad_norm": 0.5292770266532898,
+      "learning_rate": 4.1672678305198864e-05,
+      "loss": 0.2178,
+      "step": 3464
+    },
+    {
+      "epoch": 0.7496753715192613,
+      "grad_norm": 1.1701136827468872,
+      "learning_rate": 3.751262444091762e-05,
+      "loss": 0.1982,
+      "step": 5196
+    },
+    {
+      "epoch": 0.9995671620256817,
+      "grad_norm": 3.1348302364349365,
+      "learning_rate": 3.3347761265810614e-05,
+      "loss": 0.1881,
+      "step": 6928
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9393524410059049,
+      "eval_f1": 0.9379816398725478,
+      "eval_loss": 0.17937302589416504,
+      "eval_precision": 0.9389823871991911,
+      "eval_recall": 0.9393524410059049,
+      "eval_runtime": 26.2651,
+      "eval_samples_per_second": 745.056,
+      "eval_steps_per_second": 46.602,
+      "step": 6931
+    },
+    {
+      "epoch": 1.2494589525321023,
+      "grad_norm": 3.0484063625335693,
+      "learning_rate": 2.9182898090703604e-05,
+      "loss": 0.1595,
+      "step": 8660
+    },
+    {
+      "epoch": 1.4993507430385224,
+      "grad_norm": 0.3237079679965973,
+      "learning_rate": 2.5020439571009476e-05,
+      "loss": 0.1524,
+      "step": 10392
+    },
+    {
+      "epoch": 1.749242533544943,
+      "grad_norm": 1.4062304496765137,
+      "learning_rate": 2.0857981051315347e-05,
+      "loss": 0.1491,
+      "step": 12124
+    },
+    {
+      "epoch": 1.9991343240513635,
+      "grad_norm": 3.2153682708740234,
+      "learning_rate": 1.6693117876208337e-05,
+      "loss": 0.1402,
+      "step": 13856
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9458694225866252,
+      "eval_f1": 0.9456418383157583,
+      "eval_loss": 0.15713337063789368,
+      "eval_precision": 0.9463121752269362,
+      "eval_recall": 0.9458694225866252,
+      "eval_runtime": 27.0546,
+      "eval_samples_per_second": 723.316,
+      "eval_steps_per_second": 45.242,
+      "step": 13862
+    },
+    {
+      "epoch": 2.249026114557784,
+      "grad_norm": 0.006557302549481392,
+      "learning_rate": 1.2528254701101333e-05,
+      "loss": 0.115,
+      "step": 15588
+    },
+    {
+      "epoch": 2.4989179050642045,
+      "grad_norm": 1.2286018133163452,
+      "learning_rate": 8.365796181407206e-06,
+      "loss": 0.1167,
+      "step": 17320
+    },
+    {
+      "epoch": 2.7488096955706247,
+      "grad_norm": 0.032648004591464996,
+      "learning_rate": 4.200933006300197e-06,
+      "loss": 0.1112,
+      "step": 19052
+    },
+    {
+      "epoch": 2.998701486077045,
+      "grad_norm": 4.031618118286133,
+      "learning_rate": 3.847448660606935e-08,
+      "loss": 0.1071,
+      "step": 20784
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.9477408531081158,
+      "eval_f1": 0.9469405010418878,
+      "eval_loss": 0.15903286635875702,
+      "eval_precision": 0.946831252072933,
+      "eval_recall": 0.9477408531081158,
+      "eval_runtime": 26.9292,
+      "eval_samples_per_second": 726.682,
+      "eval_steps_per_second": 45.452,
+      "step": 20793
+    }
+  ],
+  "logging_steps": 1732,
+  "max_steps": 20793,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.001
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.0869727318769664e+16,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:948193f13be807bc54705ba1e696855c62505a8c02d93728604bddb5d56f1c98
+size 5841

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff