Update label mappings for FinanceBERT

Files changed (7) hide show

anotherscript.py ADDED Viewed

+"""Attach readable sentiment label names to the local FinanceBERT checkpoint.
+
+Loads the tokenizer and the sequence-classification model from ``model_path``,
+sets ``id2label`` / ``label2id`` on the model config, and saves both back to
+the same directory.
+"""
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+
+# Raw string so the backslashes in the Windows path are not read as escapes.
+model_path = r'C:\Users\marco\financebert'
+
+# Label names listed in class-index order (index 0, 1, 2).
+sentiment_names = ('Negative', 'Neutral', 'Positive')
+
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForSequenceClassification.from_pretrained(model_path)
+
+# Forward (index -> name) and reverse (name -> index) maps built from one list
+# so the two cannot drift apart.
+model.config.id2label = dict(enumerate(sentiment_names))
+model.config.label2id = {name: index for index, name in enumerate(sentiment_names)}
+
+# Tokenizer first, then the model; both are written to the directory they
+# were loaded from.
+for artifact in (tokenizer, model):
+    artifact.save_pretrained(model_path)
+print("Tokenizer and model saved with updated labels.")

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "bert-base-uncased",
   "architectures": [
     "BertForSequenceClassification"
   ],
@@ -10,16 +10,16 @@
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
-    "0": "LABEL_0",
-    "1": "LABEL_1",
-    "2": "LABEL_2"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
-    "LABEL_0": 0,
-    "LABEL_1": 1,
-    "LABEL_2": 2
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,

 {
+  "_name_or_path": "C:\\Users\\marco\\financebert",
   "architectures": [
     "BertForSequenceClassification"
   ],
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "id2label": {
+    "0": "Negative",
+    "1": "Neutral",
+    "2": "Positive"
   },
   "initializer_range": 0.02,
   "intermediate_size": 3072,
   "label2id": {
+    "Negative": 0,
+    "Neutral": 1,
+    "Positive": 2
   },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,

model_update.py ADDED Viewed

+"""Set sentiment label names on a sequence-classification model's config.
+
+Only the in-memory config object is modified; nothing in this script writes
+the model back to disk.
+"""
+from transformers import AutoModelForSequenceClassification
+
+# NOTE(review): the path looks like a placeholder; point it at the real
+# checkpoint before running.
+model = AutoModelForSequenceClassification.from_pretrained('path_to_your_local_model')
+
+# Label names in class-index order; both maps are derived from this list.
+label_names = ['Negative', 'Neutral', 'Positive']
+model.config.id2label = dict(zip(range(len(label_names)), label_names))
+model.config.label2id = dict(zip(label_names, range(len(label_names))))

script.py ADDED Viewed

+"""Inspect the FinanceBERT weights file without deserializing any objects.
+
+``model.safetensors`` is in the safetensors format, not a pickle stream, so
+``pickle.load`` cannot read it (and unpickling a downloaded file would be
+unsafe anyway).  A safetensors file starts with an 8-byte little-endian
+unsigned integer giving the size of a UTF-8 JSON header; that header maps
+each tensor name to its dtype, shape and byte offsets.  Only the header is
+read here, using the standard library, and the tensor listing is printed.
+"""
+import json
+import os
+import struct
+
+weights_path = r'C:\Users\marco\financebert\model.safetensors'
+
+try:
+    file_size = os.path.getsize(weights_path)
+    with open(weights_path, 'rb') as f:
+        size_field = f.read(8)
+        if len(size_field) != 8:
+            raise ValueError("file is too short to be a safetensors file")
+        (header_size,) = struct.unpack('<Q', size_field)
+        # Reject a size that cannot fit in the file before reading it, so a
+        # file in some other format does not trigger a huge allocation.
+        if header_size > file_size - 8:
+            raise ValueError("header size exceeds file size; not a safetensors file")
+        header = json.loads(f.read(header_size).decode('utf-8'))
+    # "__metadata__" is an optional free-form entry, not a tensor.
+    tensors = {name: info for name, info in header.items() if name != '__metadata__'}
+    print("Safetensors header read successfully:", len(tensors), "tensors")
+    for name, info in tensors.items():
+        print(name, info['dtype'], info['shape'])
+except Exception as e:
+    # Top-level boundary of a diagnostic script: report the problem and exit
+    # normally instead of showing a traceback.
+    print("Failed to read safetensors header:", str(e))

special_tokens_map.json CHANGED Viewed

@@ -1,7 +1,37 @@
 {
-  "cls_token": "[CLS]",
-  "mask_token": "[MASK]",
-  "pad_token": "[PAD]",
-  "sep_token": "[SEP]",
-  "unk_token": "[UNK]"
 }

 {
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer_config.json CHANGED Viewed

@@ -45,11 +45,18 @@
   "cls_token": "[CLS]",
   "do_lower_case": true,
   "mask_token": "[MASK]",
   "model_max_length": 512,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
   "unk_token": "[UNK]"
 }

   "cls_token": "[CLS]",
   "do_lower_case": true,
   "mask_token": "[MASK]",
+  "max_length": 512,
   "model_max_length": 512,
+  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "[SEP]",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }

your_script_name.py ADDED Viewed

+"""Load the FinanceBERT checkpoint and list the tensors it contains.
+
+``model.safetensors`` is stored in the safetensors format, which
+``torch.load`` does not read (it expects a file written by ``torch.save``).
+The previous version therefore failed on the direct load and again on the
+retry, which issued the same call with only ``map_location`` added.  The
+checkpoint directory is loaded with ``from_pretrained`` instead, which reads
+the config and the weights file together.
+"""
+from transformers import AutoModelForSequenceClassification
+
+# from_pretrained takes the checkpoint directory (config.json plus weights),
+# not the path of the weights file itself.
+model_dir = r'C:\Users\marco\financebert'
+
+try:
+    model = AutoModelForSequenceClassification.from_pretrained(model_dir)
+    print("Model loaded successfully:", model)
+    # Names and shapes of the tensors held by the loaded model.
+    for name, tensor in model.state_dict().items():
+        print(name, tuple(tensor.shape))
+except Exception as e:
+    # Top-level boundary of a diagnostic script: report the problem and exit
+    # normally instead of showing a traceback.
+    print("Failed to load the model:", str(e))