mustafoyev202 commited on
Commit
2811581
·
verified ·
1 Parent(s): 870202d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +25 -25
README.md CHANGED
@@ -87,52 +87,52 @@ The following hyperparameters were used during training:
87
  ### Usage
88
 
89
 
90
- from transformers import AutoTokenizer, AutoModelForTokenClassification
91
- import torch
92
 
93
- custom_id2label = {
94
  0: "O", 1: "B-CARDINAL", 2: "I-CARDINAL", 3: "B-DATE", 4: "I-DATE",
95
  5: "B-EVENT", 6: "I-EVENT", 7: "B-GPE", 8: "I-GPE", 9: "B-LOC", 10: "I-LOC",
96
  11: "B-MONEY", 12: "I-MONEY", 13: "B-ORDINAL", 14: "B-ORG", 15: "I-ORG",
97
  16: "B-PERCENT", 17: "I-PERCENT", 18: "B-PERSON", 19: "I-PERSON",
98
  20: "B-TIME", 21: "I-TIME"
99
  }
100
- custom_label2id = {v: k for k, v in custom_id2label.items()}
101
 
102
- model_name = "mustafoyev202/roberta-uz"
103
- tokenizer = AutoTokenizer.from_pretrained(model_name)
104
- model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=23)
105
 
106
- model.config.id2label = custom_id2label
107
- model.config.label2id = custom_label2id
108
 
109
- text = "Tesla kompaniyasi AQSHda joylashgan."
110
 
111
- tokens = tokenizer(text.split(), return_tensors="pt", is_split_into_words=True)
112
 
113
- with torch.no_grad():
114
  logits = model(**tokens).logits
115
 
116
- predicted_token_class_ids = logits.argmax(-1).squeeze().tolist()
117
 
118
- word_ids = tokens.word_ids()
119
- previous_word_id = None
120
- word_predictions = {}
121
 
122
- for i, word_id in enumerate(word_ids):
123
  if word_id is not None:
124
  label = custom_id2label[predicted_token_class_ids[i]]
125
  if word_id != previous_word_id: # New word
126
  word_predictions[word_id] = label
127
  previous_word_id = word_id
128
 
129
- words = text.split() # Splitting for simplicity
130
- final_predictions = [(word, word_predictions.get(i, "O")) for i, word in enumerate(words)]
131
 
132
- print("Predictions:")
133
- for word, label in final_predictions:
134
  print(f"{word}: {label}")
135
-
136
- labels = torch.tensor([predicted_token_class_ids]).unsqueeze(0) # Adjust dimensions
137
- loss = model(**tokens, labels=labels).loss
138
- print("\nLoss:", round(loss.item(), 2))
 
87
  ### Usage
88
 
89
 
90
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
91
+ import torch
92
 
93
+ custom_id2label = {
94
  0: "O", 1: "B-CARDINAL", 2: "I-CARDINAL", 3: "B-DATE", 4: "I-DATE",
95
  5: "B-EVENT", 6: "I-EVENT", 7: "B-GPE", 8: "I-GPE", 9: "B-LOC", 10: "I-LOC",
96
  11: "B-MONEY", 12: "I-MONEY", 13: "B-ORDINAL", 14: "B-ORG", 15: "I-ORG",
97
  16: "B-PERCENT", 17: "I-PERCENT", 18: "B-PERSON", 19: "I-PERSON",
98
  20: "B-TIME", 21: "I-TIME"
99
  }
100
+ custom_label2id = {v: k for k, v in custom_id2label.items()}
101
 
102
+ model_name = "mustafoyev202/roberta-uz"
103
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
104
+ model = AutoModelForTokenClassification.from_pretrained(model_name, num_labels=23)
105
 
106
+ model.config.id2label = custom_id2label
107
+ model.config.label2id = custom_label2id
108
 
109
+ text = "Tesla kompaniyasi AQSHda joylashgan."
110
 
111
+ tokens = tokenizer(text.split(), return_tensors="pt", is_split_into_words=True)
112
 
113
+ with torch.no_grad():
114
  logits = model(**tokens).logits
115
 
116
+ predicted_token_class_ids = logits.argmax(-1).squeeze().tolist()
117
 
118
+ word_ids = tokens.word_ids()
119
+ previous_word_id = None
120
+ word_predictions = {}
121
 
122
+ for i, word_id in enumerate(word_ids):
123
  if word_id is not None:
124
  label = custom_id2label[predicted_token_class_ids[i]]
125
  if word_id != previous_word_id: # New word
126
  word_predictions[word_id] = label
127
  previous_word_id = word_id
128
 
129
+ words = text.split() # Splitting for simplicity
130
+ final_predictions = [(word, word_predictions.get(i, "O")) for i, word in enumerate(words)]
131
 
132
+ print("Predictions:")
133
+ for word, label in final_predictions:
134
  print(f"{word}: {label}")
135
+
136
+ labels = torch.tensor([predicted_token_class_ids]).unsqueeze(0) # Adjust dimensions
137
+ loss = model(**tokens, labels=labels).loss
138
+ print("\nLoss:", round(loss.item(), 2))