Sina1138 committed on
Commit
ea770c8
·
1 Parent(s): 66efe3b

fix model training scripts

Browse files
alternative_polarity/deberta/{deberta_v3_large_polarity.py → deberta_v3_base_polarity.py} RENAMED
@@ -16,7 +16,7 @@ from glimpse.glimpse.data_loading.Glimpse_tokenizer import glimpse_tokenizer
16
 
17
  # === CONFIGURATION ===
18
 
19
- MODEL_DIR = BASE_DIR / "alternative_polarity" / "deberta" / "deberta_v3_large_polarity_final_model"
20
  DATA_DIR = BASE_DIR / "glimpse" / "data" / "processed"
21
  OUTPUT_DIR = BASE_DIR / "data" / "polarity_scored"
22
  OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
 
16
 
17
  # === CONFIGURATION ===
18
 
19
+ MODEL_DIR = BASE_DIR / "alternative_polarity" / "deberta" / "deberta_v3_base_polarity_final_model"
20
  DATA_DIR = BASE_DIR / "glimpse" / "data" / "processed"
21
  OUTPUT_DIR = BASE_DIR / "data" / "polarity_scored"
22
  OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
alternative_polarity/deberta/{deberta_v3_large_polarity_train.py → deberta_v3_base_polarity_train.py} RENAMED
@@ -8,37 +8,20 @@ from torch.nn import functional as F
8
 
9
  from transformers import Trainer
10
 
11
- class WeightedTrainer(Trainer):
12
- def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
13
- labels = inputs.pop("labels")
14
- outputs = model(**inputs)
15
- logits = outputs.logits
16
- weights = class_weights.to(logits.device)
17
- loss = F.cross_entropy(logits, labels, weight=weights)
18
- return (loss, outputs) if return_outputs else loss
19
-
20
-
21
 
22
  # Load data
23
  train_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_train.csv")
24
  dev_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_dev.csv")
25
  test_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_test.csv")
26
 
27
- # Compute class weights (inverse frequency)
28
- neg_weight = 1.0
29
- pos_weight = train_df['label'].value_counts()[0] / train_df['label'].value_counts()[1]
30
- class_weights = torch.tensor([neg_weight, pos_weight], dtype=torch.float32)
31
-
32
  # Convert to HuggingFace Datasets
33
  train_ds = Dataset.from_pandas(train_df)
34
  dev_ds = Dataset.from_pandas(dev_df)
35
  test_ds = Dataset.from_pandas(test_df)
36
 
37
  # Tokenize
38
- model_name = "microsoft/deberta-v3-large"
39
  tokenizer = AutoTokenizer.from_pretrained(model_name)
40
- model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
41
-
42
  def tokenize(batch):
43
  return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=512)
44
 
@@ -52,13 +35,30 @@ dev_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"]
52
  test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
53
 
54
  # Load model
55
- model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  # Metrics
58
  def compute_metrics(eval_pred):
59
  logits, labels = eval_pred
60
  preds = np.argmax(logits, axis=1)
61
- precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="binary")
62
  acc = accuracy_score(labels, preds)
63
  return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}
64
 
@@ -66,7 +66,7 @@ def compute_metrics(eval_pred):
66
  args = TrainingArguments(
67
  output_dir="./alternative_polarity/deberta/checkpoints",
68
  eval_strategy="epoch",
69
- save_strategy="no",
70
  learning_rate=2e-5,
71
  per_device_train_batch_size=4,
72
  per_device_eval_batch_size=8,
@@ -94,5 +94,5 @@ results = trainer.evaluate(test_ds)
94
  print("Test results:", results)
95
 
96
  # Save the model and tokenizer
97
- model.save_pretrained("./alternative_polarity/deberta/deberta_v3_large_polarity_final_model")
98
- tokenizer.save_pretrained("./alternative_polarity/deberta/deberta_v3_large_polarity_final_model")
 
8
 
9
  from transformers import Trainer
10
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Load data
13
  train_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_train.csv")
14
  dev_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_dev.csv")
15
  test_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_test.csv")
16
 
 
 
 
 
 
17
  # Convert to HuggingFace Datasets
18
  train_ds = Dataset.from_pandas(train_df)
19
  dev_ds = Dataset.from_pandas(dev_df)
20
  test_ds = Dataset.from_pandas(test_df)
21
 
22
  # Tokenize
23
+ model_name = "microsoft/deberta-v3-base"
24
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
 
25
  def tokenize(batch):
26
  return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=512)
27
 
 
35
  test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
36
 
37
  # Load model
38
+ model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
39
+
40
+ # Compute class weights
41
+ label_counts = train_df['label'].value_counts()
42
+ total_samples = len(train_df)
43
+ class_weights = torch.tensor([total_samples / (len(label_counts) * count) for count in label_counts.sort_index().values])
44
+ class_weights = class_weights.to(dtype=torch.float32)
45
+ print("Class weights:", class_weights)
46
+
47
+ class WeightedTrainer(Trainer):
48
+ def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
49
+ labels = inputs.pop("labels")
50
+ outputs = model(**inputs)
51
+ logits = outputs.logits
52
+ weights = class_weights.to(logits.device)
53
+ loss = F.cross_entropy(logits, labels, weight=weights)
54
+ return (loss, outputs) if return_outputs else loss
55
+
56
 
57
  # Metrics
58
  def compute_metrics(eval_pred):
59
  logits, labels = eval_pred
60
  preds = np.argmax(logits, axis=1)
61
+ precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="macro")
62
  acc = accuracy_score(labels, preds)
63
  return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}
64
 
 
66
  args = TrainingArguments(
67
  output_dir="./alternative_polarity/deberta/checkpoints",
68
  eval_strategy="epoch",
69
+ save_strategy="epoch",
70
  learning_rate=2e-5,
71
  per_device_train_batch_size=4,
72
  per_device_eval_batch_size=8,
 
94
  print("Test results:", results)
95
 
96
  # Save the model and tokenizer
97
+ model.save_pretrained("./alternative_polarity/deberta/deberta_v3_base_polarity_final_model")
98
+ tokenizer.save_pretrained("./alternative_polarity/deberta/deberta_v3_base_polarity_final_model")
alternative_polarity/scideberta/scideberta_full_polarity_train.py CHANGED
@@ -24,11 +24,6 @@ train_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity
24
  dev_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_dev.csv")
25
  test_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_test.csv")
26
 
27
- # Compute class weights (inverse frequency)
28
- neg_weight = 1.0
29
- pos_weight = train_df['label'].value_counts()[0] / train_df['label'].value_counts()[1]
30
- class_weights = torch.tensor([neg_weight, pos_weight], dtype=torch.float32)
31
-
32
  # Convert to HuggingFace Datasets
33
  train_ds = Dataset.from_pandas(train_df)
34
  dev_ds = Dataset.from_pandas(dev_df)
@@ -36,7 +31,6 @@ test_ds = Dataset.from_pandas(test_df)
36
 
37
  model_name = "KISTI-AI/Scideberta-full"
38
  tokenizer = AutoTokenizer.from_pretrained(model_name)
39
- model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
40
 
41
  def tokenize(batch):
42
  return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=512)
@@ -51,13 +45,30 @@ dev_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"]
51
  test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
52
 
53
  # Load model
54
- model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  # Metrics
57
  def compute_metrics(eval_pred):
58
  logits, labels = eval_pred
59
  preds = np.argmax(logits, axis=1)
60
- precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="binary")
61
  acc = accuracy_score(labels, preds)
62
  return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}
63
 
@@ -65,7 +76,7 @@ def compute_metrics(eval_pred):
65
  args = TrainingArguments(
66
  output_dir="./alternative_polarity/scideberta/checkpoints",
67
  eval_strategy="epoch",
68
- save_strategy="no",
69
  learning_rate=2e-5,
70
  per_device_train_batch_size=4,
71
  per_device_eval_batch_size=8,
 
24
  dev_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_dev.csv")
25
  test_df = pd.read_csv("./data/DISAPERE-main/SELFExtractedData/disapere_polarity_test.csv")
26
 
 
 
 
 
 
27
  # Convert to HuggingFace Datasets
28
  train_ds = Dataset.from_pandas(train_df)
29
  dev_ds = Dataset.from_pandas(dev_df)
 
31
 
32
  model_name = "KISTI-AI/Scideberta-full"
33
  tokenizer = AutoTokenizer.from_pretrained(model_name)
 
34
 
35
  def tokenize(batch):
36
  return tokenizer(batch["text"], padding="max_length", truncation=True, max_length=512)
 
45
  test_ds.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
46
 
47
  # Load model
48
+ model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
49
+
50
+ # Compute class weights
51
+ label_counts = train_df['label'].value_counts()
52
+ total_samples = len(train_df)
53
+ class_weights = torch.tensor([total_samples / (len(label_counts) * count) for count in label_counts.sort_index().values])
54
+ class_weights = class_weights.to(dtype=torch.float32)
55
+ print("Class weights:", class_weights)
56
+
57
+ class WeightedTrainer(Trainer):
58
+ def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
59
+ labels = inputs.pop("labels")
60
+ outputs = model(**inputs)
61
+ logits = outputs.logits
62
+ weights = class_weights.to(logits.device)
63
+ loss = F.cross_entropy(logits, labels, weight=weights)
64
+ return (loss, outputs) if return_outputs else loss
65
+
66
 
67
  # Metrics
68
  def compute_metrics(eval_pred):
69
  logits, labels = eval_pred
70
  preds = np.argmax(logits, axis=1)
71
+ precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average="macro")
72
  acc = accuracy_score(labels, preds)
73
  return {"accuracy": acc, "f1": f1, "precision": precision, "recall": recall}
74
 
 
76
  args = TrainingArguments(
77
  output_dir="./alternative_polarity/scideberta/checkpoints",
78
  eval_strategy="epoch",
79
+ save_strategy="epoch",
80
  learning_rate=2e-5,
81
  per_device_train_batch_size=4,
82
  per_device_eval_batch_size=8,
scibert/scibert_polarity/scibert_polarity_train.py CHANGED
@@ -67,7 +67,7 @@ def compute_metrics(eval_pred):
67
  args = TrainingArguments(
68
  output_dir="./scibert/scibert_polarity/checkpoints",
69
  eval_strategy="epoch",
70
- save_strategy="no",
71
  learning_rate=2e-5,
72
  per_device_train_batch_size=8,
73
  per_device_eval_batch_size=16,
 
67
  args = TrainingArguments(
68
  output_dir="./scibert/scibert_polarity/checkpoints",
69
  eval_strategy="epoch",
70
+ save_strategy="epoch",
71
  learning_rate=2e-5,
72
  per_device_train_batch_size=8,
73
  per_device_eval_batch_size=16,