Riverise committed
Commit fc9ae4e · verified · 1 Parent(s): aa54241

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +35 -0
  2. bert_finetue_task2.py +223 -0
  3. bert_finetue_task2_froze.py +247 -0
  4. bert_finetune_task1.py +126 -0
  5. bert_finetune_task1_froze.py +148 -0
  6. continue_pretrain.py +132 -0
  7. dapt_data_process.py +69 -0
  8. data_process_task1.py +89 -0
  9. data_process_task2.py +95 -0
  10. data_source.xlsx +3 -0
  11. dataset_pretrain/Experiment_sentences_training_filtered_part1.csv +3 -0
  12. dataset_pretrain/domain_corpus.txt +3 -0
  13. dataset_pretrain/预训练数据第二部分_年报.zip +3 -0
  14. model_inference_task1.py +162 -0
  15. model_inference_task2.py +153 -0
  16. outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/config.json +43 -0
  17. outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/model.safetensors +3 -0
  18. outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/special_tokens_map.json +7 -0
  19. outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/tokenizer.json +0 -0
  20. outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/tokenizer_config.json +56 -0
  21. outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/training_args.bin +3 -0
  22. outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/vocab.txt +0 -0
  23. outputs/bert_bilabel_finetuned_model/checkpoint-1094/config.json +30 -0
  24. outputs/bert_bilabel_finetuned_model/checkpoint-1094/model.safetensors +3 -0
  25. outputs/bert_bilabel_finetuned_model/checkpoint-1094/optimizer.pt +3 -0
  26. outputs/bert_bilabel_finetuned_model/checkpoint-1094/rng_state.pth +3 -0
  27. outputs/bert_bilabel_finetuned_model/checkpoint-1094/scheduler.pt +3 -0
  28. outputs/bert_bilabel_finetuned_model/checkpoint-1094/trainer_state.json +191 -0
  29. outputs/bert_bilabel_finetuned_model/checkpoint-1094/training_args.bin +3 -0
  30. outputs/bert_bilabel_finetuned_model/checkpoint-2188/config.json +30 -0
  31. outputs/bert_bilabel_finetuned_model/checkpoint-2188/model.safetensors +3 -0
  32. outputs/bert_bilabel_finetuned_model/checkpoint-2188/optimizer.pt +3 -0
  33. outputs/bert_bilabel_finetuned_model/checkpoint-2188/rng_state.pth +3 -0
  34. outputs/bert_bilabel_finetuned_model/checkpoint-2188/scheduler.pt +3 -0
  35. outputs/bert_bilabel_finetuned_model/checkpoint-2188/trainer_state.json +355 -0
  36. outputs/bert_bilabel_finetuned_model/checkpoint-2188/training_args.bin +3 -0
  37. outputs/bert_bilabel_finetuned_model/checkpoint-3282/config.json +30 -0
  38. outputs/bert_bilabel_finetuned_model/checkpoint-3282/model.safetensors +3 -0
  39. outputs/bert_bilabel_finetuned_model/checkpoint-3282/optimizer.pt +3 -0
  40. outputs/bert_bilabel_finetuned_model/checkpoint-3282/rng_state.pth +3 -0
  41. outputs/bert_bilabel_finetuned_model/checkpoint-3282/scheduler.pt +3 -0
  42. outputs/bert_bilabel_finetuned_model/checkpoint-3282/trainer_state.json +519 -0
  43. outputs/bert_bilabel_finetuned_model/checkpoint-3282/training_args.bin +3 -0
  44. outputs/bert_bilabel_finetuned_model/final/config.json +30 -0
  45. outputs/bert_bilabel_finetuned_model/final/model.safetensors +3 -0
  46. outputs/bert_bilabel_finetuned_model/final/training_args.bin +3 -0
  47. outputs/bert_bilabel_frozen_classifier_finetuned_model/checkpoint-1094/config.json +30 -0
  48. outputs/bert_bilabel_frozen_classifier_finetuned_model/checkpoint-1094/model.safetensors +3 -0
  49. outputs/bert_bilabel_frozen_classifier_finetuned_model/checkpoint-1094/optimizer.pt +3 -0
  50. outputs/bert_bilabel_frozen_classifier_finetuned_model/checkpoint-1094/rng_state.pth +3 -0
.gitattributes CHANGED
@@ -33,3 +33,38 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ data_source.xlsx filter=lfs diff=lfs merge=lfs -text
+ dataset_pretrain/Experiment_sentences_training_filtered_part1.csv filter=lfs diff=lfs merge=lfs -text
+ dataset_pretrain/domain_corpus.txt filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251113_020110-hwo46nr1/run-hwo46nr1.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251113_024451-lmhafthr/run-lmhafthr.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251113_055509-63h4lqr8/run-63h4lqr8.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251113_055942-lq4qoqk3/run-lq4qoqk3.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251113_074324-2o6q17un/run-2o6q17un.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251113_075410-cilrwgz8/run-cilrwgz8.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251113_080542-irf9fgra/run-irf9fgra.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251113_170012-vj6z0qct/run-vj6z0qct.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251114_144619-0hviozok/run-0hviozok.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251114_145658-0lepm1if/run-0lepm1if.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251114_150434-0nq8ji5k/run-0nq8ji5k.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251114_152637-xe0tjdf6/run-xe0tjdf6.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251114_153529-7r2aeenh/run-7r2aeenh.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251114_154223-xmrnfh0j/run-xmrnfh0j.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251114_154302-7tit87eb/run-7tit87eb.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251114_161829-g1azoa0i/run-g1azoa0i.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251114_171922-j1hfy78o/run-j1hfy78o.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_023230-j4s1o16p/run-j4s1o16p.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_024020-whj9y4hx/run-whj9y4hx.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_031217-29o94la6/run-29o94la6.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_032957-oljr07ni/run-oljr07ni.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_033525-i1hsksbs/run-i1hsksbs.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_033750-ybm95q2x/run-ybm95q2x.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_034104-e4a2rovd/run-e4a2rovd.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_034702-q5cv2xfu/run-q5cv2xfu.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_034922-e287xu9n/run-e287xu9n.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_034939-zlf3muf5/run-zlf3muf5.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_035223-nehpw594/run-nehpw594.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_035728-inhxwz05/run-inhxwz05.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_035746-cmttchar/run-cmttchar.wandb filter=lfs diff=lfs merge=lfs -text
+ wandb/run-20251115_050557-37a3t1f4/run-37a3t1f4.wandb filter=lfs diff=lfs merge=lfs -text
+ 标注数据_更正后.xlsx filter=lfs diff=lfs merge=lfs -text
bert_finetue_task2.py ADDED
@@ -0,0 +1,223 @@
import os
import torch
import numpy as np
import json
from datasets import load_dataset
from transformers import BertPreTrainedModel, BertModel, BertTokenizerFast
from transformers import TrainingArguments, Trainer
from torch import nn
from sklearn.metrics import f1_score, accuracy_score, jaccard_score, precision_score, recall_score

# --- Configuration ---
DATA_DIR = "./processed_data_task2_fixed"
# MODEL_NAME = "bert-base-chinese"
TOKENIZER_NAME = "valuesimplex-ai-lab/FinBERT2-base"
MODEL_NAME = "/home/hsichen/part_time/BERT_finetune/outputs/finbert2_dapt_model"
# MODEL_NAME = "valuesimplex-ai-lab/FinBERT2-base"
# Total number of labels: Data, Action, Gain, Regu, Vague
NUM_LABELS = 5
OUTPUT_DIR = "/home/hsichen/part_time/BERT_finetune/outputs/finbert2_multilabel_model_finetuned_from_dapt"
EPOCHS = 5
BATCH_SIZE = 16
LEARNING_RATE = 2e-5  # NOTE: the TrainingArguments below hard-code learning_rate=3e-5 instead
SEED = 42

# ----------------------------------------------------
# A. BERT model with multi-label classification support
# ----------------------------------------------------
class BertForMultiLabelClassification(BertPreTrainedModel):
    """
    BERT-based multi-label classifier trained with BCEWithLogitsLoss.
    """
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)

        classifier_dropout = config.hidden_dropout_prob
        self.dropout = nn.Dropout(classifier_dropout)

        # Linear layer output dimension = number of labels (5)
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)

        self.post_init()
        self.loss_fct = nn.BCEWithLogitsLoss()

    def forward(self,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                labels=None):

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

        # Take the [CLS] token's hidden state (the pooler output)
        pooled_output = outputs.pooler_output
        pooled_output = self.dropout(pooled_output)

        # Pass through the classifier head; logits are pre-sigmoid scores
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            # BCEWithLogitsLoss requires float labels
            loss = self.loss_fct(logits, labels.float())

        return (loss, logits) if loss is not None else (logits,)

# Label names, used in the metric report
TAG_COLS = ['Data', 'Action', 'Gain', 'Regu', 'Vague']
PREDICTION_THRESHOLD = 0.5

def compute_metrics(p):
    """
    Compute multi-label evaluation metrics, both global and per class.
    Required: loss, F1, precision, accuracy, recall.
    """
    # Loss is handled and logged by the Trainer; this function covers the rest.
    logits = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    labels = p.label_ids

    # Sigmoid -> probabilities
    probs = 1 / (1 + np.exp(-logits))
    # Apply the 0.5 threshold to get binary predictions
    preds = (probs > PREDICTION_THRESHOLD).astype(int)

    # ----------------------------------------
    # 1. Global aggregate metrics (used for metric_for_best_model)
    # ----------------------------------------
    metrics = {}

    # Micro-F1 (commonly used to select the best model)
    metrics["f1_micro"] = f1_score(labels, preds, average='micro')
    # Macro-F1 (balances the contribution of each class)
    metrics["f1_macro"] = f1_score(labels, preds, average='macro')
    # Sample-wise Jaccard (overlap between predicted and true label sets per sample)
    metrics["jaccard_samples"] = jaccard_score(labels, preds, average='samples')

    # ----------------------------------------
    # 2. Per-class metrics
    # ----------------------------------------
    # Note: per-class metrics in the multi-label setting are just binary metrics
    # computed column by column (one column per label).
    for i, tag in enumerate(TAG_COLS):
        y_true_class = labels[:, i]  # ground truth for the i-th label
        y_pred_class = preds[:, i]   # predictions for the i-th label

        # Naming convention: {tag}_f1, {tag}_precision, {tag}_recall, {tag}_accuracy
        metrics[f"{tag}_f1"] = f1_score(y_true_class, y_pred_class, average='binary', zero_division=0)
        metrics[f"{tag}_precision"] = precision_score(y_true_class, y_pred_class, average='binary', zero_division=0)
        metrics[f"{tag}_recall"] = recall_score(y_true_class, y_pred_class, average='binary', zero_division=0)
        # Accuracy for this label alone (correct samples / total samples),
        # not exact-match accuracy over whole label vectors
        metrics[f"{tag}_accuracy"] = accuracy_score(y_true_class, y_pred_class)

    return metrics

# ----------------------------------------------------
# C. Main fine-tuning function
# ----------------------------------------------------
def finetune_multilabel_bert():

    # 1. Load the datasets
    print("--- 1. Loading datasets ---")
    data_files = {
        "train": os.path.join(DATA_DIR, "train.csv"),
        "validation": os.path.join(DATA_DIR, "validation.csv"),
        "test": os.path.join(DATA_DIR, "test.csv")
    }
    raw_datasets = load_dataset("csv", data_files=data_files)

    # 2. Load tokenizer and custom model
    print("--- 2. Loading tokenizer and custom model ---")

    tokenizer = BertTokenizerFast.from_pretrained(TOKENIZER_NAME)

    # Use the custom BertForMultiLabelClassification
    model = BertForMultiLabelClassification.from_pretrained(
        MODEL_NAME,
        num_labels=NUM_LABELS,
        ignore_mismatched_sizes=True
    )

    # 3. Tokenize the datasets
    def tokenize_function(examples):
        # Assumes the text lives in the 'text' column
        tokenized = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

        # Convert the stringified label list from the CSV (e.g. '[1, 0, 1, 0, 0]')
        # into a list of numbers; json.loads is safer than eval()
        label_list = json.loads(examples["labels"].replace("'", "\""))
        tokenized["labels"] = label_list
        return tokenized

    # Note: batched=False so that each label string is parsed individually
    tokenized_datasets = raw_datasets.map(tokenize_function, batched=False)

    # Drop the raw 'text' column
    tokenized_datasets = tokenized_datasets.remove_columns(["text"])

    # Set the PyTorch tensor format
    tokenized_datasets.set_format("torch", columns=['input_ids', 'attention_mask', 'labels'])

    train_dataset = tokenized_datasets["train"]
    eval_dataset = tokenized_datasets["validation"]
    test_dataset = tokenized_datasets["test"]

    # 4. Training arguments
    print("--- 3. Setting up training arguments and Trainer ---")
    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        warmup_steps=200,
        weight_decay=0.01,
        logging_steps=50,
        eval_strategy="steps",
        eval_steps=50,
        save_strategy="steps",
        save_steps=500,
        load_best_model_at_end=True,
        metric_for_best_model="f1_micro",
        seed=SEED,
        learning_rate=3e-5,
        report_to="wandb"
    )

    # 5. Initialize the Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    # 6. Train
    print("--- 4. Starting training ---")
    trainer.train()

    # 7. Evaluate on the test set
    print("--- 5. Evaluating on the test set ---")
    results = trainer.evaluate(test_dataset)
    print(f"Test set results: {results}")

    # 8. Save the final model
    trainer.save_model(os.path.join(OUTPUT_DIR, "final"))
    tokenizer.save_pretrained(os.path.join(OUTPUT_DIR, "final"))
    print(f"Model and tokenizer saved to: {os.path.join(OUTPUT_DIR, 'final')}")

if __name__ == "__main__":
    finetune_multilabel_bert()
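
A quick way to sanity-check compute_metrics is to call it outside the Trainer on made-up predictions. In this minimal sketch, SimpleNamespace stands in for the EvalPrediction object the Trainer normally passes, and the logits and labels are invented:

# Sketch: exercising compute_metrics on toy data (illustrative values only).
from types import SimpleNamespace
import numpy as np

toy_logits = np.array([[ 2.0, -1.0,  0.3, -3.0, -0.5],
                       [-1.5,  1.2, -0.7,  0.9, -2.0]])
toy_labels = np.array([[1, 0, 1, 0, 0],
                       [0, 1, 0, 1, 0]])

p = SimpleNamespace(predictions=toy_logits, label_ids=toy_labels)
# Thresholded sigmoid predictions match the labels exactly here,
# so f1_micro, f1_macro, and jaccard_samples all come out to 1.0.
print(compute_metrics(p))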
bert_finetue_task2_froze.py ADDED
@@ -0,0 +1,247 @@
import os
import torch
import numpy as np
import json
from datasets import load_dataset
from transformers import BertPreTrainedModel, BertModel, BertTokenizerFast
from transformers import TrainingArguments, Trainer
from torch import nn
from sklearn.metrics import f1_score, accuracy_score, jaccard_score, precision_score, recall_score

# --- Configuration ---
DATA_DIR = "./processed_data_task2_fixed"
MODEL_NAME = "bert-base-chinese"
# MODEL_NAME = "/home/hsichen/part_time/BERT_finetune/outputs/finbert2_dapt_model"
# MODEL_NAME = "valuesimplex-ai-lab/FinBERT2-base"
# Total number of labels: Data, Action, Gain, Regu, Vague
NUM_LABELS = 5
OUTPUT_DIR = "/home/hsichen/part_time/BERT_finetune/outputs/bert_multilabel_frozen_classifier_finetuned_model"
EPOCHS = 5
BATCH_SIZE = 16
LEARNING_RATE = 1e-4  # NOTE: the TrainingArguments below hard-code learning_rate=3e-5 instead
SEED = 42

# ----------------------------------------------------
# A. BERT model with multi-label classification support
# ----------------------------------------------------
class BertForMultiLabelClassification(BertPreTrainedModel):
    """
    BERT-based multi-label classifier trained with BCEWithLogitsLoss.
    """
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        self.bert = BertModel(config)

        classifier_dropout = config.hidden_dropout_prob
        self.dropout = nn.Dropout(classifier_dropout)

        # Linear layer output dimension = number of labels (5)
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)

        self.post_init()
        self.loss_fct = nn.BCEWithLogitsLoss()

    def forward(self,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                labels=None):

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

        # Take the [CLS] token's hidden state (the pooler output)
        pooled_output = outputs.pooler_output
        pooled_output = self.dropout(pooled_output)

        # Pass through the classifier head; logits are pre-sigmoid scores
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            # BCEWithLogitsLoss requires float labels
            loss = self.loss_fct(logits, labels.float())

        return (loss, logits) if loss is not None else (logits,)

# ----------------------------------------------------
# B. Evaluation metrics (multi-label)
# ----------------------------------------------------

# Label names, used in the metric report
TAG_COLS = ['Data', 'Action', 'Gain', 'Regu', 'Vague']
PREDICTION_THRESHOLD = 0.5

def compute_metrics(p):
    """
    Compute multi-label evaluation metrics, both global and per class.
    Required: loss, F1, precision, accuracy, recall.
    """
    # Loss is handled and logged by the Trainer; this function covers the rest.
    # predictions may be a tuple, so unwrap the logits if needed
    logits = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    labels = p.label_ids

    # Sigmoid -> probabilities
    probs = 1 / (1 + np.exp(-logits))
    # Apply the 0.5 threshold to get binary predictions
    preds = (probs > PREDICTION_THRESHOLD).astype(int)

    # ----------------------------------------
    # 1. Global aggregate metrics (used for metric_for_best_model)
    # ----------------------------------------
    metrics = {}

    # Micro-F1 (commonly used to select the best model)
    metrics["f1_micro"] = f1_score(labels, preds, average='micro')
    # Macro-F1 (balances the contribution of each class)
    metrics["f1_macro"] = f1_score(labels, preds, average='macro')
    # Sample-wise Jaccard (overlap between predicted and true label sets per sample)
    metrics["jaccard_samples"] = jaccard_score(labels, preds, average='samples')

    # ----------------------------------------
    # 2. Per-class metrics
    # ----------------------------------------
    # Note: per-class metrics in the multi-label setting are just binary metrics
    # computed column by column (one column per label).
    for i, tag in enumerate(TAG_COLS):
        y_true_class = labels[:, i]  # ground truth for the i-th label
        y_pred_class = preds[:, i]   # predictions for the i-th label

        # Naming convention: {tag}_f1, {tag}_precision, {tag}_recall, {tag}_accuracy
        metrics[f"{tag}_f1"] = f1_score(y_true_class, y_pred_class, average='binary', zero_division=0)
        metrics[f"{tag}_precision"] = precision_score(y_true_class, y_pred_class, average='binary', zero_division=0)
        metrics[f"{tag}_recall"] = recall_score(y_true_class, y_pred_class, average='binary', zero_division=0)
        # Accuracy for this label alone (correct samples / total samples),
        # not exact-match accuracy over whole label vectors
        metrics[f"{tag}_accuracy"] = accuracy_score(y_true_class, y_pred_class)

    return metrics

# ----------------------------------------------------
# C. Main fine-tuning function
# ----------------------------------------------------
def finetune_multilabel_bert():

    # 1. Load the datasets
    print("--- 1. Loading datasets ---")
    data_files = {
        "train": os.path.join(DATA_DIR, "train.csv"),
        "validation": os.path.join(DATA_DIR, "validation.csv"),
        "test": os.path.join(DATA_DIR, "test.csv")
    }
    raw_datasets = load_dataset("csv", data_files=data_files)

    # 2. Load tokenizer and custom model
    print("--- 2. Loading tokenizer and custom model ---")

    tokenizer = BertTokenizerFast.from_pretrained(MODEL_NAME)

    # Use the custom BertForMultiLabelClassification
    model = BertForMultiLabelClassification.from_pretrained(
        MODEL_NAME,
        num_labels=NUM_LABELS,
    )

    print("--- 2.1. Freezing the BERT backbone (fixed version) ---")

    trainable_params_count = 0
    for name, param in model.named_parameters():
        # Backbone parameters have names starting with 'bert.'
        if name.startswith('bert.'):
            param.requires_grad = False
        else:
            # Everything else is the classifier head (classifier.weight/bias)
            param.requires_grad = True
            trainable_params_count += param.numel()  # count trainable parameters

    # Verify that the freeze took effect
    total_params = sum(p.numel() for p in model.parameters())
    # trainable_params should match trainable_params_count computed above
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"Total parameters: {total_params / 1e6:.2f} M")
    print(f"Trainable parameters (classifier head only): {trainable_params / 1e6:.6f} M")

    # 3. Tokenize the datasets
    def tokenize_function(examples):
        # Assumes the text lives in the 'text' column
        tokenized = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

        # Convert the stringified label list from the CSV (e.g. '[1, 0, 1, 0, 0]')
        # into a list of numbers; json.loads is safer than eval()
        label_list = json.loads(examples["labels"].replace("'", "\""))
        tokenized["labels"] = label_list
        return tokenized

    # Note: batched=False so that each label string is parsed individually
    tokenized_datasets = raw_datasets.map(tokenize_function, batched=False)

    # Drop the raw 'text' column
    tokenized_datasets = tokenized_datasets.remove_columns(["text"])

    # Set the PyTorch tensor format
    tokenized_datasets.set_format("torch", columns=['input_ids', 'attention_mask', 'labels'])

    train_dataset = tokenized_datasets["train"]
    eval_dataset = tokenized_datasets["validation"]
    test_dataset = tokenized_datasets["test"]

    # 4. Training arguments
    print("--- 3. Setting up training arguments and Trainer ---")
    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        warmup_steps=200,
        weight_decay=0.01,
        logging_steps=50,
        eval_strategy="steps",
        eval_steps=50,
        save_strategy="steps",
        save_steps=50,
        load_best_model_at_end=True,
        metric_for_best_model="f1_micro",
        seed=SEED,
        learning_rate=3e-5,
        report_to="wandb"
    )

    # 5. Initialize the Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    # 6. Train
    print("--- 4. Starting training ---")
    trainer.train()

    # 7. Evaluate on the test set
    print("--- 5. Evaluating on the test set ---")
    results = trainer.evaluate(test_dataset)
    print(f"Test set results: {results}")

    # 8. Save the final model
    trainer.save_model(os.path.join(OUTPUT_DIR, "final"))
    tokenizer.save_pretrained(os.path.join(OUTPUT_DIR, "final"))
    print(f"Model and tokenizer saved to: {os.path.join(OUTPUT_DIR, 'final')}")

if __name__ == "__main__":
    finetune_multilabel_bert()
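
The freezing loop above keys on parameter names that start with 'bert.'. A minimal sketch of the same pattern on a tiny stand-in module (no model download needed; the Toy class and its layer sizes are made up) shows how the trainable count drops to the head alone:

# Sketch: freeze-by-name-prefix on a hypothetical two-layer module.
import torch
from torch import nn

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.bert = nn.Linear(8, 8)        # stands in for the BERT backbone
        self.classifier = nn.Linear(8, 5)  # stands in for the classification head

model = Toy()
for name, param in model.named_parameters():
    param.requires_grad = not name.startswith('bert.')

trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"trainable: {trainable} / total: {total}")  # trainable: 45 / total: 117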
bert_finetune_task1.py ADDED
@@ -0,0 +1,126 @@
import os
import torch
import numpy as np
from datasets import load_dataset
import evaluate
from transformers import BertForSequenceClassification, BertTokenizerFast
from transformers import TrainingArguments, Trainer

# --- Configuration ---
# Directory containing the data prepared in the previous step
DATA_DIR = "./processed_data_task1"
# MODEL_NAME = "valuesimplex-ai-lab/FinBERT2-base"
MODEL_NAME = '/home/hsichen/part_time/BERT_finetune/outputs/finbert2_dapt_model'
# Number of labels for the model (0 and 1)
NUM_LABELS = 2
# Output directory for the fine-tuned model
OUTPUT_DIR = "./finbert2_bilabel_finetuned_model_from_dapt"
# Training hyperparameters
EPOCHS = 3
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
SEED = 42

def compute_metrics(p):
    """
    Compute evaluation metrics (accuracy and F1).
    """
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids

    # Use the Hugging Face evaluate library
    metric = evaluate.load("accuracy")
    accuracy = metric.compute(predictions=preds, references=labels)["accuracy"]

    # F1; precision and recall could be computed the same way
    metric_f1 = evaluate.load("f1")
    f1 = metric_f1.compute(predictions=preds, references=labels, average="binary")["f1"]

    return {
        'accuracy': accuracy,
        'f1': f1,
    }


def finetune_bert():
    """
    Run BERT fine-tuning.
    """
    # 1. Load datasets
    print("--- 1. Loading datasets ---")
    try:
        # Load the CSV files into a DatasetDict
        data_files = {
            "train": os.path.join(DATA_DIR, "train.csv"),
            "validation": os.path.join(DATA_DIR, "validation.csv"),
            "test": os.path.join(DATA_DIR, "test.csv")
        }
        raw_datasets = load_dataset("csv", data_files=data_files)
        print(raw_datasets)
    except Exception as e:
        print(f"Failed to load the datasets; check the CSV files under {DATA_DIR}: {e}")
        return

    # 2. Load tokenizer and model
    print("--- 2. Loading tokenizer and model ---")
    tokenizer = BertTokenizerFast.from_pretrained(MODEL_NAME)
    model = BertForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=NUM_LABELS
    )

    # 3. Tokenize the datasets
    def tokenize_function(examples):
        # Assumes text in the 'text' column and labels in the 'label' column
        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

    # Select the train/validation/test subsets
    train_dataset = tokenized_datasets["train"]
    eval_dataset = tokenized_datasets["validation"]
    test_dataset = tokenized_datasets["test"]

    # 4. Training arguments
    print("--- 3. Setting up training arguments and Trainer ---")
    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        warmup_steps=500,
        weight_decay=0.01,
        logging_steps=50,
        eval_strategy="epoch",        # evaluate at the end of each epoch
        save_strategy="epoch",
        load_best_model_at_end=True,  # reload the best checkpoint after training
        metric_for_best_model="f1",
        seed=SEED,
        learning_rate=LEARNING_RATE,
        report_to="wandb"             # log training to wandb
    )

    # 5. Initialize the Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    # 6. Train
    print("--- 4. Starting training ---")
    trainer.train()

    # 7. Evaluate on the test set
    print("--- 5. Evaluating on the test set ---")
    results = trainer.evaluate(test_dataset)
    print(f"Test set results: {results}")

    # 8. Save the final model
    trainer.save_model(os.path.join(OUTPUT_DIR, "final"))
    print(f"Model and tokenizer saved to: {os.path.join(OUTPUT_DIR, 'final')}")

if __name__ == "__main__":
    finetune_bert()
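
To see the shape of the result, compute_metrics can be exercised on invented predictions. This sketch assumes the evaluate metrics ("accuracy", "f1") are available locally or can be fetched on first use; SimpleNamespace stands in for the Trainer's EvalPrediction:

# Sketch: toy binary predictions (3 samples, 2 classes); all values are made up.
from types import SimpleNamespace
import numpy as np

toy_logits = np.array([[0.2, 1.3], [2.1, -0.4], [-0.5, 0.9]])  # argmax -> [1, 0, 1]
p = SimpleNamespace(predictions=toy_logits, label_ids=np.array([1, 0, 0]))
print(compute_metrics(p))  # {'accuracy': 0.666..., 'f1': 0.666...}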
bert_finetune_task1_froze.py ADDED
@@ -0,0 +1,148 @@
import os
import torch
import numpy as np
from datasets import load_dataset
import evaluate
from transformers import BertForSequenceClassification, BertTokenizerFast
from transformers import TrainingArguments, Trainer

# --- Configuration ---

DATA_DIR = "./processed_data_task1"
# BERT model to use; bert-base-chinese is the usual choice for Chinese tasks

MODEL_NAME = "valuesimplex-ai-lab/FinBERT2-base"
# MODEL_NAME = "bert-base-chinese"
# Number of labels for the model (0 and 1)
NUM_LABELS = 2
# Output directory for the fine-tuned model
OUTPUT_DIR = "/home/hsichen/part_time/BERT_finetune/outputs/finbert2_bilabel_frozen_classifier_finetuned_model"
# Training hyperparameters
EPOCHS = 3
BATCH_SIZE = 16
LEARNING_RATE = 1e-4
SEED = 42

def compute_metrics(p):
    """
    Compute evaluation metrics (accuracy and F1).
    """
    preds = np.argmax(p.predictions, axis=1)
    labels = p.label_ids

    # Use the Hugging Face evaluate library
    metric = evaluate.load("accuracy")
    accuracy = metric.compute(predictions=preds, references=labels)["accuracy"]

    # F1; precision and recall could be computed the same way
    metric_f1 = evaluate.load("f1")
    f1 = metric_f1.compute(predictions=preds, references=labels, average="binary")["f1"]

    return {
        'accuracy': accuracy,
        'f1': f1,
        # other metrics such as precision and recall can be added as needed
    }


def finetune_bert():
    """
    Run BERT fine-tuning with the backbone frozen.
    """
    # 1. Load datasets
    print("--- 1. Loading datasets ---")
    try:
        # Load the CSV files into a DatasetDict
        data_files = {
            "train": os.path.join(DATA_DIR, "train.csv"),
            "validation": os.path.join(DATA_DIR, "validation.csv"),
            "test": os.path.join(DATA_DIR, "test.csv")
        }
        raw_datasets = load_dataset("csv", data_files=data_files)
        print(raw_datasets)
    except Exception as e:
        print(f"Failed to load the datasets; check the CSV files under {DATA_DIR}: {e}")
        return

    print("--- 2. Loading tokenizer and model ---")
    tokenizer = BertTokenizerFast.from_pretrained(MODEL_NAME)
    model = BertForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=NUM_LABELS
    )

    print("--- 2.1. Freezing the BERT backbone (fixed version) ---")

    trainable_params_count = 0
    for name, param in model.named_parameters():
        # Backbone parameters have names starting with 'bert.'
        if name.startswith('bert.'):
            param.requires_grad = False
        else:
            # Everything else is the classifier head (classifier.weight/bias)
            param.requires_grad = True
            trainable_params_count += param.numel()  # count trainable parameters

    # Verify that the freeze took effect
    total_params = sum(p.numel() for p in model.parameters())
    # trainable_params should match trainable_params_count computed above
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print(f"Total parameters: {total_params / 1e6:.2f} M")
    print(f"Trainable parameters (classifier head only): {trainable_params / 1e6:.6f} M")

    # 3. Tokenize the datasets
    def tokenize_function(examples):
        # Assumes text in the 'text' column and labels in the 'label' column
        return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)

    tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

    # Select the train/validation/test subsets
    train_dataset = tokenized_datasets["train"]
    eval_dataset = tokenized_datasets["validation"]
    test_dataset = tokenized_datasets["test"]

    # 4. Training arguments
    print("--- 3. Setting up training arguments and Trainer ---")
    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        per_device_eval_batch_size=BATCH_SIZE,
        warmup_steps=500,
        weight_decay=0.01,
        logging_steps=50,
        eval_strategy="epoch",        # evaluate at the end of each epoch
        save_strategy="epoch",
        load_best_model_at_end=True,  # reload the best checkpoint after training
        metric_for_best_model="f1",
        seed=SEED,
        learning_rate=LEARNING_RATE,
        report_to="wandb"             # log training to wandb
    )

    # 5. Initialize the Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )

    # 6. Train
    print("--- 4. Starting training ---")
    trainer.train()

    # 7. Evaluate on the test set
    print("--- 5. Evaluating on the test set ---")
    results = trainer.evaluate(test_dataset)
    print(f"Test set results: {results}")

    # 8. Save the final model
    trainer.save_model(os.path.join(OUTPUT_DIR, "final"))
    print(f"Model and tokenizer saved to: {os.path.join(OUTPUT_DIR, 'final')}")

if __name__ == "__main__":
    finetune_bert()
continue_pretrain.py ADDED
@@ -0,0 +1,132 @@
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForMaskedLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    set_seed
)

# --- Configuration ---
DOMAIN_TEXT_FILE = "/home/hsichen/part_time/BERT_finetune/dataset_pretrain/domain_corpus.txt"
MODEL_NAME = "valuesimplex-ai-lab/FinBERT2-base"
OUTPUT_DIR = "./bert_dapt_model"

# Pre-training hyperparameters
DAPT_LR = 1e-5            # low learning rate to avoid destroying pre-trained knowledge
DAPT_EPOCHS = 3           # a moderate number of epochs
BATCH_SIZE = 16           # batch size (adjust to your GPU memory)
MLM_PROBABILITY = 0.15    # masking ratio
SEED = 42
NUM_PROC = 64             # number of parallel preprocessing workers

# Fix the random seed for reproducibility
set_seed(SEED)

def domain_adaptive_pretrain():

    # Path check
    if not os.path.exists(DOMAIN_TEXT_FILE):
        print(f"Fatal error: domain corpus not found at {DOMAIN_TEXT_FILE}. Run the data preprocessing script first.")
        return

    # 1. Load model and tokenizer
    print("--- 1. Loading model and tokenizer ---")
    # AutoTokenizer resolves the tokenizer matching the model
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # AutoModelForMaskedLM provides the MLM head needed for DAPT
    model = AutoModelForMaskedLM.from_pretrained(MODEL_NAME)

    # 2. Load and process the text dataset
    print("--- 2. Loading and processing the text dataset ---")

    # Load the plain-text file with the datasets library;
    # it must be registered under the 'train' key for the Trainer
    raw_datasets = load_dataset("text", data_files={"train": DOMAIN_TEXT_FILE})

    # Tokenization function
    def tokenize_function(examples):
        # Truncate but do not pad; DataCollatorForLanguageModeling handles padding
        return tokenizer(
            examples["text"],
            truncation=True,
            max_length=512,
            return_special_tokens_mask=True
        )

    tokenized_datasets = raw_datasets.map(
        tokenize_function, batched=True, remove_columns=["text"], num_proc=NUM_PROC
    )

    # Chunk and group long texts
    def group_texts(examples):
        # Concatenate all texts
        concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
        total_length = len(concatenated_examples[list(examples.keys())[0]])

        # Chunk size
        chunk_size = 512

        # print(f"Total length: {total_length}, after chunking: {total_length // chunk_size}")

        # Drop the final partial chunk by truncating total_length
        total_length = (total_length // chunk_size) * chunk_size

        # Split the concatenated tokens into chunks of chunk_size (512)
        result = {
            k: [t[i : i + chunk_size] for i in range(0, total_length, chunk_size)]
            for k, t in concatenated_examples.items()
        }
        # Set labels to input_ids; the data collator replaces unmasked positions with -100
        result["labels"] = result["input_ids"].copy()
        return result


    # Final DAPT training dataset
    lm_datasets = tokenized_datasets.map(
        group_texts, batched=True, num_proc=NUM_PROC
    )

    # 3. Data collator (dynamic masking)
    # Applies the 15% masking randomly within each batch
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=True,
        mlm_probability=MLM_PROBABILITY
    )

    # 4. Training arguments
    print("--- 3. Setting up training arguments ---")
    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        num_train_epochs=DAPT_EPOCHS,
        per_device_train_batch_size=BATCH_SIZE,
        learning_rate=DAPT_LR,
        weight_decay=0.01,
        logging_steps=50,
        save_strategy="epoch",
        report_to="wandb",
    )

    # 5. Initialize the Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=lm_datasets["train"],
        data_collator=data_collator,
    )

    # 6. Continue pre-training
    print("--- 4. Starting continued pre-training ---")
    trainer.train()

    # 7. Save the DAPT model
    trainer.save_model(OUTPUT_DIR)
    tokenizer.save_pretrained(OUTPUT_DIR)
    print(f"DAPT model saved to: {OUTPUT_DIR}")

if __name__ == "__main__":
    domain_adaptive_pretrain()
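
The chunking logic in group_texts is easiest to see on toy data. This sketch replays its core steps with a chunk size of 4 instead of 512 (the token IDs are made up):

# Sketch: concatenate, truncate to a multiple of the chunk size, then slice.
examples = {"input_ids": [[1, 2, 3], [4, 5, 6, 7], [8, 9]]}
chunk_size = 4

concatenated = {k: sum(examples[k], []) for k in examples}  # {'input_ids': [1..9]}
total_length = (len(concatenated["input_ids"]) // chunk_size) * chunk_size  # 9 -> 8
chunks = {k: [t[i:i + chunk_size] for i in range(0, total_length, chunk_size)]
          for k, t in concatenated.items()}
print(chunks["input_ids"])  # [[1, 2, 3, 4], [5, 6, 7, 8]] -- trailing token 9 is dropped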
dapt_data_process.py ADDED
@@ -0,0 +1,69 @@
import pandas as pd
import os
from typing import List

# --- Configuration ---
INPUT_CSV_PATH = "/home/hsichen/part_time/BERT_finetune/dataset_pretrain/Experiment_sentences_training_filtered_part1.csv"
# Output plain-text file (DOMAIN_TEXT_FILE in the DAPT script)
OUTPUT_TXT_PATH = "/home/hsichen/part_time/BERT_finetune/dataset_pretrain/domain_corpus.txt"
# Encoding of the plain-text file
ENCODING = 'utf-8'

def prepare_dapt_data(input_csv_path: str, output_txt_path: str, encoding: str):
    """
    Extract the 'sentence' column from a CSV file and save it as a plain-text
    file, one sentence per line.

    Args:
        input_csv_path: path to the source CSV file.
        output_txt_path: path to the target plain-text file.
        encoding: file encoding.
    """
    print(f"--- 1. Reading data: {input_csv_path} ---")

    try:
        # Read the CSV file with default settings,
        # assuming no unusual encoding issues
        df = pd.read_csv(input_csv_path)
    except FileNotFoundError:
        print(f"Error: input file not found at: {input_csv_path}")
        return
    except Exception as e:
        print(f"Error while reading the CSV file: {e}")
        return

    # --- 2. Processing and cleaning ---

    # Check that the 'sentence' column exists
    if 'sentence' not in df.columns:
        print("Error: no 'sentence' column in the CSV file. Check the column names.")
        return

    # Extract the 'sentence' column and drop NaN values
    sentences: List[str] = df['sentence'].dropna().astype(str).tolist()

    if not sentences:
        print("Warning: the 'sentence' column contains no valid data; cannot build the corpus.")
        return

    # Light cleaning: strip stray whitespace and newlines, if any
    sentences = [s.strip() for s in sentences]

    print(f"Extracted {len(sentences)} valid sentences.")

    # --- 3. Save as plain text ---
    print(f"--- 3. Saving to plain-text file: {output_txt_path} ---")

    # Write the sentences to the file, one per line
    try:
        with open(output_txt_path, 'w', encoding=encoding) as f:
            f.write('\n'.join(sentences))

        print("Data saved successfully!")

    except Exception as e:
        print(f"Error while writing the file: {e}")

# --- Entry point ---
if __name__ == "__main__":
    # Make sure pandas is installed: pip install pandas
    prepare_dapt_data(INPUT_CSV_PATH, OUTPUT_TXT_PATH, ENCODING)
data_process_task1.py ADDED
@@ -0,0 +1,89 @@
import pandas as pd
from sklearn.model_selection import train_test_split
import os

# --- Configuration ---
EXCEL_FILE_PATH = "/home/hsichen/part_time/BERT_finetune/data_source.xlsx"
# NOTE: the task-1 fine-tuning scripts read from ./processed_data_task1
OUTPUT_DIR = "./processed_data"
# Train/test split ratio
TEST_SIZE = 0.2
# Random seed so every split is reproducible
RANDOM_SEED = 42

def preprocess_data(excel_path: str, output_dir: str, test_size: float, random_seed: int):
    """
    Read the Excel data, clean and reformat it, and split it into
    training and test sets.

    Args:
        excel_path: path to the source Excel file.
        output_dir: directory for the processed CSV files.
        test_size: fraction of the data used for the test set.
        random_seed: random seed.
    """
    print(f"--- 1. Reading data: {excel_path} ---")
    try:
        # Read the first worksheet of the Excel file
        df = pd.read_excel(excel_path)
    except FileNotFoundError:
        print(f"Error: file not found at: {excel_path}")
        return
    except Exception as e:
        print(f"Error while reading the Excel file: {e}")
        return

    # --- 2. Cleaning and reformatting ---

    # Check that the required columns exist
    required_cols = ['sentence', 'Envir']
    if not all(col in df.columns for col in required_cols):
        print(f"Error: required columns missing from the Excel file. Columns found: {df.columns.tolist()}")
        print(f"Required columns: {required_cols}")
        return

    # Rename the columns to the usual NLP task format (text and label)
    df = df.rename(columns={'sentence': 'text', 'Envir': 'label'})

    # Make sure the 'label' column is integer-typed (0 or 1)
    df['label'] = df['label'].astype(int)

    # Keep only the 'text' and 'label' columns
    df = df[['text', 'label']].dropna()
    print(f"Number of raw rows: {len(df)}")

    # --- 3. Splitting ---
    print(f"--- Splitting the data (train: {1-test_size}, test: {test_size}) ---")

    # Split into train and test with stratified sampling to keep label ratios equal
    train_df, test_df = train_test_split(
        df,
        test_size=test_size,
        random_state=random_seed,
        stratify=df['label']
    )

    val_size_from_train = 0.1 / (1 - test_size)
    train_df, val_df = train_test_split(
        train_df,
        test_size=val_size_from_train,
        random_state=random_seed,
        stratify=train_df['label']
    )

    # --- 4. Save as CSV ---
    os.makedirs(output_dir, exist_ok=True)

    train_output_path = os.path.join(output_dir, 'train.csv')
    val_output_path = os.path.join(output_dir, 'validation.csv')
    test_output_path = os.path.join(output_dir, 'test.csv')

    train_df.to_csv(train_output_path, index=False)
    val_df.to_csv(val_output_path, index=False)
    test_df.to_csv(test_output_path, index=False)

    print("--- Results saved successfully ---")
    print(f"Training rows: {len(train_df)}. Saved to: {train_output_path}")
    print(f"Validation rows: {len(val_df)}. Saved to: {val_output_path}")
    print(f"Test rows: {len(test_df)}. Saved to: {test_output_path}")

if __name__ == "__main__":
    preprocess_data(EXCEL_FILE_PATH, OUTPUT_DIR, TEST_SIZE, RANDOM_SEED)
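
The second split's ratio, 0.1 / (1 - test_size), is chosen so the validation set ends up at 10% of the original data rather than 10% of the remaining training pool. A minimal check of the arithmetic:

# Sketch: with TEST_SIZE = 0.2 the splits come out to 70/10/20.
test_size = 0.2
val_size_from_train = 0.1 / (1 - test_size)          # 0.125 of the remaining 80%
train = (1 - test_size) * (1 - val_size_from_train)  # 0.8 * 0.875 = 0.70
val = (1 - test_size) * val_size_from_train          # 0.8 * 0.125 = 0.10
print(f"train={train:.2f}, val={val:.2f}, test={test_size:.2f}")  # train=0.70, val=0.10, test=0.20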
data_process_task2.py ADDED
@@ -0,0 +1,95 @@
import pandas as pd
from sklearn.model_selection import train_test_split
import os

# --- Configuration ---
EXCEL_FILE_PATH = "/home/hsichen/part_time/BERT_finetune/标注数据_更正后.xlsx"
OUTPUT_DIR = "./processed_data_task2_fixed"
# Train/test split ratio
TEST_SIZE = 0.2
# Random seed so every split is reproducible
RANDOM_SEED = 42

def preprocess_data(excel_path: str, output_dir: str, test_size: float, random_seed: int):
    """
    Read the Excel data, clean and reformat it, and split it into training,
    validation, and test sets, stratified on the exact label combination.
    """
    print(f"--- 1. Reading data: {excel_path} ---")
    try:
        df = pd.read_excel(excel_path)
    except FileNotFoundError:
        print(f"Error: file not found at: {excel_path}")
        return
    except Exception as e:
        print(f"Error while reading the Excel file: {e}")
        return

    # --- 2. Cleaning and reformatting ---

    # 1. Filter: keep only rows with Envir == 1
    df = df[df['Envir'] == 1].copy()
    print(f"Rows remaining after filtering Envir == 1: {len(df)}")

    # 2. Assemble the labels
    TAG_COLS = ['Data', 'Action', 'Gain', 'Regu', 'Vague']

    # Turn the label columns into one list per row
    df['labels'] = df[TAG_COLS].values.tolist()

    df = df.rename(columns={'sentence': 'text'})

    # 3. Count the label combinations (for analysis; the logic is kept)
    print("--- 3. Label combination statistics ---")

    # Convert each label list to a tuple
    df['label_tuple'] = df['labels'].apply(tuple)

    # Convert the tuple to a string for the stratify argument of train_test_split
    # [Added/changed]: string column used for stratification
    df['stratify_col'] = df['label_tuple'].astype(str)

    print("-" * 30)

    # Keep only 'text', 'labels', and 'stratify_col' for splitting
    # (the 'label_count' column is no longer needed)
    df = df[['text', 'labels', 'stratify_col']].copy()

    # --- 4. Splitting (stratified on stratify_col) ---
    print(f"--- Splitting the data (train: {1-test_size}, test: {test_size}) ---")

    # First split: train+validation vs. test
    train_val_df, test_df = train_test_split(
        df,
        test_size=test_size,
        random_state=random_seed,
        # [Key change]: stratify on 'stratify_col'
        stratify=df['stratify_col']
    )

    # Second split: train vs. validation
    val_size_from_train = 0.1 / (1 - test_size)

    train_df, val_df = train_test_split(
        train_val_df,
        test_size=val_size_from_train,
        random_state=random_seed,
        # [Key change]: stratify on 'stratify_col'
        stratify=train_val_df['stratify_col']
    )

    # --- 5. Save as CSV ---
    os.makedirs(output_dir, exist_ok=True)

    # Save only the 'text' and 'labels' columns that BERT needs
    train_df[['text', 'labels']].to_csv(os.path.join(output_dir, 'train.csv'), index=False)
    val_df[['text', 'labels']].to_csv(os.path.join(output_dir, 'validation.csv'), index=False)
    test_df[['text', 'labels']].to_csv(os.path.join(output_dir, 'test.csv'), index=False)

    print("--- Results saved successfully ---")
    print(f"Training rows: {len(train_df)}. Saved to: {os.path.join(output_dir, 'train.csv')}")
    print(f"Validation rows: {len(val_df)}. Saved to: {os.path.join(output_dir, 'validation.csv')}")
    print(f"Test rows: {len(test_df)}. Saved to: {os.path.join(output_dir, 'test.csv')}")

if __name__ == "__main__":
    # Make sure the required libraries are installed: pip install pandas openpyxl scikit-learn
    preprocess_data(EXCEL_FILE_PATH, OUTPUT_DIR, TEST_SIZE, RANDOM_SEED)
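
Stratified splitting needs one hashable key per row, which is why the script serializes each label vector to a string. A minimal sketch of the key construction on made-up rows:

# Sketch: label vectors -> tuples -> strings usable as a stratify column.
import pandas as pd

toy = pd.DataFrame({
    "text": ["a", "b", "c"],
    "labels": [[1, 0, 0, 0, 0], [1, 0, 0, 0, 0], [0, 1, 0, 1, 0]],
})
toy["stratify_col"] = toy["labels"].apply(tuple).astype(str)
print(toy["stratify_col"].tolist())
# ['(1, 0, 0, 0, 0)', '(1, 0, 0, 0, 0)', '(0, 1, 0, 1, 0)']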
data_source.xlsx ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4d691e8657afbf64b7d7e51fb69293651106ee4e890f8046bdbb588593936b45
size 4571190
dataset_pretrain/Experiment_sentences_training_filtered_part1.csv ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:165ed0ca475a1c026c609cb441ce9969d1ccf33cbec744cfe4277deffd60228e
size 1365723082
dataset_pretrain/domain_corpus.txt ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1337725c4e8ea9ca886466a88d9bb9185bdbafe1100465d368919a918519db4f
size 787886543
dataset_pretrain/预训练数据第二部分_年报.zip ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ff51de07b828c9d4ea132ae5e91f66dac802187bc2598bcccb3ee58a4693b3c1
size 698809156
model_inference_task1.py ADDED
@@ -0,0 +1,162 @@
import os
import torch
import numpy as np
from torch import nn
from transformers import BertPreTrainedModel, BertModel, BertTokenizerFast, AutoConfig

# Label names, matching the training task
TAG_COLS = ['Data', 'Action', 'Gain', 'Regu', 'Vague']
PREDICTION_THRESHOLD = 0.5  # prediction threshold

# ----------------------------------------------------
# A. Multi-label BERT model (must match the training-time definition)
# ----------------------------------------------------
class BertForMultiLabelClassification(BertPreTrainedModel):
    """
    BERT-based multi-label classifier trained with BCEWithLogitsLoss.
    """
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        # BERT backbone
        self.bert = BertModel(config)

        # Dropout ratio used during training
        classifier_dropout = config.hidden_dropout_prob
        self.dropout = nn.Dropout(classifier_dropout)

        # Classifier head used during training
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)

        self.post_init()
        # The loss function is unused at inference time but kept for structural parity
        self.loss_fct = nn.BCEWithLogitsLoss()

    def forward(self,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                labels=None):

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

        # Take the [CLS] token's hidden state (the pooler output)
        pooled_output = outputs.pooler_output
        pooled_output = self.dropout(pooled_output)

        # Pass through the classifier head; logits are pre-sigmoid scores
        logits = self.classifier(pooled_output)

        # labels is None at inference time; return the logits directly
        return logits


# ----------------------------------------------------
# B. Inference function
# ----------------------------------------------------
def predict_multilabel(checkpoint_path: str, tokenizer_path: str, text_to_predict: str):
    """
    Load a model checkpoint and run multi-label prediction on a single text.

    Args:
        checkpoint_path: BERT checkpoint directory (with config.json, model.safetensors).
        tokenizer_path: tokenizer path or name.
        text_to_predict: input text to classify.

    Returns:
        A dictionary of predicted labels and probabilities.
    """
    print(f"--- 1. Loading model and tokenizer: {checkpoint_path} ---")

    try:
        config = AutoConfig.from_pretrained(checkpoint_path)
        # Make sure num_labels in the config matches the actual label count
        if config.num_labels != len(TAG_COLS):
            # Patch num_labels at runtime in case the checkpoint's config.json disagrees
            old_num_labels = config.num_labels
            config.num_labels = len(TAG_COLS)
            print(f"Warning: num_labels in the checkpoint config corrected from {old_num_labels} to {len(TAG_COLS)}")

        # Load the tokenizer (assumes the tokenizer files exist or were copied)
        tokenizer = BertTokenizerFast.from_pretrained(tokenizer_path)

        # Load the weights with the custom model class
        model = BertForMultiLabelClassification.from_pretrained(
            checkpoint_path,
            config=config  # pass in the patched config
        )
    except Exception as e:
        print(f"Failed to load the model or tokenizer; check that the path contains all required files (model.safetensors, config.json, vocab.txt): {e}")
        return None

    model.eval()  # switch to eval mode (disables dropout, etc.)

    # 2. Encode the text
    inputs = tokenizer(
        text_to_predict,
        padding="max_length",
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )

    # 3. Run inference
    with torch.no_grad():
        # The model returns logits
        outputs = model(**inputs)
        logits = outputs.cpu().numpy()  # move to CPU and convert to numpy

    # 4. Post-process: sigmoid and threshold
    # Apply the sigmoid to get probabilities
    probs = 1 / (1 + np.exp(-logits))
    # Apply the threshold to get binary predictions
    preds = (probs > PREDICTION_THRESHOLD).astype(int)

    # 5. Format the output
    result = {}

    # Record the prediction and probability for every label
    for i, tag in enumerate(TAG_COLS):
        # Single sample (batch size 1)
        is_predicted = preds[0][i] == 1
        probability = probs[0][i]

        result[tag] = {
            "predicted": is_predicted,
            "probability": float(f"{probability:.4f}")  # keep 4 decimal places
        }

    print("--- 5. Prediction results ---")

    # Collect every label predicted True
    predicted_tags = [tag for tag, info in result.items() if info["predicted"]]

    if predicted_tags:
        print(f"Predicted label classes: {predicted_tags}")
        print("Probabilities:")
        for tag in predicted_tags:
            print(f"  - {tag}: {result[tag]['probability']}")
    else:
        print("No label predicted (every label's probability is below 0.5).")
        print(f"Highest probability across all labels: {max(p['probability'] for p in result.values()):.4f}")


# ----------------------------------------------------
# C. Example run
# ----------------------------------------------------
if __name__ == "__main__":
    # Replace the three values below; TOKENIZER must match MODEL
    MODEL_CHECKPOINT = "/home/hsichen/part_time/BERT_finetune/outputs/finbert2_multilabel_model_finetuned_from_dapt/final"
    TOKENIZER = 'valuesimplex-ai-lab/FinBERT2-base'
    # TOKENIZER = 'bert-base-chinese'
    SAMPLE_TEXT = "密切关注安全环保对原料市场的影响,提前落实应对预案;"

    # Make sure the checkpoint directory exists
    if not os.path.exists(MODEL_CHECKPOINT):
        print(f"Error: model checkpoint directory does not exist: {MODEL_CHECKPOINT}")
    else:
        predict_multilabel(MODEL_CHECKPOINT, TOKENIZER, SAMPLE_TEXT)
model_inference_task2.py ADDED
@@ -0,0 +1,153 @@
import os
import torch
import numpy as np
from torch import nn
from transformers import AutoModelForSequenceClassification, BertTokenizerFast, AutoConfig, BertPreTrainedModel, BertModel

# Label names, matching the training task
BINARY_LABELS = ['Non-Envir', 'Envir']
NUM_LABELS = 2

# ----------------------------------------------------
# A. Multi-label BERT model (must match the training-time definition;
#    kept here for reference — the binary prediction below uses
#    AutoModelForSequenceClassification instead)
# ----------------------------------------------------
class BertForMultiLabelClassification(BertPreTrainedModel):
    """
    BERT-based multi-label classifier trained with BCEWithLogitsLoss.
    """
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels

        # BERT backbone
        self.bert = BertModel(config)

        # Dropout ratio used during training
        classifier_dropout = config.hidden_dropout_prob
        self.dropout = nn.Dropout(classifier_dropout)

        # Classifier head used during training
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)

        self.post_init()
        # The loss function is unused at inference time but kept for structural parity
        self.loss_fct = nn.BCEWithLogitsLoss()

    def forward(self,
                input_ids=None,
                attention_mask=None,
                token_type_ids=None,
                labels=None):

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
        )

        # Take the [CLS] token's hidden state (the pooler output)
        pooled_output = outputs.pooler_output
        pooled_output = self.dropout(pooled_output)

        # Pass through the classifier head; logits are pre-sigmoid scores
        logits = self.classifier(pooled_output)

        # labels is None at inference time; return the logits directly
        return logits


# ----------------------------------------------------
# B. Inference function
# ----------------------------------------------------
def predict_binary_classification(checkpoint_path: str, tokenizer_path: str, text_to_predict: str):
    """
    Load a BERT binary-classification checkpoint and classify a single text.

    Args:
        checkpoint_path: BERT checkpoint directory (with config.json, model.safetensors).
        tokenizer_path: tokenizer path or name.
        text_to_predict: input text to classify.

    Returns:
        A dictionary of the predicted label and probabilities.
    """
    print(f"--- 1. Loading binary classification model and tokenizer: {checkpoint_path} ---")

    try:
        # 1. Load the config and tokenizer
        config = AutoConfig.from_pretrained(checkpoint_path, num_labels=NUM_LABELS)
        tokenizer = BertTokenizerFast.from_pretrained(tokenizer_path)

        # 2. Load the model with the standard AutoModelForSequenceClassification;
        # this handles loading and any classifier-head dimension mismatch
        model = AutoModelForSequenceClassification.from_pretrained(
            checkpoint_path,
            config=config,
            ignore_mismatched_sizes=True  # tolerate classifier-head size mismatches on load
        )
    except Exception as e:
        print(f"Failed to load the model or tokenizer; check that the path contains all required files: {e}")
        return None

    model.eval()  # switch to eval mode

    # 3. Encode the text
    inputs = tokenizer(
        text_to_predict,
        padding=True,
        truncation=True,
        max_length=512,
        return_tensors="pt"
    )

    # 4. Run inference
    with torch.no_grad():
        # The model returns logits (shape is typically [1, 2])
        outputs = model(**inputs)
        logits = outputs.logits  # extract the logits

    # Apply softmax to get a probability distribution
    probabilities = torch.softmax(logits, dim=1).cpu().numpy()[0]

    # Predicted class index (0 or 1)
    predicted_index = np.argmax(probabilities)

    # 5. Format the output

    # Predicted class name
    predicted_label = BINARY_LABELS[predicted_index]
    # Probability of the predicted class
    predicted_prob = probabilities[predicted_index]

    # Print the results
    print("--- 5. Prediction results ---")
    print(f"Input text: {text_to_predict}")
    print(f"Predicted class: {predicted_label}")
    print(f"Probability: {predicted_prob:.4f}")

    # Return the probabilities of all classes
    result = {
        'prediction': predicted_label,
        'probability': float(f"{predicted_prob:.4f}"),
        'all_probabilities': {
            BINARY_LABELS[i]: float(f"{probabilities[i]:.4f}") for i in range(NUM_LABELS)
        }
    }
    return result


# ----------------------------------------------------
# C. Example run
# ----------------------------------------------------
if __name__ == "__main__":
    # Replace the three values below; TOKENIZER must match MODEL
    MODEL_CHECKPOINT = "/home/hsichen/part_time/BERT_finetune/outputs/finbert2_bilabel_finetuned_model_from_dapt/final"
    TOKENIZER = 'valuesimplex-ai-lab/FinBERT2-base'
    # TOKENIZER = 'bert-base-chinese'
    SAMPLE_TEXT = "密切关注安全环保对原料市场的影响,提前落实应对预案;"

    # Make sure the checkpoint directory exists
    if not os.path.exists(MODEL_CHECKPOINT):
        print(f"Error: model checkpoint directory does not exist: {MODEL_CHECKPOINT}")
    else:
        predict_binary_classification(MODEL_CHECKPOINT, TOKENIZER, SAMPLE_TEXT)
outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "architectures": [
+     "BertForMultiLabelClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "dtype": "float32",
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2",
+     "3": "LABEL_3",
+     "4": "LABEL_4"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2,
+     "LABEL_3": 3,
+     "LABEL_4": 4
+   },
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "transformers_version": "5.0.0.dev0",
+   "type_vocab_size": 2,
+   "use_cache": false,
+   "vocab_size": 21128
+ }
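The id2label/label2id maps in this config carry only the generic LABEL_0..LABEL_4 placeholders, so downstream code has to supply the real task-2 category names itself. A minimal sketch, assuming hypothetical placeholder names (the true meanings come from the task-2 training data, not this file); note that "architectures" names the custom BertForMultiLabelClassification class, so loading the weights also requires that class definition:

from transformers import AutoConfig

final_dir = "outputs/bert2_multilabel_frozen_classifier_finetuned_model/final"
label_names = ["cat_0", "cat_1", "cat_2", "cat_3", "cat_4"]  # hypothetical names

config = AutoConfig.from_pretrained(final_dir)
config.id2label = {i: n for i, n in enumerate(label_names)}
config.label2id = {n: i for i, n in enumerate(label_names)}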
outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db1b29e958367916a505a9c6b0c691768326cd696d2a1f18b4977621aff808d4
+ size 409109468
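The three lines above are a Git LFS pointer, not the weights themselves; the actual ~409 MB safetensors file lives in LFS storage. One way to resolve it locally, sketched with huggingface_hub (the repo_id below is a placeholder, not the real repository name):

from huggingface_hub import hf_hub_download

weights_path = hf_hub_download(
    repo_id="Riverise/your-repo-name",  # placeholder: substitute the actual Hub repo
    filename="outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/model.safetensors",
)
print(weights_path)  # local path to the resolved 409 MB file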
outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
+ {
+   "added_tokens_decoder": {
+     "0": {
+       "content": "[PAD]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100": {
+       "content": "[UNK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "101": {
+       "content": "[CLS]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "102": {
+       "content": "[SEP]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "103": {
+       "content": "[MASK]",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "clean_up_tokenization_spaces": false,
+   "cls_token": "[CLS]",
+   "do_lower_case": false,
+   "extra_special_tokens": {},
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "BertTokenizer",
+   "unk_token": "[UNK]"
+ }
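A quick sanity check for the tokenizer files above, assuming the final/ directory has been downloaded locally. Since "tokenize_chinese_chars" is true, Chinese text is split per character before WordPiece:

from transformers import BertTokenizerFast

tok = BertTokenizerFast.from_pretrained(
    "outputs/bert2_multilabel_frozen_classifier_finetuned_model/final"
)
enc = tok("密切关注原料市场", truncation=True, max_length=512)
print(tok.convert_ids_to_tokens(enc["input_ids"]))  # starts with [CLS], ends with [SEP]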
outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e6021ebdaf4d143aee6780f3f7323087af8fe80c7cadc2add939b077d330f0cc
+ size 5201
outputs/bert2_multilabel_frozen_classifier_finetuned_model/final/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
outputs/bert_bilabel_finetuned_model/checkpoint-1094/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "dtype": "float32",
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "problem_type": "single_label_classification",
+   "transformers_version": "5.0.0.dev0",
+   "type_vocab_size": 2,
+   "use_cache": false,
+   "vocab_size": 21128
+ }
outputs/bert_bilabel_finetuned_model/checkpoint-1094/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:008f6adad10108d3cc7a5c01474525cd308971bbfaeab910af694124fbb12750
+ size 409100240
outputs/bert_bilabel_finetuned_model/checkpoint-1094/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:84929c0ce67f5a4a63810f68dd4367f3a37b4648b4c6197ee21b5810ab0529b0
+ size 818324875
outputs/bert_bilabel_finetuned_model/checkpoint-1094/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a7df46a9f83e371cdeb326e2171479963c0b2372be2b82e7056ff56b48e5999c
+ size 14645
outputs/bert_bilabel_finetuned_model/checkpoint-1094/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:819f3a38dcbba4f9f621d51359778f1704914a94d1d1ba3a7961e9fbf54ac1bb
+ size 1465
outputs/bert_bilabel_finetuned_model/checkpoint-1094/trainer_state.json ADDED
@@ -0,0 +1,191 @@
+ {
+   "best_global_step": 1094,
+   "best_metric": 0.9395770392749244,
+   "best_model_checkpoint": "./bert_finetuned_model/checkpoint-1094",
+   "epoch": 1.0,
+   "eval_steps": 500,
+   "global_step": 1094,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.04570383912248629,
+       "grad_norm": 6.597176551818848,
+       "learning_rate": 1.9600000000000003e-06,
+       "loss": 0.8315,
+       "step": 50
+     },
+     {
+       "epoch": 0.09140767824497258,
+       "grad_norm": 4.10335636138916,
+       "learning_rate": 3.96e-06,
+       "loss": 0.403,
+       "step": 100
+     },
+     {
+       "epoch": 0.13711151736745886,
+       "grad_norm": 5.460880756378174,
+       "learning_rate": 5.9600000000000005e-06,
+       "loss": 0.2138,
+       "step": 150
+     },
+     {
+       "epoch": 0.18281535648994515,
+       "grad_norm": 1.7257156372070312,
+       "learning_rate": 7.960000000000002e-06,
+       "loss": 0.0675,
+       "step": 200
+     },
+     {
+       "epoch": 0.22851919561243145,
+       "grad_norm": 0.3548933267593384,
+       "learning_rate": 9.960000000000001e-06,
+       "loss": 0.0887,
+       "step": 250
+     },
+     {
+       "epoch": 0.2742230347349177,
+       "grad_norm": 0.07574323564767838,
+       "learning_rate": 1.196e-05,
+       "loss": 0.0625,
+       "step": 300
+     },
+     {
+       "epoch": 0.31992687385740404,
+       "grad_norm": 0.27218344807624817,
+       "learning_rate": 1.396e-05,
+       "loss": 0.0909,
+       "step": 350
+     },
+     {
+       "epoch": 0.3656307129798903,
+       "grad_norm": 0.07252885401248932,
+       "learning_rate": 1.5960000000000003e-05,
+       "loss": 0.0388,
+       "step": 400
+     },
+     {
+       "epoch": 0.4113345521023766,
+       "grad_norm": 1.1499181985855103,
+       "learning_rate": 1.796e-05,
+       "loss": 0.0955,
+       "step": 450
+     },
+     {
+       "epoch": 0.4570383912248629,
+       "grad_norm": 13.650275230407715,
+       "learning_rate": 1.9960000000000002e-05,
+       "loss": 0.0869,
+       "step": 500
+     },
+     {
+       "epoch": 0.5027422303473492,
+       "grad_norm": 11.625408172607422,
+       "learning_rate": 1.9647735442127967e-05,
+       "loss": 0.0851,
+       "step": 550
+     },
+     {
+       "epoch": 0.5484460694698354,
+       "grad_norm": 0.3337002992630005,
+       "learning_rate": 1.92882818116463e-05,
+       "loss": 0.103,
+       "step": 600
+     },
+     {
+       "epoch": 0.5941499085923218,
+       "grad_norm": 7.300892353057861,
+       "learning_rate": 1.892882818116463e-05,
+       "loss": 0.082,
+       "step": 650
+     },
+     {
+       "epoch": 0.6398537477148081,
+       "grad_norm": 0.24430198967456818,
+       "learning_rate": 1.8569374550682964e-05,
+       "loss": 0.0711,
+       "step": 700
+     },
+     {
+       "epoch": 0.6855575868372943,
+       "grad_norm": 15.26744270324707,
+       "learning_rate": 1.8209920920201294e-05,
+       "loss": 0.0737,
+       "step": 750
+     },
+     {
+       "epoch": 0.7312614259597806,
+       "grad_norm": 0.24188373982906342,
+       "learning_rate": 1.7850467289719628e-05,
+       "loss": 0.0668,
+       "step": 800
+     },
+     {
+       "epoch": 0.7769652650822669,
+       "grad_norm": 0.1296696811914444,
+       "learning_rate": 1.7491013659237958e-05,
+       "loss": 0.0537,
+       "step": 850
+     },
+     {
+       "epoch": 0.8226691042047533,
+       "grad_norm": 0.13343055546283722,
+       "learning_rate": 1.7131560028756292e-05,
+       "loss": 0.0785,
+       "step": 900
+     },
+     {
+       "epoch": 0.8683729433272395,
+       "grad_norm": 4.3099517822265625,
+       "learning_rate": 1.6772106398274622e-05,
+       "loss": 0.1045,
+       "step": 950
+     },
+     {
+       "epoch": 0.9140767824497258,
+       "grad_norm": 0.024240005761384964,
+       "learning_rate": 1.6412652767792956e-05,
+       "loss": 0.023,
+       "step": 1000
+     },
+     {
+       "epoch": 0.9597806215722121,
+       "grad_norm": 1.5524265766143799,
+       "learning_rate": 1.605319913731129e-05,
+       "loss": 0.0541,
+       "step": 1050
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.984,
+       "eval_f1": 0.9395770392749244,
+       "eval_loss": 0.06908556073904037,
+       "eval_runtime": 28.0922,
+       "eval_samples_per_second": 88.993,
+       "eval_steps_per_second": 5.589,
+       "step": 1094
+     }
+   ],
+   "logging_steps": 50,
+   "max_steps": 3282,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 4604443468800000.0,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
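trainer_state.json above is plain JSON, so the per-epoch metrics can be pulled out without transformers installed. A minimal sketch, assuming the checkpoint directory has been downloaded locally:

import json

path = "outputs/bert_bilabel_finetuned_model/checkpoint-1094/trainer_state.json"
with open(path) as f:
    state = json.load(f)

print(state["best_model_checkpoint"], state["best_metric"])
for entry in state["log_history"]:
    if "eval_f1" in entry:  # evaluation entries; the rest are training logs
        print(entry["epoch"], entry["eval_accuracy"], entry["eval_f1"])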
outputs/bert_bilabel_finetuned_model/checkpoint-1094/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:525b07a66e715289db75a841e0609901e3ee221ba4268c678c362a7bbb781388
+ size 5137
outputs/bert_bilabel_finetuned_model/checkpoint-2188/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "dtype": "float32",
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "problem_type": "single_label_classification",
+   "transformers_version": "5.0.0.dev0",
+   "type_vocab_size": 2,
+   "use_cache": false,
+   "vocab_size": 21128
+ }
outputs/bert_bilabel_finetuned_model/checkpoint-2188/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:de2ee416e9b57c2f5950073423afa4ce4969acef04ab4fac2e67b511ad0d7828
+ size 409100240
outputs/bert_bilabel_finetuned_model/checkpoint-2188/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c3782131594e410bca55a994fe35dea25cabcc7266f22a9ffe9530377ab90826
+ size 818324875
outputs/bert_bilabel_finetuned_model/checkpoint-2188/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e1c014c8f02969df2fb6b09bfa058898bb6a730c9745ecc985b52eb65b54fddb
+ size 14645
outputs/bert_bilabel_finetuned_model/checkpoint-2188/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e292baa34fe268bf54671510dc8dca778a92537e26e986b9a3f9c6b5645bd29d
+ size 1465
outputs/bert_bilabel_finetuned_model/checkpoint-2188/trainer_state.json ADDED
@@ -0,0 +1,355 @@
+ {
+   "best_global_step": 2188,
+   "best_metric": 0.9396170839469808,
+   "best_model_checkpoint": "./bert_finetuned_model/checkpoint-2188",
+   "epoch": 2.0,
+   "eval_steps": 500,
+   "global_step": 2188,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.04570383912248629,
+       "grad_norm": 6.597176551818848,
+       "learning_rate": 1.9600000000000003e-06,
+       "loss": 0.8315,
+       "step": 50
+     },
+     {
+       "epoch": 0.09140767824497258,
+       "grad_norm": 4.10335636138916,
+       "learning_rate": 3.96e-06,
+       "loss": 0.403,
+       "step": 100
+     },
+     {
+       "epoch": 0.13711151736745886,
+       "grad_norm": 5.460880756378174,
+       "learning_rate": 5.9600000000000005e-06,
+       "loss": 0.2138,
+       "step": 150
+     },
+     {
+       "epoch": 0.18281535648994515,
+       "grad_norm": 1.7257156372070312,
+       "learning_rate": 7.960000000000002e-06,
+       "loss": 0.0675,
+       "step": 200
+     },
+     {
+       "epoch": 0.22851919561243145,
+       "grad_norm": 0.3548933267593384,
+       "learning_rate": 9.960000000000001e-06,
+       "loss": 0.0887,
+       "step": 250
+     },
+     {
+       "epoch": 0.2742230347349177,
+       "grad_norm": 0.07574323564767838,
+       "learning_rate": 1.196e-05,
+       "loss": 0.0625,
+       "step": 300
+     },
+     {
+       "epoch": 0.31992687385740404,
+       "grad_norm": 0.27218344807624817,
+       "learning_rate": 1.396e-05,
+       "loss": 0.0909,
+       "step": 350
+     },
+     {
+       "epoch": 0.3656307129798903,
+       "grad_norm": 0.07252885401248932,
+       "learning_rate": 1.5960000000000003e-05,
+       "loss": 0.0388,
+       "step": 400
+     },
+     {
+       "epoch": 0.4113345521023766,
+       "grad_norm": 1.1499181985855103,
+       "learning_rate": 1.796e-05,
+       "loss": 0.0955,
+       "step": 450
+     },
+     {
+       "epoch": 0.4570383912248629,
+       "grad_norm": 13.650275230407715,
+       "learning_rate": 1.9960000000000002e-05,
+       "loss": 0.0869,
+       "step": 500
+     },
+     {
+       "epoch": 0.5027422303473492,
+       "grad_norm": 11.625408172607422,
+       "learning_rate": 1.9647735442127967e-05,
+       "loss": 0.0851,
+       "step": 550
+     },
+     {
+       "epoch": 0.5484460694698354,
+       "grad_norm": 0.3337002992630005,
+       "learning_rate": 1.92882818116463e-05,
+       "loss": 0.103,
+       "step": 600
+     },
+     {
+       "epoch": 0.5941499085923218,
+       "grad_norm": 7.300892353057861,
+       "learning_rate": 1.892882818116463e-05,
+       "loss": 0.082,
+       "step": 650
+     },
+     {
+       "epoch": 0.6398537477148081,
+       "grad_norm": 0.24430198967456818,
+       "learning_rate": 1.8569374550682964e-05,
+       "loss": 0.0711,
+       "step": 700
+     },
+     {
+       "epoch": 0.6855575868372943,
+       "grad_norm": 15.26744270324707,
+       "learning_rate": 1.8209920920201294e-05,
+       "loss": 0.0737,
+       "step": 750
+     },
+     {
+       "epoch": 0.7312614259597806,
+       "grad_norm": 0.24188373982906342,
+       "learning_rate": 1.7850467289719628e-05,
+       "loss": 0.0668,
+       "step": 800
+     },
+     {
+       "epoch": 0.7769652650822669,
+       "grad_norm": 0.1296696811914444,
+       "learning_rate": 1.7491013659237958e-05,
+       "loss": 0.0537,
+       "step": 850
+     },
+     {
+       "epoch": 0.8226691042047533,
+       "grad_norm": 0.13343055546283722,
+       "learning_rate": 1.7131560028756292e-05,
+       "loss": 0.0785,
+       "step": 900
+     },
+     {
+       "epoch": 0.8683729433272395,
+       "grad_norm": 4.3099517822265625,
+       "learning_rate": 1.6772106398274622e-05,
+       "loss": 0.1045,
+       "step": 950
+     },
+     {
+       "epoch": 0.9140767824497258,
+       "grad_norm": 0.024240005761384964,
+       "learning_rate": 1.6412652767792956e-05,
+       "loss": 0.023,
+       "step": 1000
+     },
+     {
+       "epoch": 0.9597806215722121,
+       "grad_norm": 1.5524265766143799,
+       "learning_rate": 1.605319913731129e-05,
+       "loss": 0.0541,
+       "step": 1050
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.984,
+       "eval_f1": 0.9395770392749244,
+       "eval_loss": 0.06908556073904037,
+       "eval_runtime": 28.0922,
+       "eval_samples_per_second": 88.993,
+       "eval_steps_per_second": 5.589,
+       "step": 1094
+     },
+     {
+       "epoch": 1.0054844606946984,
+       "grad_norm": 0.1564575880765915,
+       "learning_rate": 1.569374550682962e-05,
+       "loss": 0.066,
+       "step": 1100
+     },
+     {
+       "epoch": 1.0511882998171846,
+       "grad_norm": 0.014012756757438183,
+       "learning_rate": 1.5334291876347953e-05,
+       "loss": 0.0309,
+       "step": 1150
+     },
+     {
+       "epoch": 1.0968921389396709,
+       "grad_norm": 0.023974154144525528,
+       "learning_rate": 1.4974838245866285e-05,
+       "loss": 0.0341,
+       "step": 1200
+     },
+     {
+       "epoch": 1.1425959780621573,
+       "grad_norm": 0.013898388482630253,
+       "learning_rate": 1.4615384615384615e-05,
+       "loss": 0.0335,
+       "step": 1250
+     },
+     {
+       "epoch": 1.1882998171846435,
+       "grad_norm": 0.07936646789312363,
+       "learning_rate": 1.4255930984902949e-05,
+       "loss": 0.0479,
+       "step": 1300
+     },
+     {
+       "epoch": 1.2340036563071297,
+       "grad_norm": 0.10548417270183563,
+       "learning_rate": 1.389647735442128e-05,
+       "loss": 0.0481,
+       "step": 1350
+     },
+     {
+       "epoch": 1.2797074954296161,
+       "grad_norm": 0.015461038798093796,
+       "learning_rate": 1.3537023723939613e-05,
+       "loss": 0.0302,
+       "step": 1400
+     },
+     {
+       "epoch": 1.3254113345521024,
+       "grad_norm": 0.03913908079266548,
+       "learning_rate": 1.3177570093457945e-05,
+       "loss": 0.0196,
+       "step": 1450
+     },
+     {
+       "epoch": 1.3711151736745886,
+       "grad_norm": 0.0657438263297081,
+       "learning_rate": 1.2818116462976278e-05,
+       "loss": 0.07,
+       "step": 1500
+     },
+     {
+       "epoch": 1.416819012797075,
+       "grad_norm": 0.08092936873435974,
+       "learning_rate": 1.245866283249461e-05,
+       "loss": 0.0372,
+       "step": 1550
+     },
+     {
+       "epoch": 1.4625228519195612,
+       "grad_norm": 0.019851330667734146,
+       "learning_rate": 1.209920920201294e-05,
+       "loss": 0.0337,
+       "step": 1600
+     },
+     {
+       "epoch": 1.5082266910420477,
+       "grad_norm": 0.013996358960866928,
+       "learning_rate": 1.1739755571531272e-05,
+       "loss": 0.038,
+       "step": 1650
+     },
+     {
+       "epoch": 1.5539305301645339,
+       "grad_norm": 0.011369767598807812,
+       "learning_rate": 1.1380301941049606e-05,
+       "loss": 0.0281,
+       "step": 1700
+     },
+     {
+       "epoch": 1.59963436928702,
+       "grad_norm": 0.07967428863048553,
+       "learning_rate": 1.1020848310567938e-05,
+       "loss": 0.0426,
+       "step": 1750
+     },
+     {
+       "epoch": 1.6453382084095063,
+       "grad_norm": 0.005350353196263313,
+       "learning_rate": 1.066139468008627e-05,
+       "loss": 0.0334,
+       "step": 1800
+     },
+     {
+       "epoch": 1.6910420475319927,
+       "grad_norm": 0.007268950808793306,
+       "learning_rate": 1.0301941049604602e-05,
+       "loss": 0.0341,
+       "step": 1850
+     },
+     {
+       "epoch": 1.736745886654479,
+       "grad_norm": 0.007129556033760309,
+       "learning_rate": 9.942487419122934e-06,
+       "loss": 0.0139,
+       "step": 1900
+     },
+     {
+       "epoch": 1.7824497257769654,
+       "grad_norm": 1.3157267570495605,
+       "learning_rate": 9.583033788641266e-06,
+       "loss": 0.0412,
+       "step": 1950
+     },
+     {
+       "epoch": 1.8281535648994516,
+       "grad_norm": 6.9985222816467285,
+       "learning_rate": 9.223580158159599e-06,
+       "loss": 0.0383,
+       "step": 2000
+     },
+     {
+       "epoch": 1.8738574040219378,
+       "grad_norm": 0.008648707531392574,
+       "learning_rate": 8.86412652767793e-06,
+       "loss": 0.0308,
+       "step": 2050
+     },
+     {
+       "epoch": 1.919561243144424,
+       "grad_norm": 11.036811828613281,
+       "learning_rate": 8.504672897196263e-06,
+       "loss": 0.0444,
+       "step": 2100
+     },
+     {
+       "epoch": 1.9652650822669104,
+       "grad_norm": 0.005460981745272875,
+       "learning_rate": 8.145219266714595e-06,
+       "loss": 0.0288,
+       "step": 2150
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.9836,
+       "eval_f1": 0.9396170839469808,
+       "eval_loss": 0.08339423686265945,
+       "eval_runtime": 28.9448,
+       "eval_samples_per_second": 86.371,
+       "eval_steps_per_second": 5.424,
+       "step": 2188
+     }
+   ],
+   "logging_steps": 50,
+   "max_steps": 3282,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": false
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 9208886937600000.0,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
outputs/bert_bilabel_finetuned_model/checkpoint-2188/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:525b07a66e715289db75a841e0609901e3ee221ba4268c678c362a7bbb781388
+ size 5137
outputs/bert_bilabel_finetuned_model/checkpoint-3282/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "dtype": "float32",
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "problem_type": "single_label_classification",
+   "transformers_version": "5.0.0.dev0",
+   "type_vocab_size": 2,
+   "use_cache": false,
+   "vocab_size": 21128
+ }
outputs/bert_bilabel_finetuned_model/checkpoint-3282/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b58a46568ff450837fbf3ee0f51fa89fd82a450959464b503f893036b86b5a01
+ size 409100240
outputs/bert_bilabel_finetuned_model/checkpoint-3282/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb1ea6fc0ae5e09b8a3be646658110ca41c0e6fc08b68cab2ddeb74c0ae82d38
+ size 818324875
outputs/bert_bilabel_finetuned_model/checkpoint-3282/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5412faccf347e6ccc0399ab61829229374cd91c9d7662d44fcb0bb456d151a0d
+ size 14645
outputs/bert_bilabel_finetuned_model/checkpoint-3282/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:73208a74cd8690df2f7dd0a81633cdfde37ecda9e183b1a86782b8075ba454d0
+ size 1465
outputs/bert_bilabel_finetuned_model/checkpoint-3282/trainer_state.json ADDED
@@ -0,0 +1,519 @@
+ {
+   "best_global_step": 3282,
+   "best_metric": 0.9413489736070382,
+   "best_model_checkpoint": "./bert_finetuned_model/checkpoint-3282",
+   "epoch": 3.0,
+   "eval_steps": 500,
+   "global_step": 3282,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.04570383912248629,
+       "grad_norm": 6.597176551818848,
+       "learning_rate": 1.9600000000000003e-06,
+       "loss": 0.8315,
+       "step": 50
+     },
+     {
+       "epoch": 0.09140767824497258,
+       "grad_norm": 4.10335636138916,
+       "learning_rate": 3.96e-06,
+       "loss": 0.403,
+       "step": 100
+     },
+     {
+       "epoch": 0.13711151736745886,
+       "grad_norm": 5.460880756378174,
+       "learning_rate": 5.9600000000000005e-06,
+       "loss": 0.2138,
+       "step": 150
+     },
+     {
+       "epoch": 0.18281535648994515,
+       "grad_norm": 1.7257156372070312,
+       "learning_rate": 7.960000000000002e-06,
+       "loss": 0.0675,
+       "step": 200
+     },
+     {
+       "epoch": 0.22851919561243145,
+       "grad_norm": 0.3548933267593384,
+       "learning_rate": 9.960000000000001e-06,
+       "loss": 0.0887,
+       "step": 250
+     },
+     {
+       "epoch": 0.2742230347349177,
+       "grad_norm": 0.07574323564767838,
+       "learning_rate": 1.196e-05,
+       "loss": 0.0625,
+       "step": 300
+     },
+     {
+       "epoch": 0.31992687385740404,
+       "grad_norm": 0.27218344807624817,
+       "learning_rate": 1.396e-05,
+       "loss": 0.0909,
+       "step": 350
+     },
+     {
+       "epoch": 0.3656307129798903,
+       "grad_norm": 0.07252885401248932,
+       "learning_rate": 1.5960000000000003e-05,
+       "loss": 0.0388,
+       "step": 400
+     },
+     {
+       "epoch": 0.4113345521023766,
+       "grad_norm": 1.1499181985855103,
+       "learning_rate": 1.796e-05,
+       "loss": 0.0955,
+       "step": 450
+     },
+     {
+       "epoch": 0.4570383912248629,
+       "grad_norm": 13.650275230407715,
+       "learning_rate": 1.9960000000000002e-05,
+       "loss": 0.0869,
+       "step": 500
+     },
+     {
+       "epoch": 0.5027422303473492,
+       "grad_norm": 11.625408172607422,
+       "learning_rate": 1.9647735442127967e-05,
+       "loss": 0.0851,
+       "step": 550
+     },
+     {
+       "epoch": 0.5484460694698354,
+       "grad_norm": 0.3337002992630005,
+       "learning_rate": 1.92882818116463e-05,
+       "loss": 0.103,
+       "step": 600
+     },
+     {
+       "epoch": 0.5941499085923218,
+       "grad_norm": 7.300892353057861,
+       "learning_rate": 1.892882818116463e-05,
+       "loss": 0.082,
+       "step": 650
+     },
+     {
+       "epoch": 0.6398537477148081,
+       "grad_norm": 0.24430198967456818,
+       "learning_rate": 1.8569374550682964e-05,
+       "loss": 0.0711,
+       "step": 700
+     },
+     {
+       "epoch": 0.6855575868372943,
+       "grad_norm": 15.26744270324707,
+       "learning_rate": 1.8209920920201294e-05,
+       "loss": 0.0737,
+       "step": 750
+     },
+     {
+       "epoch": 0.7312614259597806,
+       "grad_norm": 0.24188373982906342,
+       "learning_rate": 1.7850467289719628e-05,
+       "loss": 0.0668,
+       "step": 800
+     },
+     {
+       "epoch": 0.7769652650822669,
+       "grad_norm": 0.1296696811914444,
+       "learning_rate": 1.7491013659237958e-05,
+       "loss": 0.0537,
+       "step": 850
+     },
+     {
+       "epoch": 0.8226691042047533,
+       "grad_norm": 0.13343055546283722,
+       "learning_rate": 1.7131560028756292e-05,
+       "loss": 0.0785,
+       "step": 900
+     },
+     {
+       "epoch": 0.8683729433272395,
+       "grad_norm": 4.3099517822265625,
+       "learning_rate": 1.6772106398274622e-05,
+       "loss": 0.1045,
+       "step": 950
+     },
+     {
+       "epoch": 0.9140767824497258,
+       "grad_norm": 0.024240005761384964,
+       "learning_rate": 1.6412652767792956e-05,
+       "loss": 0.023,
+       "step": 1000
+     },
+     {
+       "epoch": 0.9597806215722121,
+       "grad_norm": 1.5524265766143799,
+       "learning_rate": 1.605319913731129e-05,
+       "loss": 0.0541,
+       "step": 1050
+     },
+     {
+       "epoch": 1.0,
+       "eval_accuracy": 0.984,
+       "eval_f1": 0.9395770392749244,
+       "eval_loss": 0.06908556073904037,
+       "eval_runtime": 28.0922,
+       "eval_samples_per_second": 88.993,
+       "eval_steps_per_second": 5.589,
+       "step": 1094
+     },
+     {
+       "epoch": 1.0054844606946984,
+       "grad_norm": 0.1564575880765915,
+       "learning_rate": 1.569374550682962e-05,
+       "loss": 0.066,
+       "step": 1100
+     },
+     {
+       "epoch": 1.0511882998171846,
+       "grad_norm": 0.014012756757438183,
+       "learning_rate": 1.5334291876347953e-05,
+       "loss": 0.0309,
+       "step": 1150
+     },
+     {
+       "epoch": 1.0968921389396709,
+       "grad_norm": 0.023974154144525528,
+       "learning_rate": 1.4974838245866285e-05,
+       "loss": 0.0341,
+       "step": 1200
+     },
+     {
+       "epoch": 1.1425959780621573,
+       "grad_norm": 0.013898388482630253,
+       "learning_rate": 1.4615384615384615e-05,
+       "loss": 0.0335,
+       "step": 1250
+     },
+     {
+       "epoch": 1.1882998171846435,
+       "grad_norm": 0.07936646789312363,
+       "learning_rate": 1.4255930984902949e-05,
+       "loss": 0.0479,
+       "step": 1300
+     },
+     {
+       "epoch": 1.2340036563071297,
+       "grad_norm": 0.10548417270183563,
+       "learning_rate": 1.389647735442128e-05,
+       "loss": 0.0481,
+       "step": 1350
+     },
+     {
+       "epoch": 1.2797074954296161,
+       "grad_norm": 0.015461038798093796,
+       "learning_rate": 1.3537023723939613e-05,
+       "loss": 0.0302,
+       "step": 1400
+     },
+     {
+       "epoch": 1.3254113345521024,
+       "grad_norm": 0.03913908079266548,
+       "learning_rate": 1.3177570093457945e-05,
+       "loss": 0.0196,
+       "step": 1450
+     },
+     {
+       "epoch": 1.3711151736745886,
+       "grad_norm": 0.0657438263297081,
+       "learning_rate": 1.2818116462976278e-05,
+       "loss": 0.07,
+       "step": 1500
+     },
+     {
+       "epoch": 1.416819012797075,
+       "grad_norm": 0.08092936873435974,
+       "learning_rate": 1.245866283249461e-05,
+       "loss": 0.0372,
+       "step": 1550
+     },
+     {
+       "epoch": 1.4625228519195612,
+       "grad_norm": 0.019851330667734146,
+       "learning_rate": 1.209920920201294e-05,
+       "loss": 0.0337,
+       "step": 1600
+     },
+     {
+       "epoch": 1.5082266910420477,
+       "grad_norm": 0.013996358960866928,
+       "learning_rate": 1.1739755571531272e-05,
+       "loss": 0.038,
+       "step": 1650
+     },
+     {
+       "epoch": 1.5539305301645339,
+       "grad_norm": 0.011369767598807812,
+       "learning_rate": 1.1380301941049606e-05,
+       "loss": 0.0281,
+       "step": 1700
+     },
+     {
+       "epoch": 1.59963436928702,
+       "grad_norm": 0.07967428863048553,
+       "learning_rate": 1.1020848310567938e-05,
+       "loss": 0.0426,
+       "step": 1750
+     },
+     {
+       "epoch": 1.6453382084095063,
+       "grad_norm": 0.005350353196263313,
+       "learning_rate": 1.066139468008627e-05,
+       "loss": 0.0334,
+       "step": 1800
+     },
+     {
+       "epoch": 1.6910420475319927,
+       "grad_norm": 0.007268950808793306,
+       "learning_rate": 1.0301941049604602e-05,
+       "loss": 0.0341,
+       "step": 1850
+     },
+     {
+       "epoch": 1.736745886654479,
+       "grad_norm": 0.007129556033760309,
+       "learning_rate": 9.942487419122934e-06,
+       "loss": 0.0139,
+       "step": 1900
+     },
+     {
+       "epoch": 1.7824497257769654,
+       "grad_norm": 1.3157267570495605,
+       "learning_rate": 9.583033788641266e-06,
+       "loss": 0.0412,
+       "step": 1950
+     },
+     {
+       "epoch": 1.8281535648994516,
+       "grad_norm": 6.9985222816467285,
+       "learning_rate": 9.223580158159599e-06,
+       "loss": 0.0383,
+       "step": 2000
+     },
+     {
+       "epoch": 1.8738574040219378,
+       "grad_norm": 0.008648707531392574,
+       "learning_rate": 8.86412652767793e-06,
+       "loss": 0.0308,
+       "step": 2050
+     },
+     {
+       "epoch": 1.919561243144424,
+       "grad_norm": 11.036811828613281,
+       "learning_rate": 8.504672897196263e-06,
+       "loss": 0.0444,
+       "step": 2100
+     },
+     {
+       "epoch": 1.9652650822669104,
+       "grad_norm": 0.005460981745272875,
+       "learning_rate": 8.145219266714595e-06,
+       "loss": 0.0288,
+       "step": 2150
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.9836,
+       "eval_f1": 0.9396170839469808,
+       "eval_loss": 0.08339423686265945,
+       "eval_runtime": 28.9448,
+       "eval_samples_per_second": 86.371,
+       "eval_steps_per_second": 5.424,
+       "step": 2188
+     },
+     {
+       "epoch": 2.010968921389397,
+       "grad_norm": 0.8983257412910461,
+       "learning_rate": 7.785765636232927e-06,
+       "loss": 0.0354,
+       "step": 2200
+     },
+     {
+       "epoch": 2.056672760511883,
+       "grad_norm": 0.10194671899080276,
+       "learning_rate": 7.426312005751259e-06,
+       "loss": 0.0195,
+       "step": 2250
+     },
+     {
+       "epoch": 2.1023765996343693,
+       "grad_norm": 3.3761022090911865,
+       "learning_rate": 7.066858375269591e-06,
+       "loss": 0.0074,
+       "step": 2300
+     },
+     {
+       "epoch": 2.1480804387568555,
+       "grad_norm": 0.0022166408598423004,
+       "learning_rate": 6.707404744787923e-06,
+       "loss": 0.0116,
+       "step": 2350
+     },
+     {
+       "epoch": 2.1937842778793417,
+       "grad_norm": 0.007358817849308252,
+       "learning_rate": 6.347951114306255e-06,
+       "loss": 0.0038,
+       "step": 2400
+     },
+     {
+       "epoch": 2.2394881170018284,
+       "grad_norm": 0.004738911986351013,
+       "learning_rate": 5.988497483824587e-06,
+       "loss": 0.0224,
+       "step": 2450
+     },
+     {
+       "epoch": 2.2851919561243146,
+       "grad_norm": 0.003663586685433984,
+       "learning_rate": 5.629043853342919e-06,
+       "loss": 0.0122,
+       "step": 2500
+     },
+     {
+       "epoch": 2.330895795246801,
+       "grad_norm": 0.010519472882151604,
+       "learning_rate": 5.269590222861252e-06,
+       "loss": 0.0081,
+       "step": 2550
+     },
+     {
+       "epoch": 2.376599634369287,
+       "grad_norm": 0.007029661443084478,
+       "learning_rate": 4.910136592379584e-06,
+       "loss": 0.0302,
+       "step": 2600
+     },
+     {
+       "epoch": 2.422303473491773,
+       "grad_norm": 0.011014117859303951,
+       "learning_rate": 4.550682961897916e-06,
+       "loss": 0.0195,
+       "step": 2650
+     },
+     {
+       "epoch": 2.4680073126142594,
+       "grad_norm": 0.006674727890640497,
+       "learning_rate": 4.191229331416248e-06,
+       "loss": 0.0094,
+       "step": 2700
+     },
+     {
+       "epoch": 2.5137111517367456,
+       "grad_norm": 0.011101804673671722,
+       "learning_rate": 3.8317757009345796e-06,
+       "loss": 0.0292,
+       "step": 2750
+     },
+     {
+       "epoch": 2.5594149908592323,
+       "grad_norm": 0.032363053411245346,
+       "learning_rate": 3.472322070452912e-06,
+       "loss": 0.0074,
+       "step": 2800
+     },
+     {
+       "epoch": 2.6051188299817185,
+       "grad_norm": 0.0015758282970637083,
+       "learning_rate": 3.112868439971244e-06,
+       "loss": 0.0076,
+       "step": 2850
+     },
+     {
+       "epoch": 2.6508226691042047,
+       "grad_norm": 0.0019405486527830362,
+       "learning_rate": 2.753414809489576e-06,
+       "loss": 0.0178,
+       "step": 2900
+     },
+     {
+       "epoch": 2.696526508226691,
+       "grad_norm": 0.002395535819232464,
+       "learning_rate": 2.393961179007908e-06,
+       "loss": 0.0163,
+       "step": 2950
+     },
+     {
+       "epoch": 2.742230347349177,
+       "grad_norm": 0.051430843770504,
+       "learning_rate": 2.0345075485262404e-06,
+       "loss": 0.0281,
+       "step": 3000
+     },
+     {
+       "epoch": 2.787934186471664,
+       "grad_norm": 0.002579999854788184,
+       "learning_rate": 1.6750539180445723e-06,
+       "loss": 0.0241,
+       "step": 3050
+     },
+     {
+       "epoch": 2.83363802559415,
+       "grad_norm": 0.00829145684838295,
+       "learning_rate": 1.3156002875629045e-06,
+       "loss": 0.0229,
+       "step": 3100
+     },
+     {
+       "epoch": 2.8793418647166362,
+       "grad_norm": 0.003575286827981472,
+       "learning_rate": 9.561466570812366e-07,
+       "loss": 0.0016,
+       "step": 3150
+     },
+     {
+       "epoch": 2.9250457038391224,
+       "grad_norm": 0.00501601118594408,
+       "learning_rate": 5.966930265995687e-07,
+       "loss": 0.0069,
+       "step": 3200
+     },
+     {
+       "epoch": 2.9707495429616086,
+       "grad_norm": 0.01910424418747425,
+       "learning_rate": 2.3723939611790082e-07,
+       "loss": 0.016,
+       "step": 3250
+     },
+     {
+       "epoch": 3.0,
+       "eval_accuracy": 0.984,
+       "eval_f1": 0.9413489736070382,
+       "eval_loss": 0.08269735425710678,
+       "eval_runtime": 27.2195,
+       "eval_samples_per_second": 91.846,
+       "eval_steps_per_second": 5.768,
+       "step": 3282
+     }
+   ],
+   "logging_steps": 50,
+   "max_steps": 3282,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 500,
+   "stateful_callbacks": {
+     "TrainerControl": {
+       "args": {
+         "should_epoch_stop": false,
+         "should_evaluate": false,
+         "should_log": false,
+         "should_save": true,
+         "should_training_stop": true
+       },
+       "attributes": {}
+     }
+   },
+   "total_flos": 1.38133304064e+16,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
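This epoch-3 state holds the full loss curve for the run (eval F1 moves from 0.9396 to 0.9413 between epochs 2 and 3, which is why checkpoint-3282 is the best model). A sketch for plotting the curve, assuming matplotlib is installed and the checkpoint is local:

import json
import matplotlib.pyplot as plt

with open("outputs/bert_bilabel_finetuned_model/checkpoint-3282/trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]  # training entries only
plt.plot([e["step"] for e in train_logs], [e["loss"] for e in train_logs])
plt.xlabel("step")
plt.ylabel("training loss")
plt.savefig("bilabel_loss_curve.png")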
outputs/bert_bilabel_finetuned_model/checkpoint-3282/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:525b07a66e715289db75a841e0609901e3ee221ba4268c678c362a7bbb781388
+ size 5137
outputs/bert_bilabel_finetuned_model/final/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "dtype": "float32",
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "problem_type": "single_label_classification",
+   "transformers_version": "5.0.0.dev0",
+   "type_vocab_size": 2,
+   "use_cache": false,
+   "vocab_size": 21128
+ }
outputs/bert_bilabel_finetuned_model/final/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b58a46568ff450837fbf3ee0f51fa89fd82a450959464b503f893036b86b5a01
+ size 409100240
outputs/bert_bilabel_finetuned_model/final/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:525b07a66e715289db75a841e0609901e3ee221ba4268c678c362a7bbb781388
+ size 5137
outputs/bert_bilabel_frozen_classifier_finetuned_model/checkpoint-1094/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "architectures": [
+     "BertForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "classifier_dropout": null,
+   "directionality": "bidi",
+   "dtype": "float32",
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 768,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-12,
+   "max_position_embeddings": 512,
+   "model_type": "bert",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 0,
+   "pooler_fc_size": 768,
+   "pooler_num_attention_heads": 12,
+   "pooler_num_fc_layers": 3,
+   "pooler_size_per_head": 128,
+   "pooler_type": "first_token_transform",
+   "problem_type": "single_label_classification",
+   "transformers_version": "5.0.0.dev0",
+   "type_vocab_size": 2,
+   "use_cache": false,
+   "vocab_size": 21128
+ }
outputs/bert_bilabel_frozen_classifier_finetuned_model/checkpoint-1094/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9097c32c49180f4aa454c161c6d4f7836309cae1da6f9b0999742b6126e974a1
+ size 409100240
outputs/bert_bilabel_frozen_classifier_finetuned_model/checkpoint-1094/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:364f58fb6b7311d72cc67732e12056c8397d13aab84128e44fccc9a4f96440a9
+ size 15597
outputs/bert_bilabel_frozen_classifier_finetuned_model/checkpoint-1094/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a7df46a9f83e371cdeb326e2171479963c0b2372be2b82e7056ff56b48e5999c
+ size 14645