Spaces:

ogtega
/

datamining-project

Sleeping

Teslim Olunlade committed on Apr 27, 2023

Commit

ede725f

1 Parent(s): 5b67bcb

Fixed trained model

Files changed (1) hide show

app/train.py CHANGED Viewed

@@ -11,6 +11,7 @@ from transformers import (
     TFAutoModelForSequenceClassification,
 )
 output_dir = "out/model"
 checkpoint_path = "out/cp.ckpt"
@@ -19,7 +20,7 @@ label2id = {label: id for id, label in enumerate(labels)}
 id2label = {id: label for id, label in enumerate(labels)}
 dataset = load_dataset("csv", data_files="train.csv")
-tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 def process_data(row):
@@ -40,7 +41,7 @@ def process_data(row):
 model = TFAutoModelForSequenceClassification.from_pretrained(
-    "bert-base-uncased",
     problem_type="multi_label_classification",
     num_labels=len(labels),
     label2id=label2id,
@@ -67,5 +68,5 @@ push_to_hub_callback = PushToHubCallback(
     hub_model_id="ogtega/tweet-toxicity-classifier",
 )
-model.compile(optimizer=Adam(3e-5), loss="categorical_crossentropy")
 model.fit(tf_dataset, callbacks=[cp_callback, push_to_hub_callback])

     TFAutoModelForSequenceClassification,
 )
+base_model = "distilbert-base-uncased"
 output_dir = "out/model"
 checkpoint_path = "out/cp.ckpt"
 id2label = {id: label for id, label in enumerate(labels)}
 dataset = load_dataset("csv", data_files="train.csv")
+tokenizer = AutoTokenizer.from_pretrained(base_model)
 def process_data(row):
 model = TFAutoModelForSequenceClassification.from_pretrained(
+    base_model,
     problem_type="multi_label_classification",
     num_labels=len(labels),
     label2id=label2id,
     hub_model_id="ogtega/tweet-toxicity-classifier",
 )
+model.compile(optimizer=Adam(3e-5), loss="BinaryCrossentropy")
 model.fit(tf_dataset, callbacks=[cp_callback, push_to_hub_callback])