Spaces:

danielle2003
/

sentiment

Build error

danielle2003 committed on Mar 20, 2025

Commit

2b3de4f

1 Parent(s): 81a820f

adding scripts

Files changed (3) hide show

scripts/evaluate.py ADDED Viewed

+from transformers import pipeline
+from datasets import load_dataset
+from sklearn.metrics import accuracy_score, f1_score
+# Load dataset
+dataset = load_dataset("allocine")["test"]
+# Load model
+classifier = pipeline("text-classification", model="./models")
+# Get predictions
+predictions = [classifier(text["review"])[0]["label"] for text in dataset]
+labels = dataset["label"]
+# Convert labels
+label_map = {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}
+predictions = [label_map[p] for p in predictions]
+# Compute metrics
+accuracy = accuracy_score(labels, predictions)
+f1 = f1_score(labels, predictions, average="weighted")
+print(f"Accuracy: {accuracy:.4f}")
+print(f"F1-score: {f1:.4f}")

scripts/test.py ADDED Viewed

+import unittest
+from transformers import pipeline
+classifier = pipeline("text-classification", model="./models")
+class TestModel(unittest.TestCase):
+    def test_positive_sentiment(self):
+        result = classifier("I love this product!")[0]
+        self.assertIn(result["label"], ["LABEL_0", "LABEL_1", "LABEL_2"])
+    def test_negative_sentiment(self):
+        result = classifier("This is terrible, I hate it.")[0]
+        self.assertIn(result["label"], ["LABEL_0", "LABEL_1", "LABEL_2"])
+if __name__ == "__main__":
+    unittest.main()

scripts/train.py ADDED Viewed

+from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer
+from datasets import load_dataset
+# Load dataset (French dataset example: Allociné)
+dataset = load_dataset("allocine")
+# Load tokenizer
+model_name = "distilbert-base-multilingual-cased"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+# Tokenize data
+def tokenize(batch):
+    return tokenizer(batch["review"], padding="max_length", truncation=True)
+dataset = dataset.map(tokenize, batched=True)
+# Load model
+model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
+# Training arguments
+training_args = TrainingArguments(
+    output_dir="./models",
+    per_device_train_batch_size=8,
+    num_train_epochs=3,
+    evaluation_strategy="epoch",
+    save_steps=1000,
+    load_best_model_at_end=True,
+)
+# Trainer setup
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=dataset["train"],
+    eval_dataset=dataset["test"],
+)
+# Train model
+trainer.train()
+# Save model
+model.save_pretrained("./models")
+tokenizer.save_pretrained("./models")