CIS5190GoGo
/

CustomModel

Safetensors

roberta

Model card Files Files and versions

xet

Community

Jiayi05 committed on Dec 16, 2024

Commit

b275ea2

verified ·

1 Parent(s): 4cfa7c5

Update README.md

Browse files

Files changed (1) hide show

README.md +22 -13

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ The following code loads and tests the model in a Colab notebook.
 ---
-## Prerequisites
 1. Import the required Python packages:
@@ -28,10 +28,11 @@ from huggingface_hub import login
 login("Replace with the key")
 ```
-# Define the preprocessing and dataset class
-1. Run the following preprocessing code
 class NewsDataset(Dataset):
     def __init__(self, texts, labels, tokenizer, max_len=128):
         self.texts = texts
@@ -75,33 +76,41 @@ def preprocess_text(text):
     text = text.lower()
     text = ' '.join(text.split())
     return text
-# Step 1: Load the model and tokenizer from Hugging Face Hub
 print("Loading model and tokenizer...")
-REPO_NAME = "CIS5190GoGo/CustomModel"  # Replace with your repo name on Hugging Face Hub
 model = RobertaForSequenceClassification.from_pretrained(REPO_NAME)
 tokenizer = RobertaTokenizer.from_pretrained(REPO_NAME)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 print("Model and tokenizer loaded successfully!")
-# Step 2: Load test dataset
 print("Loading test data...")
 test_data_path = "/content/drive/MyDrive/5190_project/test_data_random_subset.csv"  # Replace with your test set path
 test_data = pd.read_csv(test_data_path)
-# Preprocess test data
 X_test = test_data['title'].apply(preprocess_text).values
 y_test = test_data['labels'].values
-# Step 3: Prepare the dataset and dataloader
 test_dataset = NewsDataset(X_test, y_test, tokenizer)
 test_loader = DataLoader(test_dataset, batch_size=16, num_workers=2)
-# Step 4: Evaluate the model
 print("Evaluating the model...")
 model.eval()
 all_preds, all_labels = [], []
@@ -118,6 +127,6 @@ with torch.no_grad():
         all_preds.extend(preds.cpu().numpy())
         all_labels.extend(labels.cpu().numpy())
-# Step 5: Calculate accuracy
 accuracy = accuracy_score(all_labels, all_preds)
-print(f"Test Accuracy: {accuracy:.4f}")

 ---
+# Step 1: Prerequisites
 1. Import the required Python packages:
 login("Replace with the key")
 ```
+# Step 2: Define the preprocessing and dataset class
+1. Run the following dataset class and helper function definitions, which are used to preprocess the test data:
+```python
 class NewsDataset(Dataset):
     def __init__(self, texts, labels, tokenizer, max_len=128):
         self.texts = texts
     text = text.lower()
     text = ' '.join(text.split())
     return text
+```
+# Step 3: Load the model and tokenizer from Hugging Face Hub
+```python
 print("Loading model and tokenizer...")
+REPO_NAME = "CIS5190GoGo/CustomModel" #This is where we pushed the model to
 model = RobertaForSequenceClassification.from_pretrained(REPO_NAME)
 tokenizer = RobertaTokenizer.from_pretrained(REPO_NAME)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 print("Model and tokenizer loaded successfully!")
+```
+# Step 4: Load test dataset
+```python
 print("Loading test data...")
 test_data_path = "/content/drive/MyDrive/5190_project/test_data_random_subset.csv"  # Replace with your test set path
 test_data = pd.read_csv(test_data_path)
+```
+# Step 5: Preprocess test data
+```python
 X_test = test_data['title'].apply(preprocess_text).values
 y_test = test_data['labels'].values
+```
+# Step 6: Prepare the dataset and dataloader
+```python
 test_dataset = NewsDataset(X_test, y_test, tokenizer)
 test_loader = DataLoader(test_dataset, batch_size=16, num_workers=2)
+```
+# Step 7: Evaluate the model and calculate accuracy
+```python
 print("Evaluating the model...")
 model.eval()
 all_preds, all_labels = [], []
         all_preds.extend(preds.cpu().numpy())
         all_labels.extend(labels.cpu().numpy())
 accuracy = accuracy_score(all_labels, all_preds)
+print(f"Test Accuracy: {accuracy:.4f}")
+```