RRPATEL228 committed on
Commit
0b68e9f
·
verified ·
1 Parent(s): eb353ff

Upload hugging.py

Browse files
Files changed (1) hide show
  1. hugging.py +232 -0
hugging.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """hugging.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1L3wB_9pZG9AWiAlibB_lGeZkfea-BqTW
8
+ """
9
+
10
+ !pip install transformers
11
+
12
+ !pip install huggingface_hub
13
+
14
+ # Install the Hugging Face CLI
15
+ !pip install -U "huggingface_hub[cli]"
16
+
17
+ from google.colab import userdata
18
+ userdata.get('HF_READ')
19
+
20
+ HF_READ_TOKEN = userdata.get('HF_READ')
21
+
22
+ !git config --global credential.helper store
23
+
24
+ !hf auth logout # clear old/invalid token
25
+ !hf auth login
26
+
27
+ !hf auth whoami
28
+
29
+ from huggingface_hub import notebook_login
30
+ notebook_login()
31
+
32
+ !pip install --upgrade huggingface_hub
33
+
34
+ !hf upload hf://datasets/Anthropic/EconomicIndex/release_2025_03_27/automation_vs_augmentation_by_task.csv.csv --repo https://huggingface.co/datasets/RRPATEL228/repo
35
+
36
+ mkdir -p RRPATEL228/test_repo
37
+
38
+ get_ipython().system('echo "Test upload" > RRPATEL228/test_repo/README.md')
39
+
40
+ !hf upload RRPATEL228/test_repo --repo RRPATEL228/test_repo
41
+
42
+ pip install llama-stack
43
+
44
+ pip install llama-stack -U
45
+
46
+ !llama model list
47
+
48
+ from huggingface_hub import notebook_login
49
+
50
+ notebook_login()
51
+
52
+ from datasets import load_dataset
53
+ from huggingface_hub import hf_hub_download
54
+ import os
55
+
56
+ # Define the repository ID and filename
57
+ repo_id = "RRPATEL228/test_repo"
58
+ filename = "Customer_Attributes_and_Purchase_Propensity.csv"
59
+
60
+ # Download the file
61
+ file_path = hf_hub_download(repo_id=repo_id, filename=filename)
62
+
63
+ # Load the dataset from the downloaded file
64
+ ds = load_dataset("csv", data_files=file_path)
65
+
66
+ display(ds)
67
+
68
+ ds['train'].shape
69
+
70
+ display(ds['train'].features)
71
+
72
+ ds
73
+
74
+ small_train = ds["train"].shuffle(seed=42).select(range(100))
75
+ small_eval = ds["train"].shuffle(seed=42).select(range(100, 200)) # Selecting a different range for evaluation set
76
+
77
+ small_train
78
+
79
+ small_eval
80
+
81
+ from transformers import Trainer, TrainingArguments
82
+
83
+ import torch
84
+ from torch.utils.data import Dataset # Import Dataset base class
85
+
86
class CustomDataset(Dataset):
    """Wrap a tabular split as a torch Dataset of {'input', 'label'} dicts.

    Expects a column-mapping (e.g. a datasets split or plain dict) with a
    'Score' column — the single numeric feature — and a 'Purchased' column
    holding integer class labels.
    """

    def __init__(self, data):
        # The score column becomes an (N, 1) float matrix; the extra
        # dimension keeps each sample 2-D so it feeds nn.Linear directly.
        scores = torch.tensor(data['Score'], dtype=torch.float32)
        self.X = scores.unsqueeze(1)
        self.y = torch.tensor(data['Purchased'], dtype=torch.long)

    def __len__(self):
        # One label per sample, so the label tensor's length is the size.
        return self.y.shape[0]

    def __getitem__(self, idx):
        # Keys match what TabularMLP.forward() expects from the Trainer.
        return {'input': self.X[idx], 'label': self.y[idx]}
97
+
98
+ train_dataset = CustomDataset(small_train)
99
+ eval_dataset = CustomDataset(small_eval)
100
+
101
+ import torch.nn as nn
102
+ import torch
103
+
104
class TabularMLP(nn.Module):
    """Two-layer perceptron for tabular classification.

    forward() follows the Hugging Face Trainer convention: it accepts the
    batch keys produced by CustomDataset ('input', plus optional 'labels')
    and returns a dict with 'loss' (None when no labels are supplied) and
    'logits'.
    """

    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes),
        )
        # Cross-entropy over raw logits; targets are class indices.
        self.loss_fct = nn.CrossEntropyLoss()

    def forward(self, input, labels=None):
        # NOTE: the parameter must be named 'input' to match the dataset's
        # batch key when driven by the Trainer.
        logits = self.network(input)
        if labels is None:
            return {"loss": None, "logits": logits}
        # Flatten both sides so the loss works for any leading batch shape.
        n_out = self.network[-1].out_features
        loss = self.loss_fct(logits.view(-1, n_out), labels.view(-1))
        return {"loss": loss, "logits": logits}
121
+
122
+ import torch
123
+
124
+ import torch.nn as nn
125
+
126
+ training_args = TrainingArguments(
127
+ output_dir='./results',
128
+ num_train_epochs=10,
129
+ per_device_train_batch_size=32,
130
+ # evaluation_strategy="epoch" # Removed the unexpected argument
131
+ )
132
+
133
+ # Initialize the model
134
+ input_dim = train_dataset.X.shape[1] # Get input dimension from the dataset
135
+ num_classes = len(torch.unique(train_dataset.y)) # Get number of classes from the dataset
136
+
137
+ model = TabularMLP(input_dim=input_dim, num_classes=num_classes)
138
+
139
+ from transformers import TrainingArguments
140
+
141
+ training_args = TrainingArguments(
142
+ output_dir="Purchased_data",
143
+ learning_rate=2e-5,
144
+ per_device_train_batch_size=8,
145
+ per_device_eval_batch_size=8,
146
+ num_train_epochs=2,
147
+ push_to_hub=True,
148
+ )
149
+
150
+ from huggingface_hub import notebook_login # Corrected import
151
+ notebook_login()
152
+
153
+ !hf auth login
154
+
155
+ import wandb
156
+
157
+ wandb.init(project="huggingface") # replace with your project name
158
+
159
+ from transformers import TrainingArguments
160
+
161
+ training_args = TrainingArguments(
162
+ output_dir='./results',
163
+ # evaluation_strategy="epoch", # Removed the unexpected argument
164
+ logging_dir='./logs',
165
+ # logging_strategy="steps", # Removed for consistency
166
+ logging_steps=10,
167
+ report_to="wandb", # IMPORTANT: enables wandb logging
168
+ save_strategy="epoch",
169
+ per_device_train_batch_size=32,
170
+ per_device_eval_batch_size=32,
171
+ num_train_epochs=3,
172
+ )
173
+
174
+ trainer.evaluate()
175
+
176
+ predictions = trainer.predict(eval_dataset)
177
+ print(predictions.predictions)
178
+ print(predictions.label_ids)
179
+
180
+ import numpy as np
181
+ from transformers import EvalPrediction
182
+ import evaluate # Using the evaluate library for metrics
183
+
184
+ # Load accuracy metric
185
+ accuracy_metric = evaluate.load("accuracy")
186
+
187
def compute_metrics(p):
    """Accuracy metric callback for Trainer.evaluate()/predict().

    Args:
        p: an object exposing `predictions` (logits, shape [N, num_classes])
           and `label_ids` (true classes, shape [N]) — e.g. a transformers
           EvalPrediction.

    Returns:
        dict with a single "accuracy" float in [0, 1], matching the shape
        of the result produced by `evaluate.load("accuracy").compute(...)`.
    """
    # Predicted class = argmax over the logit axis.
    preds = np.argmax(p.predictions, axis=1)
    # Compute accuracy directly with numpy. This removes the dependency on
    # the module-level `accuracy_metric` global from the `evaluate` package,
    # which this script only pip-installs in a *later* cell, so the original
    # would fail on a fresh runtime executed top-to-bottom.
    return {"accuracy": float(np.mean(preds == np.asarray(p.label_ids)))}
193
+
194
+ !pip install evaluate
195
+
196
+
197
+
198
+ from transformers import Trainer
199
+
200
+ trainer = Trainer(
201
+ model=model,
202
+ args=training_args,
203
+ train_dataset=train_dataset,
204
+ eval_dataset=eval_dataset,
205
+ compute_metrics=compute_metrics, # Assuming compute_metrics is defined and needed
206
+ )
207
+
208
+ trainer.train()
209
+
210
+ import wandb
211
+ import matplotlib.pyplot as plt
212
+ from sklearn.metrics import ConfusionMatrixDisplay
213
+
214
+ # After evaluation step, e.g. after trainer.evaluate()
215
+ eval_results = trainer.evaluate()
216
+
217
+ # Log scalar metrics explicitly
218
+ wandb.log(eval_results)
219
+
220
+ # For logging confusion matrix or any plot
221
def log_confusion_matrix(predictions, labels, class_names):
    """Render a confusion matrix and log it as an image to the active W&B run.

    Args:
        predictions: predicted class labels.
        labels: ground-truth class labels.
        class_names: display names for the classes, in label order.
    """
    figure, axis = plt.subplots(figsize=(8, 8))
    # sklearn draws onto our axis; note the (y_true, y_pred) argument order.
    ConfusionMatrixDisplay.from_predictions(
        labels, predictions, display_labels=class_names, ax=axis
    )
    wandb.log({"confusion_matrix": wandb.Image(figure)})
    # Close the figure so repeated calls don't accumulate open figures.
    plt.close(figure)
226
+
227
+ # Example usage after predictions
228
+ preds = trainer.predict(eval_dataset) # Corrected variable name
229
+ predicted_labels = preds.predictions.argmax(axis=-1)
230
+ true_labels = preds.label_ids
231
+ log_confusion_matrix(predicted_labels, true_labels, class_names=['No','Yes'])
232
+