mrcuddle
/

Tiny-DarkLlama-SPIN-Implementation

Safetensors

English

llama

Model card Files Files and versions

xet

Community

mrcuddle committed on Feb 23, 2025

Commit

d3c53bc

verified ·

1 Parent(s): d063086

Update README.md

Browse files

Files changed (1) hide show

README.md +67 -20

README.md CHANGED Viewed

@@ -1,33 +1,39 @@
 ---
-# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
-# Doc / guide: https://huggingface.co/docs/hub/model-cards
-{}
----
 # Model Card for Model ID
 <!-- Provide a quick summary of what the model is/does. -->
-This modelcard aims to be a base template for new models. It has been generated using [this raw template](https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md?plain=1).
 ## Model Details
 ## Training Details
 ### Training Data
-ChaoticNeutrals/Synthetic-Dark-RP
 ### Training Procedure
-# Load and preprocess the dataset
 class ConversationDataset(Dataset):
     def __init__(self, data_path, tokenizer, max_length=512):
         with open(data_path, 'r') as file:
             data = json.load(file)
         self.examples = []
         for entry in data:
             conversation = entry['conversations']
@@ -44,33 +50,41 @@ class ConversationDataset(Dataset):
     def __getitem__(self, idx):
         return self.examples[idx]
-        # Create the dataset and dataloader
 dataset = ConversationDataset('synthetic_dark_rp_ShareGPT.json', tokenizer)
 dataloader = DataLoader(dataset, batch_size=3, shuffle=True)
-# Define the SPIN loss function
 def spin_loss(model, opponent, inputs):
-    # Forward pass for the current model
     outputs = model(**inputs, labels=inputs["input_ids"])
     log_probs = F.log_softmax(outputs.logits, dim=-1)
     true_log_probs = torch.gather(log_probs, -1, inputs["input_ids"].unsqueeze(-1)).squeeze(-1)
-    # Forward pass for the opponent model
     with torch.no_grad():
         opponent_outputs = opponent(**inputs, labels=inputs["input_ids"])
         opponent_log_probs = F.log_softmax(opponent_outputs.logits, dim=-1)
         opponent_true_log_probs = torch.gather(opponent_log_probs, -1, inputs["input_ids"].unsqueeze(-1)).squeeze(-1)
-    # Compute SPIN loss
     loss = (true_log_probs - opponent_true_log_probs).mean()
     return loss
-# Training loop
 num_epochs = 10
 learning_rate = 0.0002
 # Initialize the opponent model
-opponent = AutoModelForCausalLM.from_pretrained(model_name)
 opponent.load_state_dict(model.state_dict())
 optimizer = AdamW(model.parameters(), lr=learning_rate)
@@ -80,18 +94,51 @@ for epoch in range(num_epochs):
     for batch in dataloader:
         inputs = {key: val.squeeze(1).to(model.device) for key, val in batch.items()}
-        # Compute SPIN loss
         loss = spin_loss(model, opponent, inputs)
-        # Backpropagation
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
-    # Update the opponent model
     opponent.load_state_dict(model.state_dict())
     print(f"Epoch {epoch + 1}/{num_epochs} completed. Loss: {loss.item()}")
-# Save the fine-tuned model
 model.save_pretrained("fine_tuned_gpt_neo_spin")
 tokenizer.save_pretrained("fine_tuned_gpt_neo_spin")

 ---
 # Model Card for Model ID
 <!-- Provide a quick summary of what the model is/does. -->
+This model card serves as a base template for new models. It has been generated using [this raw template](https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md?plain=1).
 ## Model Details
+<!-- Provide details about the model architecture, purpose, and intended use. -->
 ## Training Details
 ### Training Data
+The model was trained on the `ChaoticNeutrals/Synthetic-Dark-RP` dataset.
 ### Training Procedure
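+The model is trained using an implementation of SPIN (Self-Play Fine-Tuning), in which the model is optimized against a frozen copy of itself (the "opponent") that is refreshed after each epoch.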
+#### Load and Preprocess the Dataset
+```python
+import json
+import torch
+from torch.utils.data import Dataset, DataLoader
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch.nn.functional as F
+from torch.optim import AdamW
 class ConversationDataset(Dataset):
     def __init__(self, data_path, tokenizer, max_length=512):
         with open(data_path, 'r') as file:
             data = json.load(file)
         self.examples = []
         for entry in data:
             conversation = entry['conversations']
     def __getitem__(self, idx):
         return self.examples[idx]
+# Initialize tokenizer and dataset
+tokenizer = AutoTokenizer.from_pretrained("model_name")
 dataset = ConversationDataset('synthetic_dark_rp_ShareGPT.json', tokenizer)
 dataloader = DataLoader(dataset, batch_size=3, shuffle=True)
+```
+#### Define the SPIN Loss Function
+```python
 def spin_loss(model, opponent, inputs):
     outputs = model(**inputs, labels=inputs["input_ids"])
     log_probs = F.log_softmax(outputs.logits, dim=-1)
     true_log_probs = torch.gather(log_probs, -1, inputs["input_ids"].unsqueeze(-1)).squeeze(-1)
     with torch.no_grad():
         opponent_outputs = opponent(**inputs, labels=inputs["input_ids"])
         opponent_log_probs = F.log_softmax(opponent_outputs.logits, dim=-1)
         opponent_true_log_probs = torch.gather(opponent_log_probs, -1, inputs["input_ids"].unsqueeze(-1)).squeeze(-1)
     loss = (true_log_probs - opponent_true_log_probs).mean()
     return loss
+```
+#### Training Loop
+```python
 num_epochs = 10
 learning_rate = 0.0002
+# Load model
+model = AutoModelForCausalLM.from_pretrained("model_name")
 # Initialize the opponent model
+opponent = AutoModelForCausalLM.from_pretrained("model_name")
 opponent.load_state_dict(model.state_dict())
 optimizer = AdamW(model.parameters(), lr=learning_rate)
     for batch in dataloader:
         inputs = {key: val.squeeze(1).to(model.device) for key, val in batch.items()}
         loss = spin_loss(model, opponent, inputs)
         optimizer.zero_grad()
         loss.backward()
         optimizer.step()
     opponent.load_state_dict(model.state_dict())
     print(f"Epoch {epoch + 1}/{num_epochs} completed. Loss: {loss.item()}")
+```
+#### Save the Fine-Tuned Model
+```python
 model.save_pretrained("fine_tuned_gpt_neo_spin")
 tokenizer.save_pretrained("fine_tuned_gpt_neo_spin")
+```
+## Usage
+To use the fine-tuned model:
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model_path = "fine_tuned_gpt_neo_spin"
+model = AutoModelForCausalLM.from_pretrained(model_path)
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+def generate_response(prompt):
+    inputs = tokenizer(prompt, return_tensors="pt")
+    outputs = model.generate(**inputs)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+print(generate_response("Hello, how can I help you?"))
+```
+## Citation
+If you use this model, please cite:
+```
+@misc{model_id,
+  author = {Your Name},
+  title = {Model ID},
+  year = {2025},
+  url = {https://huggingface.co/your-model-id}
+}
+```