| | --- |
| | datasets: |
| | - ChaoticNeutrals/Synthetic-Dark-RP |
| | language: |
| | - en |
| | base_model: |
| | - mrcuddle/Tiny-DarkLlama-Chat |
| | --- |
| | --- |
| | # Model Card for Model ID |
| |
|
| | <!-- Provide a quick summary of what the model is/does. --> |
| |
|
| | This model card serves as a base template for new models. It has been generated using [this raw template](https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md?plain=1). |
| |
|
| | ## Model Details |
| |
|
| | <!-- Provide details about the model architecture, purpose, and intended use. --> |
| |
|
| | ## Training Details |
| |
|
| | ### Training Data |
| |
|
| | The model was trained on the `ChaoticNeutrals/Synthetic-Dark-RP` dataset. |
| |
|
| | ### Training Procedure |
| |
|
| | The model was trained using an implementation of SPIN (Self-Play Fine-Tuning). |
| |
|
| | #### Load and Preprocess the Dataset |
| |
|
| | ```python |
| | import json |
| | import torch |
| | from torch.utils.data import Dataset, DataLoader |
| | from transformers import AutoTokenizer, AutoModelForCausalLM |
| | import torch.nn.functional as F |
| | from torch.optim import AdamW |
| | |
| | class ConversationDataset(Dataset): |
| | def __init__(self, data_path, tokenizer, max_length=512): |
| | with open(data_path, 'r') as file: |
| | data = json.load(file) |
| | |
| | self.examples = [] |
| | for entry in data: |
| | conversation = entry['conversations'] |
| | dialogue = "" |
| | for turn in conversation: |
| | speaker = turn['from'] |
| | message = turn['value'] |
| | dialogue += f"{speaker}: {message}\n" |
| | tokenized_input = tokenizer(dialogue, truncation=True, max_length=max_length, return_tensors="pt") |
| | self.examples.append(tokenized_input) |
| | |
| | def __len__(self): |
| | return len(self.examples) |
| | |
| | def __getitem__(self, idx): |
| | return self.examples[idx] |
| | |
| | # Initialize tokenizer and dataset |
| | tokenizer = AutoTokenizer.from_pretrained("model_name") |
| | dataset = ConversationDataset('synthetic_dark_rp_ShareGPT.json', tokenizer) |
| | dataloader = DataLoader(dataset, batch_size=3, shuffle=True) |
| | ``` |
| |
|
| | #### Define the SPIN Loss Function |
| |
|
| | ```python |
| | def spin_loss(model, opponent, inputs): |
| | outputs = model(**inputs, labels=inputs["input_ids"]) |
| | log_probs = F.log_softmax(outputs.logits, dim=-1) |
| | true_log_probs = torch.gather(log_probs, -1, inputs["input_ids"].unsqueeze(-1)).squeeze(-1) |
| | |
| | with torch.no_grad(): |
| | opponent_outputs = opponent(**inputs, labels=inputs["input_ids"]) |
| | opponent_log_probs = F.log_softmax(opponent_outputs.logits, dim=-1) |
| | opponent_true_log_probs = torch.gather(opponent_log_probs, -1, inputs["input_ids"].unsqueeze(-1)).squeeze(-1) |
| | |
| | loss = (true_log_probs - opponent_true_log_probs).mean() |
| | return loss |
| | ``` |
| |
|
| | #### Training Loop |
| |
|
| | ```python |
| | num_epochs = 10 |
| | learning_rate = 0.0002 |
| | |
| | # Load model |
| | model = AutoModelForCausalLM.from_pretrained("model_name") |
| | |
| | # Initialize the opponent model |
| | opponent = AutoModelForCausalLM.from_pretrained("model_name") |
| | opponent.load_state_dict(model.state_dict()) |
| | |
| | optimizer = AdamW(model.parameters(), lr=learning_rate) |
| | |
| | model.train() |
| | for epoch in range(num_epochs): |
| | for batch in dataloader: |
| | inputs = {key: val.squeeze(1).to(model.device) for key, val in batch.items()} |
| | |
| | loss = spin_loss(model, opponent, inputs) |
| | |
| | optimizer.zero_grad() |
| | loss.backward() |
| | optimizer.step() |
| | |
| | opponent.load_state_dict(model.state_dict()) |
| | print(f"Epoch {epoch + 1}/{num_epochs} completed. Loss: {loss.item()}") |
| | ``` |
| |
|
| | #### Save the Fine-Tuned Model |
| |
|
| | ```python |
| | model.save_pretrained("fine_tuned_gpt_neo_spin") |
| | tokenizer.save_pretrained("fine_tuned_gpt_neo_spin") |
| | ``` |
| |
|
| | ## Usage |
| |
|
| | To use the fine-tuned model: |
| |
|
| | ```python |
| | from transformers import AutoModelForCausalLM, AutoTokenizer |
| | |
| | model_path = "fine_tuned_gpt_neo_spin" |
| | model = AutoModelForCausalLM.from_pretrained(model_path) |
| | tokenizer = AutoTokenizer.from_pretrained(model_path) |
| | |
| | def generate_response(prompt): |
| | inputs = tokenizer(prompt, return_tensors="pt") |
| | outputs = model.generate(**inputs) |
| | return tokenizer.decode(outputs[0], skip_special_tokens=True) |
| | |
| | print(generate_response("Hello, how can I help you?")) |
| | ``` |
| |
|
| | ## Citation |
| |
|
| | If you use this model, please cite: |
| |
|
| | ``` |
| | @misc{model_id, |
| | author = {Your Name}, |
| | title = {Model ID}, |
| | year = {2025}, |
| | url = {https://huggingface.co/your-model-id} |
| | } |
| | ``` |