mrcuddle committed
Commit d063086 · verified · 1 Parent(s): bb9125a

Create README.md

Files changed (1): README.md +97 -0
README.md ADDED
---
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Doc / guide: https://huggingface.co/docs/hub/model-cards
{}
---
# Model Card for Model ID

<!-- Provide a quick summary of what the model is/does. -->

This model card aims to be a base template for new models. It has been generated using [this raw template](https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md?plain=1).
## Model Details

## Training Details

### Training Data

ChaoticNeutrals/Synthetic-Dark-RP
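The dataset is assumed here to follow the usual ShareGPT layout, which is also what the `ConversationDataset` below reads: each entry holds a `conversations` list of `from`/`value` turns. A hypothetical entry might look like:

```python
# Hypothetical ShareGPT-style entry, matching the fields ConversationDataset
# reads below ("conversations", "from", "value"); the speaker tags and text
# are illustrative, not taken from the actual dataset.
example_entry = {
    "conversations": [
        {"from": "system", "value": "You are a roleplay assistant."},
        {"from": "human", "value": "Describe the abandoned castle."},
        {"from": "gpt", "value": "The gates hang open, rusted at the hinges..."},
    ]
}
```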
### Training Procedure

The model was fine-tuned with a simplified self-play objective in the spirit of SPIN (Self-Play Fine-Tuning): the current model is trained to assign higher likelihood to the real conversations than a frozen copy of itself (the opponent), which is refreshed after each epoch. The script below reproduces the training code with imports, setup, and minor fixes added; the base checkpoint name is an assumption inferred from the save path.
```python
import json

import torch
import torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

# Base checkpoint (assumed; the save path below suggests GPT-Neo was used)
model_name = "EleutherAI/gpt-neo-1.3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # GPT-Neo has no pad token by default
model = AutoModelForCausalLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Load and preprocess the dataset
class ConversationDataset(Dataset):
    def __init__(self, data_path, tokenizer, max_length=512):
        with open(data_path, 'r') as file:
            data = json.load(file)

        self.examples = []
        for entry in data:
            # Flatten each conversation into "speaker: message" lines
            conversation = entry['conversations']
            dialogue = ""
            for turn in conversation:
                speaker = turn['from']
                message = turn['value']
                dialogue += f"{speaker}: {message}\n"
            # Pad to a fixed length so the default collate_fn can stack batches
            tokenized_input = tokenizer(
                dialogue,
                truncation=True,
                padding="max_length",
                max_length=max_length,
                return_tensors="pt",
            )
            self.examples.append(tokenized_input)

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        return self.examples[idx]

# Create the dataset and dataloader
dataset = ConversationDataset('synthetic_dark_rp_ShareGPT.json', tokenizer)
dataloader = DataLoader(dataset, batch_size=3, shuffle=True)

# Define the SPIN loss function
def spin_loss(model, opponent, inputs):
    # Forward pass for the current model; shift the logits so each position
    # scores the *next* token (standard causal-LM alignment)
    outputs = model(**inputs)
    log_probs = F.log_softmax(outputs.logits[:, :-1], dim=-1)
    targets = inputs["input_ids"][:, 1:].unsqueeze(-1)
    true_log_probs = torch.gather(log_probs, -1, targets).squeeze(-1)

    # Forward pass for the frozen opponent model
    with torch.no_grad():
        opponent_outputs = opponent(**inputs)
        opponent_log_probs = F.log_softmax(opponent_outputs.logits[:, :-1], dim=-1)
        opponent_true_log_probs = torch.gather(opponent_log_probs, -1, targets).squeeze(-1)

    # Compute the SPIN loss: minimizing the negative gap raises the current
    # model's likelihood of the real data above the opponent's; padding
    # positions are masked out of the average
    mask = inputs["attention_mask"][:, 1:].float()
    gap = (true_log_probs - opponent_true_log_probs) * mask
    loss = -gap.sum() / mask.sum()
    return loss

# Training loop
num_epochs = 10
learning_rate = 0.0002

# Initialize the opponent model as a frozen copy of the current model
opponent = AutoModelForCausalLM.from_pretrained(model_name)
opponent.load_state_dict(model.state_dict())
opponent.to(device)
opponent.eval()

optimizer = AdamW(model.parameters(), lr=learning_rate)

model.train()
for epoch in range(num_epochs):
    for batch in dataloader:
        # Tensors arrive as (batch, 1, seq_len); drop the extra dimension
        inputs = {key: val.squeeze(1).to(device) for key, val in batch.items()}

        # Compute SPIN loss
        loss = spin_loss(model, opponent, inputs)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Update the opponent model once per epoch
    opponent.load_state_dict(model.state_dict())
    print(f"Epoch {epoch + 1}/{num_epochs} completed. Loss: {loss.item()}")

# Save the fine-tuned model
model.save_pretrained("fine_tuned_gpt_neo_spin")
tokenizer.save_pretrained("fine_tuned_gpt_neo_spin")
```
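For a quick smoke test, the saved checkpoint can be reloaded with the standard `transformers` API. This is a minimal sketch: the prompt simply mirrors the `speaker: message` layout the conversations were flattened into, and the sampling settings are illustrative.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("fine_tuned_gpt_neo_spin")
tokenizer = AutoTokenizer.from_pretrained("fine_tuned_gpt_neo_spin")

# Prompt in the same "speaker: message" layout used during training
prompt = "human: Describe the abandoned castle.\ngpt:"
inputs = tokenizer(prompt, return_tensors="pt")
output_ids = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=True,
    top_p=0.9,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```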