mrcuddle committed
Commit d063086 · verified · 1 Parent(s): bb9125a

Create README.md

Files changed (1): README.md +97 -0
README.md ADDED
---
# For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
# Doc / guide: https://huggingface.co/docs/hub/model-cards
{}
---
# Model Card for Model ID

<!-- Provide a quick summary of what the model is/does. -->

This model card aims to be a base template for new models. It has been generated using [this raw template](https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md?plain=1).
## Model Details

## Training Details

### Training Data

ChaoticNeutrals/Synthetic-Dark-RP
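The dataset is assumed here to follow the usual ShareGPT layout, which is also what the `ConversationDataset` below reads: each entry holds a `conversations` list of `from`/`value` turns. A hypothetical entry might look like:

```python
# Hypothetical ShareGPT-style entry, matching the fields ConversationDataset
# reads below ("conversations", "from", "value"); the speaker tags and text
# are illustrative, not taken from the actual dataset.
example_entry = {
    "conversations": [
        {"from": "system", "value": "You are a roleplay assistant."},
        {"from": "human", "value": "Describe the abandoned castle."},
        {"from": "gpt", "value": "The gates hang open, rusted at the hinges..."},
    ]
}
```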
### Training Procedure

The model was fine-tuned with a simplified self-play objective in the spirit of SPIN (Self-Play Fine-Tuning): the current model is trained to assign higher likelihood to the real conversations than a frozen copy of itself (the opponent), which is refreshed after each epoch. The script below reproduces the training code with imports, setup, and minor fixes added; the base checkpoint name is an assumption inferred from the save path.
```python
import json

import torch
import torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import DataLoader, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer

# Base checkpoint (assumed; the save path below suggests GPT-Neo was used)
model_name = "EleutherAI/gpt-neo-1.3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token  # GPT-Neo has no pad token by default
model = AutoModelForCausalLM.from_pretrained(model_name)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Load and preprocess the dataset
class ConversationDataset(Dataset):
    def __init__(self, data_path, tokenizer, max_length=512):
        with open(data_path, 'r') as file:
            data = json.load(file)

        self.examples = []
        for entry in data:
            # Flatten each conversation into "speaker: message" lines
            conversation = entry['conversations']
            dialogue = ""
            for turn in conversation:
                speaker = turn['from']
                message = turn['value']
                dialogue += f"{speaker}: {message}\n"
            # Pad to a fixed length so the default collate_fn can stack batches
            tokenized_input = tokenizer(
                dialogue,
                truncation=True,
                padding="max_length",
                max_length=max_length,
                return_tensors="pt",
            )
            self.examples.append(tokenized_input)

    def __len__(self):
        return len(self.examples)

    def __getitem__(self, idx):
        return self.examples[idx]

# Create the dataset and dataloader
dataset = ConversationDataset('synthetic_dark_rp_ShareGPT.json', tokenizer)
dataloader = DataLoader(dataset, batch_size=3, shuffle=True)

# Define the SPIN loss function
def spin_loss(model, opponent, inputs):
    # Forward pass for the current model; shift the logits so each position
    # scores the *next* token (standard causal-LM alignment)
    outputs = model(**inputs)
    log_probs = F.log_softmax(outputs.logits[:, :-1], dim=-1)
    targets = inputs["input_ids"][:, 1:].unsqueeze(-1)
    true_log_probs = torch.gather(log_probs, -1, targets).squeeze(-1)

    # Forward pass for the frozen opponent model
    with torch.no_grad():
        opponent_outputs = opponent(**inputs)
        opponent_log_probs = F.log_softmax(opponent_outputs.logits[:, :-1], dim=-1)
        opponent_true_log_probs = torch.gather(opponent_log_probs, -1, targets).squeeze(-1)

    # Compute the SPIN loss: minimizing the negative gap raises the current
    # model's likelihood of the real data above the opponent's; padding
    # positions are masked out of the average
    mask = inputs["attention_mask"][:, 1:].float()
    gap = (true_log_probs - opponent_true_log_probs) * mask
    loss = -gap.sum() / mask.sum()
    return loss

# Training loop
num_epochs = 10
learning_rate = 0.0002

# Initialize the opponent model as a frozen copy of the current model
opponent = AutoModelForCausalLM.from_pretrained(model_name)
opponent.load_state_dict(model.state_dict())
opponent.to(device)
opponent.eval()

optimizer = AdamW(model.parameters(), lr=learning_rate)

model.train()
for epoch in range(num_epochs):
    for batch in dataloader:
        # Tensors arrive as (batch, 1, seq_len); drop the extra dimension
        inputs = {key: val.squeeze(1).to(device) for key, val in batch.items()}

        # Compute SPIN loss
        loss = spin_loss(model, opponent, inputs)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Update the opponent model once per epoch
    opponent.load_state_dict(model.state_dict())
    print(f"Epoch {epoch + 1}/{num_epochs} completed. Loss: {loss.item()}")

# Save the fine-tuned model
model.save_pretrained("fine_tuned_gpt_neo_spin")
tokenizer.save_pretrained("fine_tuned_gpt_neo_spin")
```
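For a quick smoke test, the saved checkpoint can be reloaded with the standard `transformers` API. This is a minimal sketch: the prompt simply mirrors the `speaker: message` layout the conversations were flattened into, and the sampling settings are illustrative.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("fine_tuned_gpt_neo_spin")
tokenizer = AutoTokenizer.from_pretrained("fine_tuned_gpt_neo_spin")

# Prompt in the same "speaker: message" layout used during training
prompt = "human: Describe the abandoned castle.\ngpt:"
inputs = tokenizer(prompt, return_tensors="pt")
output_ids = model.generate(
    **inputs,
    max_new_tokens=64,
    do_sample=True,
    top_p=0.9,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```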