mrcuddle committed on
Commit
d3c53bc
·
verified ·
1 Parent(s): d063086

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +67 -20
README.md CHANGED
@@ -1,33 +1,39 @@
1
  ---
2
- # For reference on model card metadata, see the spec: https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1
3
- # Doc / guide: https://huggingface.co/docs/hub/model-cards
4
- {}
5
- ---
6
-
7
  # Model Card for Model ID
8
 
9
  <!-- Provide a quick summary of what the model is/does. -->
10
 
11
- This modelcard aims to be a base template for new models. It has been generated using [this raw template](https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md?plain=1).
12
 
13
  ## Model Details
14
 
 
15
 
16
  ## Training Details
17
 
18
  ### Training Data
19
 
20
- ChaoticNeutrals/Synthetic-Dark-RP
21
 
22
  ### Training Procedure
23
 
24
- # Load and preprocess the dataset
 
 
 
 
 
 
 
 
 
 
25
 
26
  class ConversationDataset(Dataset):
27
  def __init__(self, data_path, tokenizer, max_length=512):
28
  with open(data_path, 'r') as file:
29
  data = json.load(file)
30
-
31
  self.examples = []
32
  for entry in data:
33
  conversation = entry['conversations']
@@ -44,33 +50,41 @@ class ConversationDataset(Dataset):
44
 
45
  def __getitem__(self, idx):
46
  return self.examples[idx]
47
- # Create the dataset and dataloader
 
 
48
  dataset = ConversationDataset('synthetic_dark_rp_ShareGPT.json', tokenizer)
49
  dataloader = DataLoader(dataset, batch_size=3, shuffle=True)
 
 
 
50
 
51
- # Define the SPIN loss function
52
  def spin_loss(model, opponent, inputs):
53
- # Forward pass for the current model
54
  outputs = model(**inputs, labels=inputs["input_ids"])
55
  log_probs = F.log_softmax(outputs.logits, dim=-1)
56
  true_log_probs = torch.gather(log_probs, -1, inputs["input_ids"].unsqueeze(-1)).squeeze(-1)
57
 
58
- # Forward pass for the opponent model
59
  with torch.no_grad():
60
  opponent_outputs = opponent(**inputs, labels=inputs["input_ids"])
61
  opponent_log_probs = F.log_softmax(opponent_outputs.logits, dim=-1)
62
  opponent_true_log_probs = torch.gather(opponent_log_probs, -1, inputs["input_ids"].unsqueeze(-1)).squeeze(-1)
63
 
64
- # Compute SPIN loss
65
  loss = (true_log_probs - opponent_true_log_probs).mean()
66
  return loss
 
 
 
67
 
68
- # Training loop
69
  num_epochs = 10
70
  learning_rate = 0.0002
71
 
 
 
 
72
  # Initialize the opponent model
73
- opponent = AutoModelForCausalLM.from_pretrained(model_name)
74
  opponent.load_state_dict(model.state_dict())
75
 
76
  optimizer = AdamW(model.parameters(), lr=learning_rate)
@@ -80,18 +94,51 @@ for epoch in range(num_epochs):
80
  for batch in dataloader:
81
  inputs = {key: val.squeeze(1).to(model.device) for key, val in batch.items()}
82
 
83
- # Compute SPIN loss
84
  loss = spin_loss(model, opponent, inputs)
85
 
86
- # Backpropagation
87
  optimizer.zero_grad()
88
  loss.backward()
89
  optimizer.step()
90
 
91
- # Update the opponent model
92
  opponent.load_state_dict(model.state_dict())
93
  print(f"Epoch {epoch + 1}/{num_epochs} completed. Loss: {loss.item()}")
 
 
 
94
 
95
- # Save the fine-tuned model
96
  model.save_pretrained("fine_tuned_gpt_neo_spin")
97
  tokenizer.save_pretrained("fine_tuned_gpt_neo_spin")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
 
 
 
 
 
2
  # Model Card for Model ID
3
 
4
  <!-- Provide a quick summary of what the model is/does. -->
5
 
6
+ This model card serves as a base template for new models. It has been generated using [this raw template](https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md?plain=1).
7
 
8
  ## Model Details
9
 
10
+ <!-- Provide details about the model architecture, purpose, and intended use. -->
11
 
12
  ## Training Details
13
 
14
  ### Training Data
15
 
16
+ The model was trained on the `ChaoticNeutrals/Synthetic-Dark-RP` dataset.
17
 
18
  ### Training Procedure
19
 
20
+ The model is trained using an implementation of SPIN (Self-Play Fine-Tuning).
21
+
22
+ #### Load and Preprocess the Dataset
23
+
24
+ ```python
25
+ import json
26
+ import torch
27
+ from torch.utils.data import Dataset, DataLoader
28
+ from transformers import AutoTokenizer, AutoModelForCausalLM
29
+ import torch.nn.functional as F
30
+ from torch.optim import AdamW
31
 
32
  class ConversationDataset(Dataset):
33
  def __init__(self, data_path, tokenizer, max_length=512):
34
  with open(data_path, 'r') as file:
35
  data = json.load(file)
36
+
37
  self.examples = []
38
  for entry in data:
39
  conversation = entry['conversations']
 
50
 
51
  def __getitem__(self, idx):
52
  return self.examples[idx]
53
+
54
+ # Initialize tokenizer and dataset
55
+ tokenizer = AutoTokenizer.from_pretrained("model_name")
56
  dataset = ConversationDataset('synthetic_dark_rp_ShareGPT.json', tokenizer)
57
  dataloader = DataLoader(dataset, batch_size=3, shuffle=True)
58
+ ```
59
+
60
+ #### Define the SPIN Loss Function
61
 
62
+ ```python
63
  def spin_loss(model, opponent, inputs):
 
64
  outputs = model(**inputs, labels=inputs["input_ids"])
65
  log_probs = F.log_softmax(outputs.logits, dim=-1)
66
  true_log_probs = torch.gather(log_probs, -1, inputs["input_ids"].unsqueeze(-1)).squeeze(-1)
67
 
 
68
  with torch.no_grad():
69
  opponent_outputs = opponent(**inputs, labels=inputs["input_ids"])
70
  opponent_log_probs = F.log_softmax(opponent_outputs.logits, dim=-1)
71
  opponent_true_log_probs = torch.gather(opponent_log_probs, -1, inputs["input_ids"].unsqueeze(-1)).squeeze(-1)
72
 
 
73
  loss = (true_log_probs - opponent_true_log_probs).mean()
74
  return loss
75
+ ```
76
+
77
+ #### Training Loop
78
 
79
+ ```python
80
  num_epochs = 10
81
  learning_rate = 0.0002
82
 
83
+ # Load model
84
+ model = AutoModelForCausalLM.from_pretrained("model_name")
85
+
86
  # Initialize the opponent model
87
+ opponent = AutoModelForCausalLM.from_pretrained("model_name")
88
  opponent.load_state_dict(model.state_dict())
89
 
90
  optimizer = AdamW(model.parameters(), lr=learning_rate)
 
94
  for batch in dataloader:
95
  inputs = {key: val.squeeze(1).to(model.device) for key, val in batch.items()}
96
 
 
97
  loss = spin_loss(model, opponent, inputs)
98
 
 
99
  optimizer.zero_grad()
100
  loss.backward()
101
  optimizer.step()
102
 
 
103
  opponent.load_state_dict(model.state_dict())
104
  print(f"Epoch {epoch + 1}/{num_epochs} completed. Loss: {loss.item()}")
105
+ ```
106
+
107
+ #### Save the Fine-Tuned Model
108
 
109
+ ```python
110
  model.save_pretrained("fine_tuned_gpt_neo_spin")
111
  tokenizer.save_pretrained("fine_tuned_gpt_neo_spin")
112
+ ```
113
+
114
+ ## Usage
115
+
116
+ To use the fine-tuned model:
117
+
118
+ ```python
119
+ from transformers import AutoModelForCausalLM, AutoTokenizer
120
+
121
+ model_path = "fine_tuned_gpt_neo_spin"
122
+ model = AutoModelForCausalLM.from_pretrained(model_path)
123
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
124
+
125
+ def generate_response(prompt):
126
+ inputs = tokenizer(prompt, return_tensors="pt")
127
+ outputs = model.generate(**inputs)
128
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
129
+
130
+ print(generate_response("Hello, how can I help you?"))
131
+ ```
132
+
133
+ ## Citation
134
+
135
+ If you use this model, please cite:
136
+
137
+ ```
138
+ @misc{model_id,
139
+ author = {Your Name},
140
+ title = {Model ID},
141
+ year = {2025},
142
+ url = {https://huggingface.co/your-model-id}
143
+ }
144
+ ```