augustocsc committed on
Commit
ecafb0d
·
verified ·
1 Parent(s): c082aa2

GPT-2 Base trained on prefix dataset (682K)

Browse files
Files changed (1) hide show
  1. scripts/train_with_json.py +3 -3
scripts/train_with_json.py CHANGED
@@ -28,7 +28,7 @@ from peft import LoraConfig, get_peft_model
28
 
29
  def convert_to_json_format(example):
30
  """Convert dataset format to JSON format."""
31
- text = example['i_prompt_n']
32
 
33
  # Parse the text format
34
  lines = text.strip().split('\n')
@@ -115,12 +115,12 @@ def main():
115
 
116
  # Check original format
117
  print("Original format sample:")
118
- print(dataset["train"][0]['i_prompt_n'][:150])
119
  print()
120
 
121
  # Convert to JSON format
122
  print("Converting to JSON format...")
123
- train_dataset = dataset["train"].map(convert_to_json_format, remove_columns=['i_prompt_n'])
124
 
125
  # Split for validation (10%)
126
  split_dataset = train_dataset.train_test_split(test_size=0.1, seed=42)
 
28
 
29
  def convert_to_json_format(example):
30
  """Convert dataset format to JSON format."""
31
+ text = example['p_prompt_n_converted']
32
 
33
  # Parse the text format
34
  lines = text.strip().split('\n')
 
115
 
116
  # Check original format
117
  print("Original format sample:")
118
+ print(dataset["train"][0]['p_prompt_n_converted'][:150])
119
  print()
120
 
121
  # Convert to JSON format
122
  print("Converting to JSON format...")
123
+ train_dataset = dataset["train"].map(convert_to_json_format, remove_columns=['p_prompt_n_converted'])
124
 
125
  # Split for validation (10%)
126
  split_dataset = train_dataset.train_test_split(test_size=0.1, seed=42)