Robert Castagna committed on
Commit
12d3e6f
Β·
1 Parent(s): c00d132

update files

Browse files
chat.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
2
+
3
+ # Load the model and tokenizer
4
+ model = AutoModelForCausalLM.from_pretrained("trained_models/")
5
+ tokenizer = AutoTokenizer.from_pretrained("trained_models/")
6
+
7
+ # Input text
8
+ input_text = "Hello, how are you?"
9
+
10
+ # Encode the input text
11
+ input_ids = tokenizer.encode(input_text, return_tensors='pt')
12
+
13
+ # Generate a response
14
+ output = model.generate(input_ids)
15
+
16
+ # Decode the response
17
+ response = tokenizer.decode(output[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
18
+
19
+ print(response)
performance_log_2024-01-06-17-46.json β†’ performance_log_2024-01-06_17-46.json RENAMED
File without changes
trained_models/config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 5632,
14
+ "max_position_embeddings": 2048,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 32,
17
+ "num_hidden_layers": 22,
18
+ "num_key_value_heads": 4,
19
+ "pretraining_tp": 1,
20
+ "rms_norm_eps": 1e-05,
21
+ "rope_scaling": null,
22
+ "rope_theta": 10000.0,
23
+ "tie_word_embeddings": false,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.36.2",
26
+ "use_cache": true,
27
+ "vocab_size": 32000
28
+ }
trained_models/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "eos_token_id": 2,
4
+ "max_length": 2048,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.36.2"
7
+ }
trained_models/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1354fc4008a730b36cad46eb93c017f9ad6c7e455950b737b412e6c3f60627ea
3
+ size 4400216536
trainer.py β†’ training.py RENAMED
@@ -58,7 +58,7 @@ def evaluate_training(model, train_loader, device):
58
 
59
 
60
  # Assuming the JSON file 'output.json' is in the same directory as the script
61
- full_dataset = QuizletDataset(json_file='output.json')
62
 
63
  # Calculate the sizes of the splits for 80/20 train/test
64
  train_size = int(0.8 * len(full_dataset))
@@ -163,7 +163,10 @@ for epoch in range(epochs):
163
 
164
  # Save performance log to a JSON file
165
  print("Saving performance log...")
166
- with open(f"performance_log_{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')}.json", "w") as file:
 
167
  json.dump(performance_log, file, indent=4)
168
 
 
 
169
  print("Done!")
 
58
 
59
 
60
  # Assuming the JSON file 'output.json' is in the same directory as the script
61
+ full_dataset = QuizletDataset(json_file='training_data_output.json')
62
 
63
  # Calculate the sizes of the splits for 80/20 train/test
64
  train_size = int(0.8 * len(full_dataset))
 
163
 
164
  # Save performance log to a JSON file
165
  print("Saving performance log...")
166
+ training_datetime = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')
167
+ with open(f"performance_log_{training_datetime}.json", "w") as file:
168
  json.dump(performance_log, file, indent=4)
169
 
170
+ model.save_pretrained(f"trained_models/")
171
+ tokenizer.save_pretrained("trained_models/")
172
  print("Done!")
output.json β†’ training_data_output.json RENAMED
File without changes
txt_to_json.py CHANGED
@@ -31,7 +31,7 @@ with open('data.txt', 'r') as file:
31
  json_output = generate_json_full(data)
32
 
33
  # Save the output to a JSON file
34
- with open('output.json', 'w') as f:
35
  json.dump(json_output, f, indent=4)
36
 
37
  print('Dataset successfully saved to output.json')
 
31
  json_output = generate_json_full(data)
32
 
33
  # Save the output to a JSON file
34
+ with open('training_data_output.json', 'w') as f:
35
  json.dump(json_output, f, indent=4)
36
 
37
  print('Dataset successfully saved to output.json')