Robert Castagna committed
Commit 12d3e6f · Parent(s): c00d132

update files
- chat.py +19 -0
- performance_log_2024-01-06-17-46.json → performance_log_2024-01-06_17-46.json +0 -0
- trained_models/config.json +28 -0
- trained_models/generation_config.json +7 -0
- trained_models/model.safetensors +3 -0
- trainer.py → training.py +5 -2
- output.json → training_data_output.json +0 -0
- txt_to_json.py +1 -1
chat.py
ADDED
@@ -0,0 +1,19 @@
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+# Load the model and tokenizer
+model = AutoModelForCausalLM.from_pretrained("trained_models/")
+tokenizer = AutoTokenizer.from_pretrained("trained_models/")
+
+# Input text
+input_text = "Hello, how are you?"
+
+# Encode the input text
+input_ids = tokenizer.encode(input_text, return_tensors='pt')
+
+# Generate a response
+output = model.generate(input_ids)
+
+# Decode the response
+response = tokenizer.decode(output[:, input_ids.shape[-1]:][0], skip_special_tokens=True)
+
+print(response)
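A possible refinement, not part of the commit: the base checkpoint is TinyLlama-1.1B-Chat, whose tokenizer ships a chat template, and the bare generate() call above inherits max_length=2048 from generation_config.json, so replies can run long. A minimal sketch, assuming the fine-tuned tokenizer kept that template:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("trained_models/")
tokenizer = AutoTokenizer.from_pretrained("trained_models/")

# Format the prompt with the chat template the base model was trained on
# (assumption: the fine-tuned tokenizer kept TinyLlama's template).
messages = [{"role": "user", "content": "Hello, how are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)

with torch.no_grad():
    output = model.generate(
        input_ids,
        max_new_tokens=128,                   # bound the reply length
        pad_token_id=tokenizer.eos_token_id,  # avoid the missing-pad warning
    )

# Decode only the newly generated tokens, as chat.py does
print(tokenizer.decode(output[0, input_ids.shape[-1]:], skip_special_tokens=True))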
performance_log_2024-01-06-17-46.json → performance_log_2024-01-06_17-46.json
RENAMED
File without changes
trained_models/config.json
ADDED
@@ -0,0 +1,28 @@
+{
+  "_name_or_path": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 5632,
+  "max_position_embeddings": 2048,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 22,
+  "num_key_value_heads": 4,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "use_cache": true,
+  "vocab_size": 32000
+}
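The config records TinyLlama's shape (22 layers, hidden size 2048, grouped-query attention with 32 query heads over 4 key/value heads) and "torch_dtype": "float32", which is why the checkpoint below comes to about 4.4 GB (1.1B parameters × 4 bytes). For inference only, a sketch of loading in half precision to roughly halve that footprint:

import torch
from transformers import AutoModelForCausalLM

# Cast the float32 weights to float16 as they are loaded; fine for
# inference, not for resuming training.
model = AutoModelForCausalLM.from_pretrained(
    "trained_models/",
    torch_dtype=torch.float16,
)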
trained_models/generation_config.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "max_length": 2048,
+  "pad_token_id": 0,
+  "transformers_version": "4.36.2"
+}
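These are the defaults that chat.py's bare model.generate(input_ids) picks up. A sketch of loading and overriding them explicitly (the override values are illustrative, not from the commit):

from transformers import GenerationConfig

# Load the defaults saved alongside the model, then adjust a few fields.
gen_config = GenerationConfig.from_pretrained("trained_models/")
gen_config.max_new_tokens = 128  # takes precedence over the max_length=2048 default
gen_config.do_sample = True      # sample instead of greedy decoding
gen_config.temperature = 0.7

# Passed explicitly at call time:
# output = model.generate(input_ids, generation_config=gen_config)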
trained_models/model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1354fc4008a730b36cad46eb93c017f9ad6c7e455950b737b412e6c3f60627ea
+size 4400216536
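The repository stores only this Git LFS pointer, not the 4.4 GB weights themselves. Once the real file has been fetched (for example with git lfs pull), a sketch of checking it against the recorded oid:

import hashlib

EXPECTED_OID = "1354fc4008a730b36cad46eb93c017f9ad6c7e455950b737b412e6c3f60627ea"

# Stream the file in 1 MiB chunks so the 4.4 GB checkpoint never sits in RAM.
digest = hashlib.sha256()
with open("trained_models/model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)

assert digest.hexdigest() == EXPECTED_OID, "checkpoint does not match the LFS pointer"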
trainer.py → training.py
RENAMED
@@ -58,7 +58,7 @@ def evaluate_training(model, train_loader, device):
 
 
 # Assuming the JSON file 'output.json' is in the same directory as the script
-full_dataset = QuizletDataset(json_file='output.json')
+full_dataset = QuizletDataset(json_file='training_data_output.json')
 
 # Calculate the sizes of the splits for 80/20 train/test
 train_size = int(0.8 * len(full_dataset))
@@ -163,7 +163,10 @@ for epoch in range(epochs):
 
 # Save performance log to a JSON file
 print("Saving performance log...")
-
+training_datetime = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')
+with open(f"performance_log_{training_datetime}.json", "w") as file:
     json.dump(performance_log, file, indent=4)
 
+model.save_pretrained(f"trained_models/")
+tokenizer.save_pretrained("trained_models/")
 print("Done!")
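The new save step writes one timestamped log per run. A sketch of reading back the latest one, assuming only the filename pattern visible in the diff (the log's internal structure is not shown in the commit); the %Y-%m-%d_%H-%M timestamps sort lexicographically in chronological order, so sorted() suffices:

import glob
import json

# Last entry of the sorted list is the newest log.
log_files = sorted(glob.glob("performance_log_*.json"))
with open(log_files[-1]) as file:
    performance_log = json.load(file)

print(f"Loaded {log_files[-1]} with {len(performance_log)} top-level entries")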
output.json → training_data_output.json
RENAMED
File without changes
txt_to_json.py
CHANGED
@@ -31,7 +31,7 @@ with open('data.txt', 'r') as file:
 json_output = generate_json_full(data)
 
 # Save the output to a JSON file
-with open('output.json', 'w') as f:
+with open('training_data_output.json', 'w') as f:
     json.dump(json_output, f, indent=4)
 
 print('Dataset successfully saved to output.json')
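One loose end in this hunk: the final success message still says output.json even though the data now lands in training_data_output.json. A sketch of keeping the two in sync with a single constant (OUTPUT_PATH and the stand-in payload are illustrative, not from the commit):

import json

OUTPUT_PATH = 'training_data_output.json'  # single source of truth for the path
json_output = {"example": "data"}          # stand-in for generate_json_full(data)

with open(OUTPUT_PATH, 'w') as f:
    json.dump(json_output, f, indent=4)

print(f'Dataset successfully saved to {OUTPUT_PATH}')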