# -*- coding: utf-8 -*- """project3.ipynb Automatically generated by Colab. Original file is located at https://colab.research.google.com/drive/19E9hoAzWKvn9c9SHqM4Xan_Ph4wNewHS ## Project 3: Write code to trade stocks ### An example code generator by fine-tuning StarCoder2 using QLoRA NOTE: This is a toy example to illustrate the technique – please don’t use any of this code to make trading decisions! Previously, we created a dataset and uploaded it to Hugging Face. Now we download the dataset and use it to fine-tune StarCoder2 using QLoRA. We'll see what kind of trade() functions our model can create before and after training. """ # pip installs !pip install -q requests==2.31.0 torch peft bitsandbytes transformers trl accelerate sentencepiece wandb # imports import os from google.colab import userdata from huggingface_hub import login import torch import transformers from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer, TrainingArguments from datasets import load_dataset, Dataset import wandb from peft import LoraConfig from trl import SFTTrainer, SFTConfig from datetime import datetime # Constants BASE_MODEL = "bigcode/starcoder2-3b" # choose 3b or 7b PROJECT_NAME = "trading" RUN_NAME = f"{datetime.now():%Y-%m-%d_%H.%M.%S}" PROJECT_RUN_NAME = f"{PROJECT_NAME}-{RUN_NAME}" DATASET_NAME = "ed-donner/trade_code_dataset" # Hyperparameters for QLoRA Fine-Tuning # Details of QLoRA are out of scope for today, but there's # more information and links in the resources EPOCHS = 1 LORA_ALPHA = 32 LORA_R = 16 LORA_DROPOUT = 0.1 BATCH_SIZE = 1 GRADIENT_ACCUMULATION_STEPS = 1 LEARNING_RATE = 2e-4 LR_SCHEDULER_TYPE = 'cosine' WEIGHT_DECAY = 0.001 TARGET_MODULES = ["q_proj", "v_proj", "k_proj", "o_proj"] MAX_SEQUENCE_LENGTH = 320 # Other config STEPS = 10 SAVE_STEPS = 300 """### Log in to HuggingFace and Weights & Biases If you don't already have a HuggingFace account, visit https://huggingface.co to sign up and create a token. Then select the Secrets for this Notebook by clicking on the key icon in the left, and add a new secret called `HF_TOKEN` with the value as your token. Repeat this for weightsandbiases at https://wandb.ai and add a secret called `WANDB_API_KEY` """ # Log in to HuggingFace hf_token = userdata.get('HF_TOKEN') login(hf_token, add_to_git_credential=True) # Log in to Weights & Biases wandb_api_key = userdata.get('WANDB_API_KEY') os.environ["WANDB_API_KEY"] = wandb_api_key wandb.login() # Configure Weights & Biases to record against our project os.environ["WANDB_PROJECT"] = PROJECT_NAME os.environ["WANDB_LOG_MODEL"] = "true" os.environ["WANDB_WATCH"] = "false" """## Now load the Tokenizer and Model""" # Load the Tokenizer and the Model tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True) tokenizer.pad_token = tokenizer.eos_token tokenizer.padding_side = "right" quant_config = BitsAndBytesConfig(load_in_8bit=True) base_model = AutoModelForCausalLM.from_pretrained( BASE_MODEL, quantization_config=quant_config, device_map="auto", ) base_model.generation_config.pad_token_id = tokenizer.pad_token_id print(f"Memory footprint: {base_model.get_memory_footprint() / 1e6:.1f} MB") """## Let's try out the model before we do fine-tuning""" prompt = """ # tickers is a list of stock tickers import tickers # prices is a dict; the key is a ticker and the value is a list of historic prices, today first import prices # Trade represents a decision to buy or sell a quantity of a ticker import Trade import random import numpy as np def trade(): """ from transformers import TextStreamer streamer = TextStreamer(tokenizer) inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda") outputs = base_model.generate(inputs, max_new_tokens=100, streamer=streamer) # Load our dataset dataset = load_dataset(DATASET_NAME)['train'] dataset # First, specify the configuration parameters for LoRA peft_parameters = LoraConfig( lora_alpha=LORA_ALPHA, lora_dropout=LORA_DROPOUT, r=LORA_R, bias="none", task_type="CAUSAL_LM", target_modules=TARGET_MODULES, ) # Next, specify the general configuration parameters for training train_params = SFTConfig( output_dir=PROJECT_RUN_NAME, num_train_epochs=EPOCHS, per_device_train_batch_size=BATCH_SIZE, per_device_eval_batch_size=1, eval_strategy="no", gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS, optim="paged_adamw_32bit", save_steps=SAVE_STEPS, save_total_limit=10, logging_steps=STEPS, learning_rate=LEARNING_RATE, weight_decay=WEIGHT_DECAY, fp16=False, bf16=True, max_grad_norm=0.3, max_steps=-1, warmup_ratio=0.03, group_by_length=True, lr_scheduler_type=LR_SCHEDULER_TYPE, report_to="wandb", run_name=RUN_NAME, max_seq_length=MAX_SEQUENCE_LENGTH, dataset_text_field="text", ) # And now, the Supervised Fine Tuning Trainer will carry out the fine-tuning # Given these 2 sets of configuration parameters fine_tuning = SFTTrainer( model=base_model, train_dataset=dataset, peft_config=peft_parameters, tokenizer=tokenizer, args=train_params ) # Fine-tune! fine_tuning.train() # Push our fine-tuned model to Hugging Face fine_tuning.model.push_to_hub(PROJECT_RUN_NAME, private=True) # Code up a trade inputs = tokenizer.encode(prompt, return_tensors="pt").to("cuda") outputs = fine_tuning.model.generate(inputs, max_new_tokens=120, streamer=streamer) # Another! outputs = fine_tuning.model.generate(inputs, max_new_tokens=120, streamer=streamer) """## That's the example of QLoRA Fine Tuning to write code to carry out a specific function (but don't actually use this for trading!)"""