# -*- coding: utf-8 -*-
"""project3.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/19E9hoAzWKvn9c9SHqM4Xan_Ph4wNewHS

## Project 3: Write code to trade stocks

### An example code generator by fine-tuning StarCoder2 using QLoRA

NOTE: This is a toy example to illustrate the technique – please don’t use
any of this code to make trading decisions!

Previously, we created a dataset and uploaded it to Hugging Face. Now we download the dataset and use it to fine-tune StarCoder2 using QLoRA.

We'll see what kind of trade() functions our model can create before and after training.
"""

# pip installs (only requests is version-pinned; the other packages install at their latest release)

!pip install -q requests==2.31.0 torch peft bitsandbytes transformers trl accelerate sentencepiece wandb

# imports

import os
from google.colab import userdata
from huggingface_hub import login
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextStreamer, TrainingArguments
from datasets import load_dataset, Dataset
import wandb
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from datetime import datetime

# Constants

BASE_MODEL = "bigcode/starcoder2-3b"  # either the 3b or the 7b StarCoder2 checkpoint
PROJECT_NAME = "trading"
DATASET_NAME = "ed-donner/trade_code_dataset"

# Each run is stamped with its start time; the combined name is used further
# down as the training output directory and as the Hub repository name.
RUN_NAME = datetime.now().strftime("%Y-%m-%d_%H.%M.%S")
PROJECT_RUN_NAME = "-".join([PROJECT_NAME, RUN_NAME])

# Hyperparameters for QLoRA Fine-Tuning
# Details of QLoRA are out of scope for today, but there's
# more information and links in the resources

# -- LoRA adapter settings (consumed by LoraConfig below)
LORA_R = 16
LORA_ALPHA = 32
LORA_DROPOUT = 0.1
TARGET_MODULES = ["q_proj", "v_proj", "k_proj", "o_proj"]

# -- Optimisation settings (consumed by SFTConfig below)
EPOCHS = 1
BATCH_SIZE = 1
GRADIENT_ACCUMULATION_STEPS = 1
LEARNING_RATE = 2e-4
LR_SCHEDULER_TYPE = "cosine"
WEIGHT_DECAY = 0.001
MAX_SEQUENCE_LENGTH = 320

# Other config

STEPS = 10  # passed to the trainer as logging_steps
SAVE_STEPS = 300  # passed to the trainer as save_steps

"""### Log in to HuggingFace and Weights & Biases

If you don't already have a HuggingFace account, visit https://huggingface.co to sign up and create a token.

Then select the Secrets for this Notebook by clicking on the key icon on the left, and add a new secret called `HF_TOKEN` with your token as the value.

Repeat this for Weights & Biases at https://wandb.ai and add a secret called `WANDB_API_KEY`.
"""

# Log in to HuggingFace with the token stored in the notebook secret HF_TOKEN

hf_token = userdata.get("HF_TOKEN")
login(token=hf_token, add_to_git_credential=True)

# Log in to Weights & Biases: the key from the WANDB_API_KEY secret is exported
# as an environment variable before wandb.login() is called
wandb_api_key = userdata.get("WANDB_API_KEY")
os.environ["WANDB_API_KEY"] = wandb_api_key
wandb.login()

# Configure Weights & Biases to record against our project
os.environ.update(
    {
        "WANDB_PROJECT": PROJECT_NAME,
        "WANDB_LOG_MODEL": "true",
        "WANDB_WATCH": "false",
    }
)

"""## Now load the Tokenizer and Model"""

# Load the Tokenizer and the Model

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
# The end-of-sequence token doubles as the padding token; padding goes on the right
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# NOTE(review): the notebook text calls this QLoRA, which is usually described
# as 4-bit quantization; this config loads the weights in 8-bit — confirm that
# is intended.
quant_config = BitsAndBytesConfig(load_in_8bit=True)

model_load_options = {
    "quantization_config": quant_config,
    "device_map": "auto",
}
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, **model_load_options)

# Keep generation's pad token in step with the tokenizer's
base_model.generation_config.pad_token_id = tokenizer.pad_token_id

footprint_mb = base_model.get_memory_footprint() / 1e6
print(f"Memory footprint: {footprint_mb:.1f} MB")

"""## Let's try out the model before we do fine-tuning"""

# The code prefix the model is asked to complete: a few commented imports
# followed by the opening line of a trade() function. The leading and trailing
# newlines are part of the prompt.
prompt = (
    "\n"
    "# tickers is a list of stock tickers\n"
    "import tickers\n"
    "\n"
    "# prices is a dict; the key is a ticker and the value is a list of historic prices, today first\n"
    "import prices\n"
    "\n"
    "# Trade represents a decision to buy or sell a quantity of a ticker\n"
    "import Trade\n"
    "\n"
    "import random\n"
    "import numpy as np\n"
    "\n"
    "def trade():\n"
)

# Stream the base model's completion of the prompt to the output as it is generated.
# (TextStreamer is already imported at the top of the file.)
streamer = TextStreamer(tokenizer)

# Call the tokenizer directly rather than tokenizer.encode() so that we get an
# attention mask alongside the input ids: the pad token is the same as the EOS
# token here, so generate() cannot infer the mask on its own.
# The tensors are moved to the model's own device instead of a hard-coded
# "cuda", because the model was placed with device_map="auto".
inputs = tokenizer(prompt, return_tensors="pt").to(base_model.device)
outputs = base_model.generate(**inputs, max_new_tokens=100, streamer=streamer)

# Load our dataset from the Hugging Face Hub and keep its 'train' split
dataset_splits = load_dataset(DATASET_NAME)
dataset = dataset_splits['train']
dataset

# First, specify the configuration parameters for LoRA

peft_parameters = LoraConfig(
    task_type="CAUSAL_LM",
    # which modules receive adapters, and the adapter hyperparameters
    target_modules=TARGET_MODULES,
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    bias="none",
)

# Next, specify the general configuration parameters for training
# NOTE(review): these keyword names depend on the installed trl/transformers
# releases, which the pip cell does not pin — confirm against the installed versions.

train_params = SFTConfig(
    # where the run is written and how it is reported
    output_dir=PROJECT_RUN_NAME,
    run_name=RUN_NAME,
    report_to="wandb",
    logging_steps=STEPS,
    save_steps=SAVE_STEPS,
    save_total_limit=10,
    # how much training to do, and in what batch sizes
    num_train_epochs=EPOCHS,
    max_steps=-1,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    eval_strategy="no",
    # optimizer and learning-rate schedule
    optim="paged_adamw_32bit",
    learning_rate=LEARNING_RATE,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    warmup_ratio=0.03,
    weight_decay=WEIGHT_DECAY,
    max_grad_norm=0.3,
    # numeric precision
    fp16=False,
    bf16=True,
    # how the dataset is read and batched
    dataset_text_field="text",
    max_seq_length=MAX_SEQUENCE_LENGTH,
    group_by_length=True,
)

# And now, the Supervised Fine Tuning Trainer will carry out the fine-tuning,
# given the LoRA configuration and the training configuration defined above

fine_tuning = SFTTrainer(
    model=base_model,
    args=train_params,
    peft_config=peft_parameters,
    train_dataset=dataset,
    tokenizer=tokenizer,
)

# Fine-tune!
fine_tuning.train()

# Push our fine-tuned model to Hugging Face as a private repository
trained_model = fine_tuning.model
trained_model.push_to_hub(PROJECT_RUN_NAME, private=True)

# Code up a trade with the fine-tuned model

# Call the tokenizer directly rather than tokenizer.encode() so that an
# attention mask is produced alongside the input ids: the pad token is the same
# as the EOS token, so generate() cannot infer the mask on its own.
# The tensors follow the model's own device instead of a hard-coded "cuda".
# NOTE(review): the model may still be in training mode after train(), in which
# case LoRA dropout would be active during generation — confirm whether
# .eval() should be called first.
inputs = tokenizer(prompt, return_tensors="pt").to(fine_tuning.model.device)
outputs = fine_tuning.model.generate(**inputs, max_new_tokens=120, streamer=streamer)

# Another!

outputs = fine_tuning.model.generate(**inputs, max_new_tokens=120, streamer=streamer)

"""## That's the example of QLoRA Fine Tuning to write code to carry out a specific function (but don't actually use this for trading!)"""