|
|
|
|
| import torch |
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
|
| model_name = "TinyPixel/Llama-2-7B-bf16-sharded" |
|
|
# Quantization config: load the base model in 4-bit NF4 and run compute in float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
|
|
| model = AutoModelForCausalLM.from_pretrained( |
| model_name, |
| quantization_config=bnb_config, |
| trust_remote_code=True |
| ) |
model.config.use_cache = False  # the KV cache is only useful for generation, not training
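

"""As an optional sanity check, `model.get_memory_footprint()` reports how much memory the loaded weights occupy; with 4-bit quantization the 7B model should take only a few GB."""


print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")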
|
|
| """Let's also load the tokenizer below""" |
|
|
| tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) |
| tokenizer.pad_token = tokenizer.eos_token |
|
|
| from peft import LoraConfig, get_peft_model |
|
|
# LoRA hyper-parameters: `lora_r` is the rank of the update matrices,
# `lora_alpha` scales their contribution, and `lora_dropout` regularizes them
lora_alpha = 16
lora_dropout = 0.1
lora_r = 64
|
|
| peft_config = LoraConfig( |
| lora_alpha=lora_alpha, |
| lora_dropout=lora_dropout, |
| r=lora_r, |
| bias="none", |
| task_type="CAUSAL_LM" |
| ) |
|
|
| """## Loading the trainer |
| |
Here we will use the [`SFTTrainer` from the TRL library](https://huggingface.co/docs/trl/main/en/sft_trainer), a wrapper around the transformers `Trainer` that makes it easy to fine-tune models on instruction-based datasets with PEFT adapters. Let's first define the training arguments below.
| """ |
|
|
| from transformers import TrainingArguments |
|
|
| output_dir = "./results" |
| per_device_train_batch_size = 4 |
| gradient_accumulation_steps = 4 |
| optim = "paged_adamw_32bit" |
| save_steps = 100 |
| logging_steps = 10 |
| learning_rate = 2e-4 |
| max_grad_norm = 0.3 |
| max_steps = 100 |
| warmup_ratio = 0.03 |
| lr_scheduler_type = "constant" |
|
|
| training_arguments = TrainingArguments( |
| output_dir=output_dir, |
| per_device_train_batch_size=per_device_train_batch_size, |
| gradient_accumulation_steps=gradient_accumulation_steps, |
| optim=optim, |
| save_steps=save_steps, |
| logging_steps=logging_steps, |
| learning_rate=learning_rate, |
| fp16=True, |
| max_grad_norm=max_grad_norm, |
| max_steps=max_steps, |
| warmup_ratio=warmup_ratio, |
| group_by_length=True, |
| lr_scheduler_type=lr_scheduler_type, |
| ) |
|
|
| """Then finally pass everthing to the trainer""" |
|
|
| from trl import SFTTrainer |
|
|
| max_seq_length = 512 |
|
|
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,  # instruction dataset with a "text" column, prepared earlier in the notebook
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)
|
|
| """We will also pre-process the model by upcasting the layer norms in float 32 for more stable training""" |
|
|
# nn.Module.to() casts the parameters in place, so this upcasts every layer-norm module
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)
|
|
| """## Train the model |
| |
| Now let's train the model! Simply call `trainer.train()` |
| """ |
|
|
| trainer.train() |
|
|
| """During training, the model should converge nicely as follows: |
| |
|  |
| |
| The `SFTTrainer` also takes care of properly saving only the adapters during training instead of saving the entire model. |
| """ |
|
|
| model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model |
| model_to_save.save_pretrained("outputs") |
|
|
"""For the quick generation test below we can simply reuse the trained model from the trainer; the adapters saved in `outputs` can later be re-attached to a fresh base model with `PeftModel.from_pretrained` (note that `get_peft_model` would create new, untrained adapters)."""


model = trainer.model
|
|
dataset['text']  # peek at the raw training texts
|
|
| text = "Écrire un texte dans un style baroque sur la glace et le feu ### Assistant: Si j'en luis éton" |
| device = "cuda:0" |
|
|
| inputs = tokenizer(text, return_tensors="pt").to(device) |
| outputs = model.generate(**inputs, max_new_tokens=50) |
| print(tokenizer.decode(outputs[0], skip_special_tokens=True)) |
|
|
| from huggingface_hub import login |
| login() |
|
|
| model.push_to_hub("llama2-qlora-finetunined-french") |
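

"""To use the adapters later (for example in a fresh session), they can be loaded from the Hub on top of a newly quantized base model. A minimal sketch, assuming the adapters were pushed under your own username (replace `your-username` accordingly):"""


from peft import PeftModel

# Reload the 4-bit base model, then attach the pushed LoRA adapters
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True,
)
inference_model = PeftModel.from_pretrained(base_model, "your-username/llama2-qlora-finetunined-french")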
|
|
|
|