# Import unsloth before transformers/trl so its patches are applied first.
from unsloth import FastLanguageModel, is_bfloat16_supported
from unsloth.chat_templates import get_chat_template
from datasets import load_dataset, concatenate_datasets
from transformers import TrainingArguments, TextStreamer
from trl import SFTTrainer
# ###############################################################################
# # 1. Load/Initialize Model and Tokenizer
# ###############################################################################
# max_seq_length = 2048
# model_name = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
# model, tokenizer = FastLanguageModel.from_pretrained(
#     model_name=model_name,
#     max_seq_length=max_seq_length,
#     load_in_4bit=True,
#     dtype=None,
# )
# model = FastLanguageModel.get_peft_model(
#     model,
#     r=16,
#     lora_alpha=16,
#     lora_dropout=0,
#     target_modules=[
#         "q_proj", "k_proj", "v_proj", "up_proj", "down_proj",
#         "o_proj", "gate_proj"
#     ],
#     use_rslora=True,
#     use_gradient_checkpointing="unsloth"
# )
# # Prepare the tokenizer for the "chatml" format
# tokenizer = get_chat_template(
#     tokenizer,
#     mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
#     chat_template="chatml",
# )
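# # Optional sanity check (illustrative, not part of the original run): render
# # one toy ShareGPT-style exchange to confirm the "from"/"value" mapping and
# # the chatml template are wired up as expected.
# sample_exchange = [
#     {"from": "human", "value": "Hi"},
#     {"from": "gpt", "value": "Hello!"},
# ]
# print(tokenizer.apply_chat_template(sample_exchange, tokenize=False))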
# ###############################################################################
# # 2. Dataset Loading and Caching
# ###############################################################################
# # Apply the chat template to ShareGPT-style records ("conversations" column):
# def apply_template(examples):
#     messages_batch = examples["conversations"]
#     texts = []
#     for message in messages_batch:
#         text = tokenizer.apply_chat_template(
#             message,
#             tokenize=False,
#             add_generation_prompt=False
#         )
#         texts.append(text)
#     return {"text": texts}
# def apply_template2(examples):
#     import json
#     conversation_batch = examples["conversation"]
#     tools_batch = examples["tools"]
#     texts = []
#     for i, conversation_json_str in enumerate(conversation_batch):
#         # 1) Load conversation & tools:
#         thread = json.loads(conversation_json_str)
#         tools_data = json.loads(tools_batch[i])
#         # 2) Convert "arguments" to "parameters"
#         for tool in tools_data:
#             if "arguments" in tool:
#                 tool["parameters"] = tool["arguments"]
#         # 3) Create the system prompt
#         system_prompt = {
#             "from": "system",
#             "value": (
#                 "You are a function calling AI model. You are provided with "
#                 "function signatures within <tools> </tools> XML tags. Don't make "
#                 "assumptions about what values to plug into functions.\n"
#                 f"<tools>{json.dumps(tools_data)}</tools>"
#             )
#         }
#         # 4) Build the new conversation: tool-call turns are attributed to the
#         #    assistant ("gpt") role and wrapped in <tool_call> ... </tool_call>;
#         #    all other turns pass through with their original role and content.
#         clean_thread = [system_prompt]
#         for msg in thread:
#             if msg["role"] == "tool call":
#                 item = json.dumps({"type": "function", "function": msg["content"]})
#                 clean_thread.append({
#                     "from": "gpt",
#                     "value": f"<tool_call>{item}</tool_call>"
#                 })
#             else:
#                 clean_thread.append({
#                     "from": msg["role"],
#                     "value": msg["content"]
#                 })
#         # 5) Pass the LIST (not the JSON string) to apply_chat_template
#         text = tokenizer.apply_chat_template(
#             clean_thread,
#             tokenize=False,
#             add_generation_prompt=False
#         )
#         texts.append(text)
#     return {"text": texts}
# tool_intro = "You are a function calling AI model. You are provided with function signatures within <tools> </tools> XML tags. You may call one or more functions to assist with the user query. Don't make assumptions about what values to plug into functions."  # (defined here but not referenced below)
# # If you want a local cache file, pass cache_file_name to .map()
# dataset_1 = load_dataset(
#     "interstellarninja/tool-calls-sharegpt",
#     split="train",
# )
# # Load second dataset
# dataset_2 = load_dataset(
#     "interstellarninja/tool-calls-multiturn",
#     split="train",
# )
# dataset_3 = load_dataset(
#     "BitAgent/tool_calling",
#     split="train",
# )
# dataset_1 = dataset_1.map(apply_template, batched=True)
# dataset_2 = dataset_2.map(apply_template, batched=True)
# dataset_3 = dataset_3.map(apply_template2, batched=True)
# # Concatenate all three datasets
# dataset = concatenate_datasets([dataset_1, dataset_2, dataset_3])
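# # Optional sanity check before training: confirm the merged dataset has a
# # populated "text" column and eyeball one formatted example.
# print(len(dataset))
# print(dataset[0]["text"][:500])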
# ###############################################################################
# # 3. SFTTrainer and Training Arguments (with checkpointing)
# ###############################################################################
# training_args = TrainingArguments(
#     learning_rate=3e-4,
#     lr_scheduler_type="linear",
#     per_device_train_batch_size=8,
#     gradient_accumulation_steps=2,
#     num_train_epochs=1,
#     fp16=not is_bfloat16_supported(),
#     bf16=is_bfloat16_supported(),
#     logging_steps=1,
#     optim="adamw_8bit",
#     weight_decay=0.01,
#     warmup_steps=10,
#     output_dir="drive/MyDrive/Ribo/model-checkpoints",
#     seed=0,
#     report_to="none",
# )
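# # Note: the effective train batch size is
# # per_device_train_batch_size * gradient_accumulation_steps = 8 * 2 = 16.
# # No save_strategy/save_steps is set here, so checkpoint frequency falls back
# # to the Trainer defaults; pass save_steps=... above to control how often
# # checkpoints land in output_dir.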
# trainer = SFTTrainer(
#     model=model,
#     tokenizer=tokenizer,
#     train_dataset=dataset,
#     dataset_text_field="text",
#     max_seq_length=max_seq_length,
#     dataset_num_proc=2,
#     packing=True,
#     args=training_args,
# )
# ###############################################################################
# # 4. Train and Save Checkpoints
# ###############################################################################
# trainer.train()
# # A checkpoint is saved every `save_steps` steps under `{output_dir}/checkpoint-*`
# # (here: drive/MyDrive/Ribo/model-checkpoints). You can resume training from
# # there by passing `resume_from_checkpoint`.
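# # To resume an interrupted run from the latest checkpoint in output_dir
# # (standard Trainer API; a specific checkpoint path also works):
# # trainer.train(resume_from_checkpoint=True)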
# ###############################################################################
# # 5. Convert to Inference Model
# ###############################################################################
# model = FastLanguageModel.for_inference(model)
# ###############################################################################
# # 6. Save & Push Final Merged Model
# ###############################################################################
# # Save the merged (16-bit) model locally
# model.save_pretrained_merged(
#     "drive/MyDrive/Ribo/model",
#     tokenizer,
#     save_method="merged_16bit"
# )
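# # The header also mentions pushing; unsloth provides push_to_hub_merged for
# # that. The repo name and token below are placeholders, not values from this
# # run:
# # model.push_to_hub_merged(
# #     "your-username/your-model-name",
# #     tokenizer,
# #     save_method="merged_16bit",
# #     token="hf_...",
# # )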
# Reload the merged model for inference (here from the current working
# directory; point this at wherever the merged model was saved).
model, tokenizer = FastLanguageModel.from_pretrained("./")
FastLanguageModel.for_inference(model)  # enable unsloth's fast inference path
###############################################################################
# 7. Example Inference with TextStreamer
###############################################################################
messages = [
    {
        "from": "system",
        "value": """
Available tools:
[
    {
        "type": "function",
        "function": {
            "name": "get_current_date",
            "description": "Returns the current date in the format specified",
            "parameters": {
                "type": "object",
                "required": ["format"],
                "properties": {
                    "format": {
                        "type": "string",
                        "description": "will format the date in the format specified MM/DD/YY or similar"
                    }
                }
            }
        }
    }
]
"""
    },
    {"from": "human", "value": "What is the current date?"},
]
formatted_text = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,  # needed so input_ids / attention_mask can be indexed below
    return_tensors="pt",
).to(model.device)
# If your GPU has limited memory, you might need a smaller max_new_tokens
# or streaming logic
text_streamer = TextStreamer(tokenizer)
output = model.generate(
    input_ids=formatted_text["input_ids"],
    attention_mask=formatted_text["attention_mask"],
    streamer=text_streamer,
    max_new_tokens=4096,
    use_cache=True
)
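# Optional: decode only the newly generated tokens and extract any <tool_call>
# payload the model emitted. This is a regex-based sketch that assumes the
# model follows the <tool_call>...</tool_call> convention used in training.
import json
import re

generated = tokenizer.decode(
    output[0][formatted_text["input_ids"].shape[-1]:],
    skip_special_tokens=True,
)
match = re.search(r"<tool_call>(.*?)</tool_call>", generated, re.DOTALL)
if match:
    print("Parsed tool call:", json.loads(match.group(1)))
else:
    print("No tool call found in:", generated)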