Instructions for using usr256864/ee_gol_ep_746 with libraries, inference providers, notebooks, and local apps. Follow the links below to get started.

Libraries

How to use usr256864/ee_gol_ep_746 with PEFT:

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("HiTZ/GoLLIE-7B")
model = PeftModel.from_pretrained(base_model, "usr256864/ee_gol_ep_746")
```

Transformers

How to use usr256864/ee_gol_ep_746 with Transformers:

```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="usr256864/ee_gol_ep_746")

# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("usr256864/ee_gol_ep_746", dtype="auto")
```

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use usr256864/ee_gol_ep_746 with vLLM:

Install from pip and serve the model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "usr256864/ee_gol_ep_746"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "usr256864/ee_gol_ep_746",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker

docker model run hf.co/usr256864/ee_gol_ep_746

SGLang

How to use usr256864/ee_gol_ep_746 with SGLang:

Install from pip and serve the model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "usr256864/ee_gol_ep_746" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "usr256864/ee_gol_ep_746",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "usr256864/ee_gol_ep_746" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "usr256864/ee_gol_ep_746",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Docker Model Runner
How to use usr256864/ee_gol_ep_746 with Docker Model Runner:
```
docker model run hf.co/usr256864/ee_gol_ep_746
```

ee_gol_ep_746 / trainer_state.json

usr256864

Initial model upload

b112fab verified 5 months ago

raw

history blame contribute delete

16.3 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 1.0,
	"eval_steps": 500,
	"global_step": 746,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.05,
	"completions/max_length": 246.3,
	"completions/max_terminated_length": 240.26,
	"completions/mean_length": 195.0275,
	"completions/mean_terminated_length": 191.87969146728517,
	"completions/min_length": 147.76,
	"completions/min_terminated_length": 147.76,
	"entropy": 0.06807037293910981,
	"epoch": 0.06702412868632708,
	"frac_reward_zero_std": 0.4475,
	"grad_norm": 0.1978774070739746,
	"learning_rate": 1e-05,
	"loss": -0.0022,
	"num_tokens": 6268258.0,
	"reward": 12.489985446929932,
	"reward_std": 1.05244723290205,
	"rewards/event_reward_fn/mean": 11.62375,
	"rewards/event_reward_fn/std": 7.598931360244751,
	"rewards/format_reward_fn/mean": 0.8662354218959808,
	"rewards/format_reward_fn/std": 0.24084076710045338,
	"step": 50,
	"step_time": 24.881226640827954
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.043125,
	"completions/max_length": 249.38,
	"completions/max_terminated_length": 244.26,
	"completions/mean_length": 198.4925,
	"completions/mean_terminated_length": 195.8674203491211,
	"completions/min_length": 155.1,
	"completions/min_terminated_length": 155.1,
	"entropy": 0.07096008479595184,
	"epoch": 0.13404825737265416,
	"frac_reward_zero_std": 0.42,
	"grad_norm": 0.31616032123565674,
	"learning_rate": 1e-05,
	"loss": -0.0052,
	"num_tokens": 12603730.0,
	"reward": 11.722552404403686,
	"reward_std": 1.104598103761673,
	"rewards/event_reward_fn/mean": 10.865,
	"rewards/event_reward_fn/std": 7.203483366966248,
	"rewards/format_reward_fn/mean": 0.8575523483753205,
	"rewards/format_reward_fn/std": 0.25920433282852173,
	"step": 100,
	"step_time": 23.881343694739044
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.069375,
	"completions/max_length": 251.04,
	"completions/max_terminated_length": 245.08,
	"completions/mean_length": 201.58125,
	"completions/mean_terminated_length": 197.60445678710937,
	"completions/min_length": 157.96,
	"completions/min_terminated_length": 157.96,
	"entropy": 0.07228697955608368,
	"epoch": 0.20107238605898123,
	"frac_reward_zero_std": 0.41,
	"grad_norm": 0.1767224669456482,
	"learning_rate": 1e-05,
	"loss": 0.002,
	"num_tokens": 19236102.0,
	"reward": 11.989666719436645,
	"reward_std": 1.2850025883316993,
	"rewards/event_reward_fn/mean": 11.1225,
	"rewards/event_reward_fn/std": 7.3152674865722656,
	"rewards/format_reward_fn/mean": 0.8671666479110718,
	"rewards/format_reward_fn/std": 0.24983404949307442,
	"step": 150,
	"step_time": 27.783113366477192
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.068125,
	"completions/max_length": 250.62,
	"completions/max_terminated_length": 244.54,
	"completions/mean_length": 201.1025,
	"completions/mean_terminated_length": 197.25198516845703,
	"completions/min_length": 156.4,
	"completions/min_terminated_length": 156.4,
	"entropy": 0.06773373357951641,
	"epoch": 0.2680965147453083,
	"frac_reward_zero_std": 0.415,
	"grad_norm": 0.13261352479457855,
	"learning_rate": 1e-05,
	"loss": -0.0029,
	"num_tokens": 25426958.0,
	"reward": 12.467143926620484,
	"reward_std": 1.1554639112949372,
	"rewards/event_reward_fn/mean": 11.59875,
	"rewards/event_reward_fn/std": 7.149877543449402,
	"rewards/format_reward_fn/mean": 0.8683938610553742,
	"rewards/format_reward_fn/std": 0.24253679752349855,
	"step": 200,
	"step_time": 24.421198091395198
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.058125,
	"completions/max_length": 250.22,
	"completions/max_terminated_length": 243.8,
	"completions/mean_length": 200.303125,
	"completions/mean_terminated_length": 196.90248321533204,
	"completions/min_length": 162.42,
	"completions/min_terminated_length": 162.42,
	"entropy": 0.06486415289342404,
	"epoch": 0.3351206434316354,
	"frac_reward_zero_std": 0.385,
	"grad_norm": 0.49442073702812195,
	"learning_rate": 1e-05,
	"loss": -0.0036,
	"num_tokens": 31582342.0,
	"reward": 12.355808296203612,
	"reward_std": 1.1142808997631073,
	"rewards/event_reward_fn/mean": 11.48875,
	"rewards/event_reward_fn/std": 7.448825697898865,
	"rewards/format_reward_fn/mean": 0.8670582604408265,
	"rewards/format_reward_fn/std": 0.24978963822126388,
	"step": 250,
	"step_time": 25.453000083304943
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.04875,
	"completions/max_length": 248.68,
	"completions/max_terminated_length": 244.22,
	"completions/mean_length": 198.759375,
	"completions/mean_terminated_length": 196.16592681884765,
	"completions/min_length": 156.2,
	"completions/min_terminated_length": 156.2,
	"entropy": 0.0681518343836069,
	"epoch": 0.40214477211796246,
	"frac_reward_zero_std": 0.39,
	"grad_norm": 0.48775437474250793,
	"learning_rate": 1e-05,
	"loss": -0.0057,
	"num_tokens": 37800719.0,
	"reward": 12.434584522247315,
	"reward_std": 1.183589797616005,
	"rewards/event_reward_fn/mean": 11.56375,
	"rewards/event_reward_fn/std": 7.52141658782959,
	"rewards/format_reward_fn/mean": 0.8708344352245331,
	"rewards/format_reward_fn/std": 0.23306368254125118,
	"step": 300,
	"step_time": 25.360634116120636
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.034375,
	"completions/max_length": 248.34,
	"completions/max_terminated_length": 245.28,
	"completions/mean_length": 203.264375,
	"completions/mean_terminated_length": 201.32774475097656,
	"completions/min_length": 157.54,
	"completions/min_terminated_length": 157.54,
	"entropy": 0.06739457175135613,
	"epoch": 0.4691689008042895,
	"frac_reward_zero_std": 0.3525,
	"grad_norm": 0.33356958627700806,
	"learning_rate": 1e-05,
	"loss": -0.004,
	"num_tokens": 44150011.0,
	"reward": 13.173797435760498,
	"reward_std": 1.2946509444713592,
	"rewards/event_reward_fn/mean": 12.28875,
	"rewards/event_reward_fn/std": 7.145490102767944,
	"rewards/format_reward_fn/mean": 0.885047378540039,
	"rewards/format_reward_fn/std": 0.22108205765485764,
	"step": 350,
	"step_time": 26.940150288008155
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.064375,
	"completions/max_length": 252.9,
	"completions/max_terminated_length": 247.8,
	"completions/mean_length": 203.064375,
	"completions/mean_terminated_length": 199.5350747680664,
	"completions/min_length": 158.26,
	"completions/min_terminated_length": 158.26,
	"entropy": 0.0657703248411417,
	"epoch": 0.5361930294906166,
	"frac_reward_zero_std": 0.435,
	"grad_norm": 0.26359474658966064,
	"learning_rate": 1e-05,
	"loss": -0.0021,
	"num_tokens": 50384400.0,
	"reward": 12.238037357330322,
	"reward_std": 1.057584773004055,
	"rewards/event_reward_fn/mean": 11.37,
	"rewards/event_reward_fn/std": 7.154304637908935,
	"rewards/format_reward_fn/mean": 0.8680373668670655,
	"rewards/format_reward_fn/std": 0.26109003871679304,
	"step": 400,
	"step_time": 25.59800311360508
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.049375,
	"completions/max_length": 249.06,
	"completions/max_terminated_length": 244.9,
	"completions/mean_length": 203.706875,
	"completions/mean_terminated_length": 200.99220581054686,
	"completions/min_length": 161.06,
	"completions/min_terminated_length": 161.06,
	"entropy": 0.06626586891710758,
	"epoch": 0.6032171581769437,
	"frac_reward_zero_std": 0.3775,
	"grad_norm": 0.48660293221473694,
	"learning_rate": 1e-05,
	"loss": -0.004,
	"num_tokens": 56771056.0,
	"reward": 13.009743461608887,
	"reward_std": 1.2429037857055665,
	"rewards/event_reward_fn/mean": 12.130625,
	"rewards/event_reward_fn/std": 7.234463820457458,
	"rewards/format_reward_fn/mean": 0.8791184043884277,
	"rewards/format_reward_fn/std": 0.23800445690751076,
	"step": 450,
	"step_time": 25.550446799769997
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.066875,
	"completions/max_length": 251.92,
	"completions/max_terminated_length": 246.12,
	"completions/mean_length": 204.07625,
	"completions/mean_terminated_length": 200.35590240478516,
	"completions/min_length": 160.74,
	"completions/min_terminated_length": 160.74,
	"entropy": 0.06663089752197265,
	"epoch": 0.6702412868632708,
	"frac_reward_zero_std": 0.4025,
	"grad_norm": 0.6319305300712585,
	"learning_rate": 1e-05,
	"loss": -0.0042,
	"num_tokens": 63078757.0,
	"reward": 12.313038005828858,
	"reward_std": 1.1368902394175529,
	"rewards/event_reward_fn/mean": 11.4575,
	"rewards/event_reward_fn/std": 6.7143393945693965,
	"rewards/format_reward_fn/mean": 0.8555380630493165,
	"rewards/format_reward_fn/std": 0.2657873314619064,
	"step": 500,
	"step_time": 26.24973841637373
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.091875,
	"completions/max_length": 252.82,
	"completions/max_terminated_length": 246.88,
	"completions/mean_length": 203.815,
	"completions/mean_terminated_length": 198.69242126464843,
	"completions/min_length": 161.16,
	"completions/min_terminated_length": 161.16,
	"entropy": 0.06187104433774948,
	"epoch": 0.7372654155495979,
	"frac_reward_zero_std": 0.425,
	"grad_norm": 0.40395304560661316,
	"learning_rate": 1e-05,
	"loss": -0.0025,
	"num_tokens": 69170452.0,
	"reward": 12.482298536300659,
	"reward_std": 1.0457301473617553,
	"rewards/event_reward_fn/mean": 11.64625,
	"rewards/event_reward_fn/std": 7.317771224975586,
	"rewards/format_reward_fn/mean": 0.8360484623908997,
	"rewards/format_reward_fn/std": 0.2895883430540562,
	"step": 550,
	"step_time": 24.193240740820766
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.110625,
	"completions/max_length": 252.62,
	"completions/max_terminated_length": 246.8,
	"completions/mean_length": 208.275625,
	"completions/mean_terminated_length": 202.49910614013672,
	"completions/min_length": 165.54,
	"completions/min_terminated_length": 165.54,
	"entropy": 0.0649487990140915,
	"epoch": 0.8042895442359249,
	"frac_reward_zero_std": 0.38,
	"grad_norm": 0.37119486927986145,
	"learning_rate": 1e-05,
	"loss": 0.0006,
	"num_tokens": 75499314.0,
	"reward": 12.80059557914734,
	"reward_std": 1.1889909988641738,
	"rewards/event_reward_fn/mean": 11.97375,
	"rewards/event_reward_fn/std": 7.475857477188111,
	"rewards/format_reward_fn/mean": 0.8268455564975739,
	"rewards/format_reward_fn/std": 0.29714462146162984,
	"step": 600,
	"step_time": 24.3176869976148
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.05625,
	"completions/max_length": 249.28,
	"completions/max_terminated_length": 244.8,
	"completions/mean_length": 202.789375,
	"completions/mean_terminated_length": 199.91522064208985,
	"completions/min_length": 161.74,
	"completions/min_terminated_length": 161.74,
	"entropy": 0.06481640346348286,
	"epoch": 0.871313672922252,
	"frac_reward_zero_std": 0.3975,
	"grad_norm": 0.08866075426340103,
	"learning_rate": 1e-05,
	"loss": -0.0023,
	"num_tokens": 81673001.0,
	"reward": 12.689926280975342,
	"reward_std": 1.2458794575929641,
	"rewards/event_reward_fn/mean": 11.815625,
	"rewards/event_reward_fn/std": 7.275726590156555,
	"rewards/format_reward_fn/mean": 0.8743013119697571,
	"rewards/format_reward_fn/std": 0.23756251022219657,
	"step": 650,
	"step_time": 25.04028965227306
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.100625,
	"completions/max_length": 253.72,
	"completions/max_terminated_length": 248.28,
	"completions/mean_length": 205.536875,
	"completions/mean_terminated_length": 200.1349432373047,
	"completions/min_length": 162.16,
	"completions/min_terminated_length": 162.16,
	"entropy": 0.0658975774794817,
	"epoch": 0.938337801608579,
	"frac_reward_zero_std": 0.3975,
	"grad_norm": 0.2268964648246765,
	"learning_rate": 1e-05,
	"loss": -0.0008,
	"num_tokens": 87934795.0,
	"reward": 12.72035478591919,
	"reward_std": 1.1722034803032875,
	"rewards/event_reward_fn/mean": 11.888125,
	"rewards/event_reward_fn/std": 7.583159003257752,
	"rewards/format_reward_fn/mean": 0.8322297859191895,
	"rewards/format_reward_fn/std": 0.29026631206274034,
	"step": 700,
	"step_time": 24.744350045956672
	}
	],
	"logging_steps": 50,
	"max_steps": 7460,
	"num_input_tokens_seen": 93493541,
	"num_train_epochs": 10,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": false
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}