Instructions to use Gege24/test_gin_rummy_np with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries
PEFT
How to use Gege24/test_gin_rummy_np with PEFT:
```
Base model is not found.
```

How to use Gege24/test_gin_rummy_np with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="Gege24/test_gin_rummy_np")
messages = [
    {"role": "user", "content": "Who are you?"},
]
pipe(messages)

# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Gege24/test_gin_rummy_np", dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use Gege24/test_gin_rummy_np with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "Gege24/test_gin_rummy_np"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "Gege24/test_gin_rummy_np",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker

docker model run hf.co/Gege24/test_gin_rummy_np

SGLang

How to use Gege24/test_gin_rummy_np with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "Gege24/test_gin_rummy_np" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "Gege24/test_gin_rummy_np",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "Gege24/test_gin_rummy_np" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "Gege24/test_gin_rummy_np",
		"messages": [
			{
				"role": "user",
				"content": "What is the capital of France?"
			}
		]
	}'

Docker Model Runner
How to use Gege24/test_gin_rummy_np with Docker Model Runner:
```
docker model run hf.co/Gege24/test_gin_rummy_np
```

test_gin_rummy_np / trainer_state.json

Gege24

Upload task output 1

cb62910 verified about 1 month ago

raw

history blame contribute delete

13.2 kB

	{
	"best_global_step": null,
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.00026,
	"eval_steps": 500,
	"global_step": 13,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 1779.0,
	"completions/max_terminated_length": 1779.0,
	"completions/mean_length": 1698.46875,
	"completions/mean_terminated_length": 1698.46875,
	"completions/min_length": 1279.0,
	"completions/min_terminated_length": 1279.0,
	"entropy": 0.48651931062340736,
	"epoch": 2e-05,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 1.5409082174301147,
	"kl": 0.0,
	"learning_rate": 0.0,
	"loss": 0.0112,
	"num_tokens": 75031.0,
	"reward": -9.59624195098877,
	"reward_std": 5.939093589782715,
	"rewards/rollout_reward_func/mean": -9.59624195098877,
	"rewards/rollout_reward_func/std": 10.368197441101074,
	"sampling/importance_sampling_ratio/max": 1.3440189361572266,
	"sampling/importance_sampling_ratio/mean": 0.9953499436378479,
	"sampling/importance_sampling_ratio/min": 0.564490556716919,
	"sampling/sampling_logp_difference/max": 0.45447802543640137,
	"sampling/sampling_logp_difference/mean": 0.016698362305760384,
	"step": 1,
	"step_time": 36.680761918001735
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"entropy": 0.48651931062340736,
	"epoch": 4e-05,
	"grad_norm": 1.5392467975616455,
	"kl": 0.0,
	"learning_rate": 2.8571428571428575e-07,
	"loss": 0.0112,
	"step": 2,
	"step_time": 5.709443093002847
	},
	{
	"clip_ratio/high_max": 0.00390625,
	"clip_ratio/high_mean": 0.001953125,
	"clip_ratio/low_mean": 0.001953125,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.00390625,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 1816.0,
	"completions/max_terminated_length": 1816.0,
	"completions/mean_length": 1625.21875,
	"completions/mean_terminated_length": 1625.21875,
	"completions/min_length": 1159.0,
	"completions/min_terminated_length": 1159.0,
	"entropy": 0.48103801161050797,
	"epoch": 6e-05,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 2.117859125137329,
	"kl": 0.0010091230506077409,
	"learning_rate": 5.714285714285715e-07,
	"loss": -0.0237,
	"num_tokens": 147721.0,
	"reward": -7.404824733734131,
	"reward_std": 11.744457244873047,
	"rewards/rollout_reward_func/mean": -7.404824733734131,
	"rewards/rollout_reward_func/std": 15.456405639648438,
	"sampling/importance_sampling_ratio/max": 1.4090882539749146,
	"sampling/importance_sampling_ratio/mean": 1.0395634174346924,
	"sampling/importance_sampling_ratio/min": 0.7728875279426575,
	"sampling/sampling_logp_difference/max": 0.2340834140777588,
	"sampling/sampling_logp_difference/mean": 0.019678719341754913,
	"step": 3,
	"step_time": 35.33501763899767
	},
	{
	"clip_ratio/high_max": 0.01171875,
	"clip_ratio/high_mean": 0.005859375,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.005859375,
	"entropy": 0.48065420985221863,
	"epoch": 8e-05,
	"grad_norm": 2.1440107822418213,
	"kl": 0.0009154866565950215,
	"learning_rate": 8.571428571428572e-07,
	"loss": -0.0232,
	"step": 4,
	"step_time": 5.808208025997374
	},
	{
	"clip_ratio/high_max": 0.0078125,
	"clip_ratio/high_mean": 0.00390625,
	"clip_ratio/low_mean": 0.001953125,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.005859375,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 1766.0,
	"completions/max_terminated_length": 1766.0,
	"completions/mean_length": 1573.65625,
	"completions/mean_terminated_length": 1573.65625,
	"completions/min_length": 1107.0,
	"completions/min_terminated_length": 1107.0,
	"entropy": 0.43740712106227875,
	"epoch": 0.0001,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 1.864342212677002,
	"kl": 0.0005766874128312338,
	"learning_rate": 1.142857142857143e-06,
	"loss": -0.0206,
	"num_tokens": 218674.0,
	"reward": -14.006583213806152,
	"reward_std": 12.985024452209473,
	"rewards/rollout_reward_func/mean": -14.006583213806152,
	"rewards/rollout_reward_func/std": 17.190784454345703,
	"sampling/importance_sampling_ratio/max": 1.3863141536712646,
	"sampling/importance_sampling_ratio/mean": 0.9954429864883423,
	"sampling/importance_sampling_ratio/min": 0.6810365915298462,
	"sampling/sampling_logp_difference/max": 0.2415471076965332,
	"sampling/sampling_logp_difference/mean": 0.016646649688482285,
	"step": 5,
	"step_time": 34.27298692500153
	},
	{
	"clip_ratio/high_max": 0.0078125,
	"clip_ratio/high_mean": 0.00390625,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.00390625,
	"entropy": 0.4379243813455105,
	"epoch": 0.00012,
	"grad_norm": 1.9039454460144043,
	"kl": 0.00071882207703311,
	"learning_rate": 1.4285714285714286e-06,
	"loss": -0.0202,
	"step": 6,
	"step_time": 5.641448482998385
	},
	{
	"clip_ratio/high_max": 0.0078125,
	"clip_ratio/high_mean": 0.00390625,
	"clip_ratio/low_mean": 0.00390625,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0078125,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 1748.0,
	"completions/max_terminated_length": 1748.0,
	"completions/mean_length": 1575.5,
	"completions/mean_terminated_length": 1575.5,
	"completions/min_length": 1186.0,
	"completions/min_terminated_length": 1186.0,
	"entropy": 0.4470406360924244,
	"epoch": 0.00014,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 1.4503103494644165,
	"kl": 0.0008566801006963942,
	"learning_rate": 1.7142857142857145e-06,
	"loss": 0.0132,
	"num_tokens": 289160.0,
	"reward": -3.2668540477752686,
	"reward_std": 10.61334228515625,
	"rewards/rollout_reward_func/mean": -3.2668540477752686,
	"rewards/rollout_reward_func/std": 16.216392517089844,
	"sampling/importance_sampling_ratio/max": 1.3690364360809326,
	"sampling/importance_sampling_ratio/mean": 1.0221995115280151,
	"sampling/importance_sampling_ratio/min": 0.6548231840133667,
	"sampling/sampling_logp_difference/max": 0.392575740814209,
	"sampling/sampling_logp_difference/mean": 0.01853613555431366,
	"step": 7,
	"step_time": 34.67648999299854
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.001953125,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.001953125,
	"entropy": 0.44634250551462173,
	"epoch": 0.00016,
	"grad_norm": 1.4721945524215698,
	"kl": 0.0007410887337755412,
	"learning_rate": 2.0000000000000003e-06,
	"loss": 0.014,
	"step": 8,
	"step_time": 5.566038421000485
	},
	{
	"clip_ratio/high_max": 0.01171875,
	"clip_ratio/high_mean": 0.0078125,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0078125,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 1773.0,
	"completions/max_terminated_length": 1773.0,
	"completions/mean_length": 1650.5,
	"completions/mean_terminated_length": 1650.5,
	"completions/min_length": 1169.0,
	"completions/min_terminated_length": 1169.0,
	"entropy": 0.5013628304004669,
	"epoch": 0.00018,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 2.5520236492156982,
	"kl": 0.001372927552438341,
	"learning_rate": 2.285714285714286e-06,
	"loss": -0.0308,
	"num_tokens": 362601.0,
	"reward": -13.83917236328125,
	"reward_std": 12.006336212158203,
	"rewards/rollout_reward_func/mean": -13.83917236328125,
	"rewards/rollout_reward_func/std": 14.237728118896484,
	"sampling/importance_sampling_ratio/max": 1.3693691492080688,
	"sampling/importance_sampling_ratio/mean": 0.9588738679885864,
	"sampling/importance_sampling_ratio/min": 0.5098013281822205,
	"sampling/sampling_logp_difference/max": 0.735576868057251,
	"sampling/sampling_logp_difference/mean": 0.02071024850010872,
	"step": 9,
	"step_time": 34.420860370997616
	},
	{
	"clip_ratio/high_max": 0.00390625,
	"clip_ratio/high_mean": 0.001953125,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.001953125,
	"entropy": 0.49781131744384766,
	"epoch": 0.0002,
	"grad_norm": 2.5958364009857178,
	"kl": 0.0012885355827165768,
	"learning_rate": 2.571428571428571e-06,
	"loss": -0.0278,
	"step": 10,
	"step_time": 5.687426060998405
	},
	{
	"clip_ratio/high_max": 0.00390625,
	"clip_ratio/high_mean": 0.001953125,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.001953125,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 1775.0,
	"completions/max_terminated_length": 1775.0,
	"completions/mean_length": 1532.0,
	"completions/mean_terminated_length": 1532.0,
	"completions/min_length": 264.0,
	"completions/min_terminated_length": 264.0,
	"entropy": 0.42485806345939636,
	"epoch": 0.00022,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 1.8452433347702026,
	"kl": 0.000992896981188096,
	"learning_rate": 2.8571428571428573e-06,
	"loss": 0.0203,
	"num_tokens": 431550.0,
	"reward": -0.7765803337097168,
	"reward_std": 14.750946044921875,
	"rewards/rollout_reward_func/mean": -0.7765803337097168,
	"rewards/rollout_reward_func/std": 21.5161190032959,
	"sampling/importance_sampling_ratio/max": 1.3237504959106445,
	"sampling/importance_sampling_ratio/mean": 1.0001271963119507,
	"sampling/importance_sampling_ratio/min": 0.6408203840255737,
	"sampling/sampling_logp_difference/max": 0.33285045623779297,
	"sampling/sampling_logp_difference/mean": 0.01815984398126602,
	"step": 11,
	"step_time": 33.55919377600003
	},
	{
	"clip_ratio/high_max": 0.0,
	"clip_ratio/high_mean": 0.0,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.0,
	"entropy": 0.4245442971587181,
	"epoch": 0.00024,
	"grad_norm": 1.6485886573791504,
	"kl": 0.0013787990028504282,
	"learning_rate": 3.142857142857143e-06,
	"loss": 0.019,
	"step": 12,
	"step_time": 5.637909467997815
	},
	{
	"clip_ratio/high_max": 0.00390625,
	"clip_ratio/high_mean": 0.001953125,
	"clip_ratio/low_mean": 0.0,
	"clip_ratio/low_min": 0.0,
	"clip_ratio/region_mean": 0.001953125,
	"completions/clipped_ratio": 0.0,
	"completions/max_length": 1791.0,
	"completions/max_terminated_length": 1791.0,
	"completions/mean_length": 1633.25,
	"completions/mean_terminated_length": 1633.25,
	"completions/min_length": 1195.0,
	"completions/min_terminated_length": 1195.0,
	"entropy": 0.478180218487978,
	"epoch": 0.00026,
	"frac_reward_zero_std": 0.0,
	"grad_norm": 2.3034250736236572,
	"kl": 0.0017315489676548168,
	"learning_rate": 3.428571428571429e-06,
	"loss": -0.0346,
	"num_tokens": 504355.0,
	"reward": -17.26664924621582,
	"reward_std": 14.347229957580566,
	"rewards/rollout_reward_func/mean": -17.26664924621582,
	"rewards/rollout_reward_func/std": 18.007043838500977,
	"sampling/importance_sampling_ratio/max": 1.7957122325897217,
	"sampling/importance_sampling_ratio/mean": 1.0002973079681396,
	"sampling/importance_sampling_ratio/min": 0.5741486549377441,
	"sampling/sampling_logp_difference/max": 0.5055437088012695,
	"sampling/sampling_logp_difference/mean": 0.024692352861166,
	"step": 13,
	"step_time": 32.15085511000143
	}
	],
	"logging_steps": 1.0,
	"max_steps": 100000,
	"num_input_tokens_seen": 504355,
	"num_train_epochs": 2,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 0.0,
	"train_batch_size": 2,
	"trial_name": null,
	"trial_params": null
	}